From 2313b1897f574e036635b25c501ec5e008407241 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 7 Dec 2018 16:16:51 +0800 Subject: [PATCH 001/539] drm/i915/gvt: mandatory require hypervisor's host_init Don't mark hypervisor module's host_init as optional, but mandatory required. Reviewed-by: Yuan, Hang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mpt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 67f19992b226..ce721099a020 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -50,11 +50,10 @@ * Zero on success, negative error code if failed */ static inline int intel_gvt_hypervisor_host_init(struct device *dev, - void *gvt, const void *ops) + void *gvt, const void *ops) { - /* optional to provide */ if (!intel_gvt_host.mpt->host_init) - return 0; + return -ENODEV; return intel_gvt_host.mpt->host_init(dev, gvt, ops); } From a2b8419a9e2975d19c0cd85f4912f2873bd974e0 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 7 Dec 2018 16:16:52 +0800 Subject: [PATCH 002/539] drm/i915/gvt: remove unused parameter for hypervisor's host_exit call The parameter 'void *gvt' is not used and required for hypervisor's exit call. Even for non-merged Xen hypervisor support. So just remove it. Reviewed-by: Yuan, Hang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 2 +- drivers/gpu/drm/i915/gvt/hypercall.h | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/gpu/drm/i915/gvt/mpt.h | 5 ++--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 733a2a0d0c30..a5b760b7bc10 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -316,7 +316,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) return; intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev); intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 5af11cf1b482..e49a9247ed78 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -39,7 +39,7 @@ */ struct intel_gvt_mpt { int (*host_init)(struct device *dev, void *gvt, const void *ops); - void (*host_exit)(struct device *dev, void *gvt); + void (*host_exit)(struct device *dev); int (*attach_vgpu)(void *vgpu, unsigned long *handle); void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c1072143da1d..1bbd04d30c42 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1467,7 +1467,7 @@ static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) return mdev_register_device(dev, &intel_vgpu_ops); } -static void kvmgt_host_exit(struct device *dev, void *gvt) +static void kvmgt_host_exit(struct device *dev) { mdev_unregister_device(dev); } diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index ce721099a020..c95ef77da62c 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -61,14 +61,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev, /** * intel_gvt_hypervisor_host_exit - exit GVT-g host side */ -static inline void intel_gvt_hypervisor_host_exit(struct device *dev, - void *gvt) +static inline void intel_gvt_hypervisor_host_exit(struct device *dev) { /* optional to provide */ if (!intel_gvt_host.mpt->host_exit) return; - intel_gvt_host.mpt->host_exit(dev, gvt); + intel_gvt_host.mpt->host_exit(dev); } /** From 9bdb073464d6008ed1839d358e320108ed12daae Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 7 Dec 2018 16:16:53 +0800 Subject: [PATCH 003/539] drm/i915/gvt: Change KVMGT as self load module This trys to make 'kvmgt' module as self loadable instead of loading by i915/gvt device model. So hypervisor specific module could be stand-alone, e.g only after loading hypervisor specific module, GVT feature could be enabled via specific hypervisor interface, e.g VFIO/mdev. So this trys to use hypervisor module register/unregister interface for that. Hypervisor module needs to take care of module reference itself when working for hypervisor interface, e.g for VFIO/mdev, hypervisor module would reference counting mdev when open and release. This makes 'kvmgt' module really split from GVT device model. User needs to load 'kvmgt' to enable VFIO/mdev interface. v6: - remove unused variable v5: - put module reference in register error path v4: - fix checkpatch warning v3: - Fix module reference handling for device open and release. Unused mdev devices would be cleaned up in device unregister when module unload. v2: - Fix kvmgt order after i915 for built-in case Cc: "Yuan, Hang" Cc: Alex Williamson Cc: "He, Min" Reviewed-by: Yuan, Hang Acked-by: Joonas Lahtinen Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gvt/Makefile | 1 - drivers/gpu/drm/i915/gvt/gvt.c | 108 +++++++++++---------------- drivers/gpu/drm/i915/gvt/gvt.h | 6 +- drivers/gpu/drm/i915/gvt/hypercall.h | 7 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 16 +++- drivers/gpu/drm/i915/gvt/mpt.h | 3 + drivers/gpu/drm/i915/intel_gvt.c | 9 --- 8 files changed, 69 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 19b5fe5016bf..63893fe00711 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -198,3 +198,4 @@ endif i915-y += intel_lpe_audio.o obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index b016dc753db9..271fb46d4dd0 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -7,4 +7,3 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) -obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index a5b760b7bc10..4e8947f33bd0 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -187,52 +187,6 @@ static const struct intel_gvt_ops intel_gvt_ops = { .write_protect_handler = intel_vgpu_page_track_handler, }; -/** - * intel_gvt_init_host - Load MPT modules and detect if we're running in host - * - * This function is called at the driver loading stage. If failed to find a - * loadable MPT module or detect currently we're running in a VM, then GVT-g - * will be disabled - * - * Returns: - * Zero on success, negative error code if failed. - * - */ -int intel_gvt_init_host(void) -{ - if (intel_gvt_host.initialized) - return 0; - - /* Xen DOM U */ - if (xen_domain() && !xen_initial_domain()) - return -ENODEV; - - /* Try to load MPT modules for hypervisors */ - if (xen_initial_domain()) { - /* In Xen dom0 */ - intel_gvt_host.mpt = try_then_request_module( - symbol_get(xengt_mpt), "xengt"); - intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN; - } else { -#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) - /* not in Xen. Try KVMGT */ - intel_gvt_host.mpt = try_then_request_module( - symbol_get(kvmgt_mpt), "kvmgt"); - intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM; -#endif - } - - /* Fail to load MPT modules - bail out */ - if (!intel_gvt_host.mpt) - return -EINVAL; - - gvt_dbg_core("Running with hypervisor %s in host mode\n", - supported_hypervisors[intel_gvt_host.hypervisor_type]); - - intel_gvt_host.initialized = true; - return 0; -} - static void init_device_info(struct intel_gvt *gvt) { struct intel_gvt_device_info *info = &gvt->device_info; @@ -316,7 +270,6 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) return; intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev); intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); @@ -352,13 +305,6 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) struct intel_vgpu *vgpu; int ret; - /* - * Cannot initialize GVT device without intel_gvt_host gets - * initialized first. - */ - if (WARN_ON(!intel_gvt_host.initialized)) - return -EINVAL; - if (WARN_ON(dev_priv->gvt)) return -EEXIST; @@ -420,13 +366,6 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) goto out_clean_types; } - ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, - &intel_gvt_ops); - if (ret) { - gvt_err("failed to register gvt-g host device: %d\n", ret); - goto out_clean_types; - } - vgpu = intel_gvt_create_idle_vgpu(gvt); if (IS_ERR(vgpu)) { ret = PTR_ERR(vgpu); @@ -441,6 +380,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; + intel_gvt_host.dev = &dev_priv->drm.pdev->dev; + intel_gvt_host.initialized = true; return 0; out_clean_types: @@ -467,6 +408,45 @@ out_clean_idr: return ret; } -#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) -MODULE_SOFTDEP("pre: kvmgt"); -#endif +int +intel_gvt_register_hypervisor(struct intel_gvt_mpt *m) +{ + int ret; + void *gvt; + + if (!intel_gvt_host.initialized) + return -ENODEV; + + if (m->type != INTEL_GVT_HYPERVISOR_KVM && + m->type != INTEL_GVT_HYPERVISOR_XEN) + return -EINVAL; + + /* Get a reference for device model module */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + intel_gvt_host.mpt = m; + intel_gvt_host.hypervisor_type = m->type; + gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt; + + ret = intel_gvt_hypervisor_host_init(intel_gvt_host.dev, gvt, + &intel_gvt_ops); + if (ret < 0) { + gvt_err("Failed to init %s hypervisor module\n", + supported_hypervisors[intel_gvt_host.hypervisor_type]); + module_put(THIS_MODULE); + return -ENODEV; + } + gvt_dbg_core("Running with hypervisor %s in host mode\n", + supported_hypervisors[intel_gvt_host.hypervisor_type]); + return 0; +} +EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor); + +void +intel_gvt_unregister_hypervisor(void) +{ + intel_gvt_hypervisor_host_exit(intel_gvt_host.dev); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b4ab1dad0143..8a4cf995d755 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -52,12 +52,8 @@ #define GVT_MAX_VGPU 8 -enum { - INTEL_GVT_HYPERVISOR_XEN = 0, - INTEL_GVT_HYPERVISOR_KVM, -}; - struct intel_gvt_host { + struct device *dev; bool initialized; int hypervisor_type; struct intel_gvt_mpt *mpt; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index e49a9247ed78..50798868ab15 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -33,11 +33,17 @@ #ifndef _GVT_HYPERCALL_H_ #define _GVT_HYPERCALL_H_ +enum hypervisor_type { + INTEL_GVT_HYPERVISOR_XEN = 0, + INTEL_GVT_HYPERVISOR_KVM, +}; + /* * Specific GVT-g MPT modules function collections. Currently GVT-g supports * both Xen and KVM by providing dedicated hypervisor-related MPT modules. */ struct intel_gvt_mpt { + enum hypervisor_type type; int (*host_init)(struct device *dev, void *gvt, const void *ops); void (*host_exit)(struct device *dev); int (*attach_vgpu)(void *vgpu, unsigned long *handle); @@ -67,6 +73,5 @@ struct intel_gvt_mpt { }; extern struct intel_gvt_mpt xengt_mpt; -extern struct intel_gvt_mpt kvmgt_mpt; #endif /* _GVT_HYPERCALL_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1bbd04d30c42..a19e684e621a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -627,6 +627,12 @@ static int intel_vgpu_open(struct mdev_device *mdev) goto undo_iommu; } + /* Take a module reference as mdev core doesn't take + * a reference for vendor driver. + */ + if (!try_module_get(THIS_MODULE)) + goto undo_group; + ret = kvmgt_guest_init(mdev); if (ret) goto undo_group; @@ -679,6 +685,9 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) &vgpu->vdev.group_notifier); WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret); + /* dereference module reference taken at open */ + module_put(THIS_MODULE); + info = (struct kvmgt_guest_info *)vgpu->handle; kvmgt_guest_exit(info); @@ -1849,7 +1858,8 @@ static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn) return ret; } -struct intel_gvt_mpt kvmgt_mpt = { +static struct intel_gvt_mpt kvmgt_mpt = { + .type = INTEL_GVT_HYPERVISOR_KVM, .host_init = kvmgt_host_init, .host_exit = kvmgt_host_exit, .attach_vgpu = kvmgt_attach_vgpu, @@ -1868,15 +1878,17 @@ struct intel_gvt_mpt kvmgt_mpt = { .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, }; -EXPORT_SYMBOL_GPL(kvmgt_mpt); static int __init kvmgt_init(void) { + if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0) + return -ENODEV; return 0; } static void __exit kvmgt_exit(void) { + intel_gvt_unregister_hypervisor(); } module_init(kvmgt_init); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index c95ef77da62c..9b4225d44243 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -360,4 +360,7 @@ static inline bool intel_gvt_hypervisor_is_valid_gfn( return intel_gvt_host.mpt->is_valid_gfn(vgpu->handle, gfn); } +int intel_gvt_register_hypervisor(struct intel_gvt_mpt *); +void intel_gvt_unregister_hypervisor(void); + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index c22b3e18a0f5..d74e59e22c9d 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -105,15 +105,6 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return -EIO; } - /* - * We're not in host or fail to find a MPT module, disable GVT-g - */ - ret = intel_gvt_init_host(); - if (ret) { - DRM_DEBUG_DRIVER("Not in host or MPT modules not found\n"); - goto bail; - } - ret = intel_gvt_init_device(dev_priv); if (ret) { DRM_DEBUG_DRIVER("Fail to init GVT device\n"); From ba64bd96393414f29f0124ca8d6ca8b4b83d4902 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 8 Jan 2019 10:25:45 +0200 Subject: [PATCH 004/539] drm/i915/gvt: remove drmP.h include drmP.h is deprecated and no longer needed. Reviewed-by: Zhenyu Wang Signed-off-by: Jani Nikula Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 51ed99a37803..2eb681175fae 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -29,7 +29,6 @@ */ #include -#include #include #include "i915_drv.h" From ed8cce30163f7b39cbca9c214a17789e7d516ff3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 8 Jan 2019 16:12:04 +0200 Subject: [PATCH 005/539] drm/i915/gvt: give the cmd parser decode_info a const treatment It doesn't need to be changed, make it const. The string literals should anyway be referred to as const data. The following gets moved to rodata section: 0000000000000410 l O .rodata 0000000000000018 decode_info_mi 0000000000000390 l O .rodata 0000000000000018 decode_info_3d_media 00000000000003e0 l O .rodata 0000000000000018 decode_info_2d 0000000000000330 l O .rodata 0000000000000018 decode_info_mfx_vc 00000000000002e0 l O .rodata 0000000000000018 decode_info_vebox 0000000000000300 l O .rodata 0000000000000028 sub_op_vebox 0000000000000360 l O .rodata 0000000000000028 sub_op_mfx_vc 00000000000003c0 l O .rodata 0000000000000020 sub_op_3d_media 0000000000000400 l O .rodata 0000000000000010 sub_op_2d 0000000000000430 l O .rodata 0000000000000010 sub_op_mi Reviewed-by: Yan Zhao Signed-off-by: Jani Nikula Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 77ae634eb11c..98415d465a09 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -55,10 +55,10 @@ struct sub_op_bits { int low; }; struct decode_info { - char *name; + const char *name; int op_len; int nr_sub_op; - struct sub_op_bits *sub_op; + const struct sub_op_bits *sub_op; }; #define MAX_CMD_BUDGET 0x7fffffff @@ -485,12 +485,12 @@ struct parser_exec_state { static unsigned long bypass_scan_mask = 0; /* ring ALL, type = 0 */ -static struct sub_op_bits sub_op_mi[] = { +static const struct sub_op_bits sub_op_mi[] = { {31, 29}, {28, 23}, }; -static struct decode_info decode_info_mi = { +static const struct decode_info decode_info_mi = { "MI", OP_LEN_MI, ARRAY_SIZE(sub_op_mi), @@ -498,12 +498,12 @@ static struct decode_info decode_info_mi = { }; /* ring RCS, command type 2 */ -static struct sub_op_bits sub_op_2d[] = { +static const struct sub_op_bits sub_op_2d[] = { {31, 29}, {28, 22}, }; -static struct decode_info decode_info_2d = { +static const struct decode_info decode_info_2d = { "2D", OP_LEN_2D, ARRAY_SIZE(sub_op_2d), @@ -511,14 +511,14 @@ static struct decode_info decode_info_2d = { }; /* ring RCS, command type 3 */ -static struct sub_op_bits sub_op_3d_media[] = { +static const struct sub_op_bits sub_op_3d_media[] = { {31, 29}, {28, 27}, {26, 24}, {23, 16}, }; -static struct decode_info decode_info_3d_media = { +static const struct decode_info decode_info_3d_media = { "3D_Media", OP_LEN_3D_MEDIA, ARRAY_SIZE(sub_op_3d_media), @@ -526,7 +526,7 @@ static struct decode_info decode_info_3d_media = { }; /* ring VCS, command type 3 */ -static struct sub_op_bits sub_op_mfx_vc[] = { +static const struct sub_op_bits sub_op_mfx_vc[] = { {31, 29}, {28, 27}, {26, 24}, @@ -534,7 +534,7 @@ static struct sub_op_bits sub_op_mfx_vc[] = { {20, 16}, }; -static struct decode_info decode_info_mfx_vc = { +static const struct decode_info decode_info_mfx_vc = { "MFX_VC", OP_LEN_MFX_VC, ARRAY_SIZE(sub_op_mfx_vc), @@ -542,7 +542,7 @@ static struct decode_info decode_info_mfx_vc = { }; /* ring VECS, command type 3 */ -static struct sub_op_bits sub_op_vebox[] = { +static const struct sub_op_bits sub_op_vebox[] = { {31, 29}, {28, 27}, {26, 24}, @@ -550,14 +550,14 @@ static struct sub_op_bits sub_op_vebox[] = { {20, 16}, }; -static struct decode_info decode_info_vebox = { +static const struct decode_info decode_info_vebox = { "VEBOX", OP_LEN_VEBOX, ARRAY_SIZE(sub_op_vebox), sub_op_vebox, }; -static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { +static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { [RCS] = { &decode_info_mi, NULL, @@ -616,7 +616,7 @@ static struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = { static inline u32 get_opcode(u32 cmd, int ring_id) { - struct decode_info *d_info; + const struct decode_info *d_info; d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; if (d_info == NULL) @@ -657,7 +657,7 @@ static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low) static inline void print_opcode(u32 cmd, int ring_id) { - struct decode_info *d_info; + const struct decode_info *d_info; int i; d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)]; From b007065a0aebfba859cecbc23271542b04784567 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 8 Jan 2019 16:12:05 +0200 Subject: [PATCH 006/539] drm/i915/gvt: give the cmd parser cmd_info a const treatment It doesn't need to be changed, make it const. The string literals should anyway be referred to as const data. The following gets moved to rodata section: 0000000000000080 l O .rodata 0000000000001c00 cmd_info Reviewed-by: Yan Zhao Signed-off-by: Jani Nikula Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/gvt/trace.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 98415d465a09..cae00e6debaf 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -375,7 +375,7 @@ typedef int (*parser_cmd_handler)(struct parser_exec_state *s); #define ADDR_FIX_5(x1, x2, x3, x4, x5) (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5)) struct cmd_info { - char *name; + const char *name; u32 opcode; #define F_LEN_MASK (1U<<0) @@ -425,7 +425,7 @@ struct cmd_info { struct cmd_entry { struct hlist_node hlist; - struct cmd_info *info; + const struct cmd_info *info; }; enum { @@ -474,7 +474,7 @@ struct parser_exec_state { int saved_buf_addr_type; bool is_ctx_wa; - struct cmd_info *info; + const struct cmd_info *info; struct intel_vgpu_workload *workload; }; @@ -625,7 +625,7 @@ static inline u32 get_opcode(u32 cmd, int ring_id) return cmd >> (32 - d_info->op_len); } -static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, +static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode, int ring_id) { struct cmd_entry *e; @@ -638,7 +638,7 @@ static inline struct cmd_info *find_cmd_entry(struct intel_gvt *gvt, return NULL; } -static inline struct cmd_info *get_cmd_info(struct intel_gvt *gvt, +static inline const struct cmd_info *get_cmd_info(struct intel_gvt *gvt, u32 cmd, int ring_id) { u32 opcode; @@ -776,7 +776,7 @@ static inline int ip_gma_advance(struct parser_exec_state *s, return 0; } -static inline int get_cmd_length(struct cmd_info *info, u32 cmd) +static inline int get_cmd_length(const struct cmd_info *info, u32 cmd) { if ((info->flag & F_LEN_MASK) == F_LEN_CONST) return info->len; @@ -1643,7 +1643,7 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) { unsigned long gma = 0; - struct cmd_info *info; + const struct cmd_info *info; uint32_t cmd_len = 0; bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; @@ -1842,7 +1842,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) static int mi_noop_index; -static struct cmd_info cmd_info[] = { +static const struct cmd_info cmd_info[] = { {"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL}, {"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL, @@ -2521,7 +2521,7 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) static int cmd_parser_exec(struct parser_exec_state *s) { struct intel_vgpu *vgpu = s->vgpu; - struct cmd_info *info; + const struct cmd_info *info; u32 cmd; int ret = 0; @@ -2895,10 +2895,10 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, +static const struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, unsigned int opcode, unsigned long rings) { - struct cmd_info *info = NULL; + const struct cmd_info *info = NULL; unsigned int ring; for_each_set_bit(ring, &rings, I915_NUM_ENGINES) { @@ -2913,7 +2913,7 @@ static int init_cmd_table(struct intel_gvt *gvt) { int i; struct cmd_entry *e; - struct cmd_info *info; + const struct cmd_info *info; unsigned int gen_type; gen_type = intel_gvt_get_device_type(gvt); diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 1fd64202d74e..6d787750d279 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -228,7 +228,7 @@ TRACE_EVENT(oos_sync, TRACE_EVENT(gvt_command, TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, u32 buf_type, u32 buf_addr_type, - void *workload, char *cmd_name), + void *workload, const char *cmd_name), TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, buf_addr_type, workload, cmd_name), From 36520ed005e71f6b8e2c23fa446b47d97410c173 Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:19:51 +0800 Subject: [PATCH 007/539] drm/i915/gvt: Add coffeelake platform definition Add D_CFL for CFL platform. Reviewed-by: Zhenyu Wang Signed-off-by: Fei Jiang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 ++ drivers/gpu/drm/i915/gvt/mmio.h | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index b5475c91e2ef..c1170f42b6a1 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -57,6 +57,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_KBL; else if (IS_BROXTON(gvt->dev_priv)) return D_BXT; + else if (IS_COFFEELAKE(gvt->dev_priv)) + return D_CFL; return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 1ffc69eba30e..5874f1cb4306 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -43,15 +43,16 @@ struct intel_vgpu; #define D_SKL (1 << 1) #define D_KBL (1 << 2) #define D_BXT (1 << 3) +#define D_CFL (1 << 4) -#define D_GEN9PLUS (D_SKL | D_KBL | D_BXT) -#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_GEN9PLUS (D_SKL | D_KBL | D_BXT | D_CFL) +#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) -#define D_SKL_PLUS (D_SKL | D_KBL | D_BXT) -#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_SKL_PLUS (D_SKL | D_KBL | D_BXT | D_CFL) +#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) #define D_PRE_SKL (D_BDW) -#define D_ALL (D_BDW | D_SKL | D_KBL | D_BXT) +#define D_ALL (D_BDW | D_SKL | D_KBL | D_BXT | D_CFL) typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *, unsigned int); From 5cd02703b0a497c502e8d8a8047f3a53bffacd20 Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:20:00 +0800 Subject: [PATCH 008/539] drm/i915/gvt: Add mmio handler for CFL Add registers of 0x4ab8 and 0x2248 into MMIO handler. Reviewed-by: Zhenyu Wang Signed-off-by: Fei Jiang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c1170f42b6a1..9910ba16d815 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3043,8 +3043,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(_MMIO(0x4ab8), D_KBL); - MMIO_D(_MMIO(0x2248), D_KBL | D_SKL); + MMIO_D(_MMIO(0x4ab8), D_KBL | D_CFL); + MMIO_D(_MMIO(0x2248), D_SKL_PLUS); return 0; } From c3b5a8430daadf5b8ec9757d6c81149903cbe99f Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:20:07 +0800 Subject: [PATCH 009/539] drm/i915/gvt: Enable gfx virtualiztion for CFL Use INTEL_GEN to simplify the code for SKL+ platforms. v2: - split the enabling code into final one to identify any regression. Cc: Zhenyu Wang Reviewed-by: Zhenyu Wang Signed-off-by: Fei Jiang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 15 +++++---------- drivers/gpu/drm/i915/gvt/display.c | 12 ++++++++---- drivers/gpu/drm/i915/gvt/dmabuf.c | 4 +--- drivers/gpu/drm/i915/gvt/fb_decoder.c | 12 +++--------- drivers/gpu/drm/i915/gvt/handlers.c | 17 ++++++++--------- drivers/gpu/drm/i915/gvt/interrupt.c | 4 +--- drivers/gpu/drm/i915/gvt/mmio_context.c | 18 +++++++----------- drivers/gpu/drm/i915/gvt/scheduler.c | 7 +++---- 8 files changed, 36 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index cae00e6debaf..a04e8aa58547 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -901,7 +901,8 @@ static int cmd_reg_handler(struct parser_exec_state *s, * It's good enough to support initializing mmio by lri command in * vgpu inhibit context on KBL. */ - if (IS_KABYLAKE(s->vgpu->gvt->dev_priv) && + if ((IS_KABYLAKE(s->vgpu->gvt->dev_priv) + || IS_COFFEELAKE(s->vgpu->gvt->dev_priv)) && intel_gvt_mmio_is_in_ctx(gvt, offset) && !strncmp(cmd, "lri", 3)) { intel_gvt_hypervisor_read_gpa(s->vgpu, @@ -1280,9 +1281,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; @@ -1310,9 +1309,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), @@ -1336,9 +1333,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s, if (IS_BROADWELL(dev_priv)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return skl_decode_mi_display_flip(s, info); return -ENODEV; diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index df1e14145747..4f25b6b7728e 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -198,7 +198,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) { vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= @@ -273,7 +274,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } @@ -453,7 +455,8 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) clean_virtual_dp_monitor(vgpu, PORT_D); else clean_virtual_dp_monitor(vgpu, PORT_B); @@ -476,7 +479,8 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) intel_vgpu_init_i2c_edid(vgpu); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, resolution); else diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 2eb681175fae..3e7e2b80c857 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -163,9 +163,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { unsigned int tiling_mode = 0; unsigned int stride = 0; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 481896fb712a..dbd91ef28886 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -151,9 +151,7 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; u32 stride = stride_reg; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { switch (tiled) { case PLANE_CTL_TILED_LINEAR: stride = stride_reg * 64; @@ -217,9 +215,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (!plane->enabled) return -ENODEV; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { plane->tiled = val & PLANE_CTL_TILED_MASK; fmt = skl_format_to_drm( val & PLANE_CTL_FORMAT_MASK, @@ -260,9 +256,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, } plane->stride = intel_vgpu_get_stride(vgpu, pipe, plane->tiled, - (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) ? + (INTEL_GEN(dev_priv) >= 9) ? (_PRI_PLANE_STRIDE_MASK >> 6) : _PRI_PLANE_STRIDE_MASK, plane->bpp); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9910ba16d815..68a62ba5bf54 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -283,9 +283,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, old = vgpu_vreg(vgpu, offset); new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); - if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv) - || IS_BROXTON(vgpu->gvt->dev_priv)) { + if (INTEL_GEN(vgpu->gvt->dev_priv) >= 9) { switch (offset) { case FORCEWAKE_RENDER_GEN9_REG: ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; @@ -891,9 +889,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); - if ((IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv) - || IS_BROXTON(vgpu->gvt->dev_priv)) + if ((INTEL_GEN(vgpu->gvt->dev_priv) >= 9) && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { /* SKL DPB/C/D aux ctl register changed */ return 0; @@ -1409,7 +1405,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_COFFEELAKE(vgpu->gvt->dev_priv)) { /** * "Read memory latency" command on gen9. * Below memory latency values are read @@ -1433,7 +1430,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, break; case SKL_PCODE_CDCLK_CONTROL: if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_COFFEELAKE(vgpu->gvt->dev_priv)) *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; case GEN6_PCODE_READ_RC6VIDS: @@ -3304,7 +3302,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) if (ret) goto err; } else if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv)) { + || IS_KABYLAKE(dev_priv) + || IS_COFFEELAKE(dev_priv)) { ret = init_broadwell_mmio_info(gvt); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 6b9d1354ff29..67125c5eec6e 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -581,9 +581,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); - } else if (IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv) - || IS_BROXTON(gvt->dev_priv)) { + } else if (INTEL_GEN(gvt->dev_priv) >= 9) { SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 36a5147cd01e..893de7267b1f 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -351,8 +351,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv))) + if (ring_id == RCS && (INTEL_GEN(dev_priv) >= 9)) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -389,7 +388,8 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) && ring_id == RCS) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) + || IS_COFFEELAKE(dev_priv)) && ring_id == RCS) return; if (!pre && !gen9_render_mocs.initialized) @@ -455,9 +455,7 @@ static void switch_mmio(struct intel_vgpu *pre, u32 old_v, new_v; dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv) - || IS_BROXTON(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) switch_mocs(pre, next, ring_id); for (mmio = dev_priv->gvt->engine_mmio_list.mmio; @@ -469,8 +467,8 @@ static void switch_mmio(struct intel_vgpu *pre, * state image on kabylake, it's initialized by lri command and * save or restore with context together. */ - if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) - && mmio->in_context) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv) + || IS_COFFEELAKE(dev_priv)) && mmio->in_context) continue; // save @@ -563,9 +561,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { struct engine_mmio *mmio; - if (IS_SKYLAKE(gvt->dev_priv) || - IS_KABYLAKE(gvt->dev_priv) || - IS_BROXTON(gvt->dev_priv)) + if (INTEL_GEN(gvt->dev_priv) >= 9) gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; else gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1ad8c5e1455d..fb7445baa139 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -299,7 +299,8 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) void *shadow_ring_buffer_va; u32 *cs; - if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)) + if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915) + || IS_COFFEELAKE(req->i915)) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); @@ -939,9 +940,7 @@ static int workload_thread(void *priv) struct intel_vgpu_workload *workload = NULL; struct intel_vgpu *vgpu = NULL; int ret; - bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv) - || IS_BROXTON(gvt->dev_priv); + bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); kfree(p); From 360f864e43aaf92f541838b181f21d82c624063a Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:20:14 +0800 Subject: [PATCH 010/539] drm/i915/gvt: Reuse the gmbus pin macro Reuse the gmbus pin macro from i915_reg.h file to improve readablity. Reviewed-by: Zhenyu Wang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/edid.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 5d4bb35bb889..752aa0fd1cc9 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -82,11 +82,11 @@ static inline int bxt_get_port_from_gmbus0(u32 gmbus0) int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; int port = -EINVAL; - if (port_select == 1) + if (port_select == GMBUS_PIN_1_BXT) port = PORT_B; - else if (port_select == 2) + else if (port_select == GMBUS_PIN_2_BXT) port = PORT_C; - else if (port_select == 3) + else if (port_select == GMBUS_PIN_3_BXT) port = PORT_D; return port; } @@ -96,13 +96,13 @@ static inline int get_port_from_gmbus0(u32 gmbus0) int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; int port = -EINVAL; - if (port_select == 2) + if (port_select == GMBUS_PIN_VGADDC) port = PORT_E; - else if (port_select == 4) + else if (port_select == GMBUS_PIN_DPC) port = PORT_C; - else if (port_select == 5) + else if (port_select == GMBUS_PIN_DPB) port = PORT_B; - else if (port_select == 6) + else if (port_select == GMBUS_PIN_DPD) port = PORT_D; return port; } From 5807bb4d8dd640685313ad67218b2fe0770d1d03 Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:21:14 +0800 Subject: [PATCH 011/539] drm/i915/gvt: Refine port select logic for CFL platform Refine the code since the port select definition for CFL is different than SKL/BXT. v2: - replace PCH_CNP with IS_COFFEELAKE. (zhenyu) Cc: Zhenyu Wang Reviewed-by: Zhenyu Wang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/edid.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 752aa0fd1cc9..1fe6124918f1 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -77,6 +77,22 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) return chr; } +static inline int cnp_get_port_from_gmbus0(u32 gmbus0) +{ + int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; + int port = -EINVAL; + + if (port_select == GMBUS_PIN_1_BXT) + port = PORT_B; + else if (port_select == GMBUS_PIN_2_BXT) + port = PORT_C; + else if (port_select == GMBUS_PIN_3_BXT) + port = PORT_D; + else if (port_select == GMBUS_PIN_4_CNP) + port = PORT_E; + return port; +} + static inline int bxt_get_port_from_gmbus0(u32 gmbus0) { int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; @@ -133,6 +149,8 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, if (IS_BROXTON(dev_priv)) port = bxt_get_port_from_gmbus0(pin_select); + else if (IS_COFFEELAKE(dev_priv)) + port = cnp_get_port_from_gmbus0(pin_select); else port = get_port_from_gmbus0(pin_select); if (WARN_ON(port < 0)) From 6c46c2e8c589a85501c3816d15264f3afcc9e023 Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 9 Jan 2019 09:21:23 +0800 Subject: [PATCH 012/539] drm/i915: Enable gfx virtualization for Coffeelake platform Enable gfx virtualization for CFL. Reviewed-by: Zhenyu Wang Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/intel_gvt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index d74e59e22c9d..1d7d26e4cf14 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -49,6 +49,9 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return true; if (IS_BROXTON(dev_priv)) return true; + if (IS_COFFEELAKE(dev_priv)) + return true; + return false; } From 3affaa5a7ca3ca114e01049bc45e3ed4a3955505 Mon Sep 17 00:00:00 2001 From: Brian Starkey Date: Mon, 3 Dec 2018 11:31:57 +0000 Subject: [PATCH 013/539] drm/afbc: Add AFBC modifier usage documentation AFBC is a flexible, proprietary, lossless compression protocol and format, with a number of defined DRM format modifiers. To facilitate consistency and compatibility between different AFBC producers and consumers, document the expectations for usage of the AFBC DRM format modifiers in a new .rst chapter. Signed-off-by: Brian Starkey Reviewed-by: Liviu Dudau [Updated MAINTAINERS entry to show that "Mali DP Maintainers" is actually a mailing list and added an SPDX-License-Identifier to the documentation] Signed-off-by: Liviu Dudau --- Documentation/gpu/afbc.rst | 235 ++++++++++++++++++++++++++++++++++ Documentation/gpu/drivers.rst | 1 + MAINTAINERS | 3 +- include/uapi/drm/drm_fourcc.h | 3 + 4 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 Documentation/gpu/afbc.rst diff --git a/Documentation/gpu/afbc.rst b/Documentation/gpu/afbc.rst new file mode 100644 index 000000000000..4d38dc49d105 --- /dev/null +++ b/Documentation/gpu/afbc.rst @@ -0,0 +1,235 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=================================== + Arm Framebuffer Compression (AFBC) +=================================== + +AFBC is a proprietary lossless image compression protocol and format. +It provides fine-grained random access and minimizes the amount of +data transferred between IP blocks. + +AFBC can be enabled on drivers which support it via use of the AFBC +format modifiers defined in drm_fourcc.h. See DRM_FORMAT_MOD_ARM_AFBC(*). + +All users of the AFBC modifiers must follow the usage guidelines laid +out in this document, to ensure compatibility across different AFBC +producers and consumers. + +Components and Ordering +======================= + +AFBC streams can contain several components - where a component +corresponds to a color channel (i.e. R, G, B, X, A, Y, Cb, Cr). +The assignment of input/output color channels must be consistent +between the encoder and the decoder for correct operation, otherwise +the consumer will interpret the decoded data incorrectly. + +Furthermore, when the lossless colorspace transform is used +(AFBC_FORMAT_MOD_YTR, which should be enabled for RGB buffers for +maximum compression efficiency), the component order must be: + + * Component 0: R + * Component 1: G + * Component 2: B + +The component ordering is communicated via the fourcc code in the +fourcc:modifier pair. In general, component '0' is considered to +reside in the least-significant bits of the corresponding linear +format. For example, COMP(bits): + + * DRM_FORMAT_ABGR8888 + + * Component 0: R(8) + * Component 1: G(8) + * Component 2: B(8) + * Component 3: A(8) + + * DRM_FORMAT_BGR888 + + * Component 0: R(8) + * Component 1: G(8) + * Component 2: B(8) + + * DRM_FORMAT_YUYV + + * Component 0: Y(8) + * Component 1: Cb(8, 2x1 subsampled) + * Component 2: Cr(8, 2x1 subsampled) + +In AFBC, 'X' components are not treated any differently from any other +component. Therefore, an AFBC buffer with fourcc DRM_FORMAT_XBGR8888 +encodes with 4 components, like so: + + * DRM_FORMAT_XBGR8888 + + * Component 0: R(8) + * Component 1: G(8) + * Component 2: B(8) + * Component 3: X(8) + +Please note, however, that the inclusion of a "wasted" 'X' channel is +bad for compression efficiency, and so it's recommended to avoid +formats containing 'X' bits. If a fourth component is +required/expected by the encoder/decoder, then it is recommended to +instead use an equivalent format with alpha, setting all alpha bits to +'1'. If there is no requirement for a fourth component, then a format +which doesn't include alpha can be used, e.g. DRM_FORMAT_BGR888. + +Number of Planes +================ + +Formats which are typically multi-planar in linear layouts (e.g. YUV +420), can be encoded into one, or multiple, AFBC planes. As with +component order, the encoder and decoder must agree about the number +of planes in order to correctly decode the buffer. The fourcc code is +used to determine the number of encoded planes in an AFBC buffer, +matching the number of planes for the linear (unmodified) format. +Within each plane, the component ordering also follows the fourcc +code: + +For example: + + * DRM_FORMAT_YUYV: nplanes = 1 + + * Plane 0: + + * Component 0: Y(8) + * Component 1: Cb(8, 2x1 subsampled) + * Component 2: Cr(8, 2x1 subsampled) + + * DRM_FORMAT_NV12: nplanes = 2 + + * Plane 0: + + * Component 0: Y(8) + + * Plane 1: + + * Component 0: Cb(8, 2x1 subsampled) + * Component 1: Cr(8, 2x1 subsampled) + +Cross-device interoperability +============================= + +For maximum compatibility across devices, the table below defines +canonical formats for use between AFBC-enabled devices. Formats which +are listed here must be used exactly as specified when using the AFBC +modifiers. Formats which are not listed should be avoided. + +.. flat-table:: AFBC formats + + * - Fourcc code + - Description + - Planes/Components + + * - DRM_FORMAT_ABGR2101010 + - 10-bit per component RGB, with 2-bit alpha + - Plane 0: 4 components + * Component 0: R(10) + * Component 1: G(10) + * Component 2: B(10) + * Component 3: A(2) + + * - DRM_FORMAT_ABGR8888 + - 8-bit per component RGB, with 8-bit alpha + - Plane 0: 4 components + * Component 0: R(8) + * Component 1: G(8) + * Component 2: B(8) + * Component 3: A(8) + + * - DRM_FORMAT_BGR888 + - 8-bit per component RGB + - Plane 0: 3 components + * Component 0: R(8) + * Component 1: G(8) + * Component 2: B(8) + + * - DRM_FORMAT_BGR565 + - 5/6-bit per component RGB + - Plane 0: 3 components + * Component 0: R(5) + * Component 1: G(6) + * Component 2: B(5) + + * - DRM_FORMAT_ABGR1555 + - 5-bit per component RGB, with 1-bit alpha + - Plane 0: 4 components + * Component 0: R(5) + * Component 1: G(5) + * Component 2: B(5) + * Component 3: A(1) + + * - DRM_FORMAT_VUY888 + - 8-bit per component YCbCr 444, single plane + - Plane 0: 3 components + * Component 0: Y(8) + * Component 1: Cb(8) + * Component 2: Cr(8) + + * - DRM_FORMAT_VUY101010 + - 10-bit per component YCbCr 444, single plane + - Plane 0: 3 components + * Component 0: Y(10) + * Component 1: Cb(10) + * Component 2: Cr(10) + + * - DRM_FORMAT_YUYV + - 8-bit per component YCbCr 422, single plane + - Plane 0: 3 components + * Component 0: Y(8) + * Component 1: Cb(8, 2x1 subsampled) + * Component 2: Cr(8, 2x1 subsampled) + + * - DRM_FORMAT_NV16 + - 8-bit per component YCbCr 422, two plane + - Plane 0: 1 component + * Component 0: Y(8) + Plane 1: 2 components + * Component 0: Cb(8, 2x1 subsampled) + * Component 1: Cr(8, 2x1 subsampled) + + * - DRM_FORMAT_Y210 + - 10-bit per component YCbCr 422, single plane + - Plane 0: 3 components + * Component 0: Y(10) + * Component 1: Cb(10, 2x1 subsampled) + * Component 2: Cr(10, 2x1 subsampled) + + * - DRM_FORMAT_P210 + - 10-bit per component YCbCr 422, two plane + - Plane 0: 1 component + * Component 0: Y(10) + Plane 1: 2 components + * Component 0: Cb(10, 2x1 subsampled) + * Component 1: Cr(10, 2x1 subsampled) + + * - DRM_FORMAT_YUV420_8BIT + - 8-bit per component YCbCr 420, single plane + - Plane 0: 3 components + * Component 0: Y(8) + * Component 1: Cb(8, 2x2 subsampled) + * Component 2: Cr(8, 2x2 subsampled) + + * - DRM_FORMAT_YUV420_10BIT + - 10-bit per component YCbCr 420, single plane + - Plane 0: 3 components + * Component 0: Y(10) + * Component 1: Cb(10, 2x2 subsampled) + * Component 2: Cr(10, 2x2 subsampled) + + * - DRM_FORMAT_NV12 + - 8-bit per component YCbCr 420, two plane + - Plane 0: 1 component + * Component 0: Y(8) + Plane 1: 2 components + * Component 0: Cb(8, 2x2 subsampled) + * Component 1: Cr(8, 2x2 subsampled) + + * - DRM_FORMAT_P010 + - 10-bit per component YCbCr 420, two plane + - Plane 0: 1 component + * Component 0: Y(10) + Plane 1: 2 components + * Component 0: Cb(10, 2x2 subsampled) + * Component 1: Cr(10, 2x2 subsampled) diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index 7c1672118a73..c176b34ae9c8 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -17,6 +17,7 @@ GPU Driver Documentation vkms bridge/dw-hdmi xen-front + afbc .. only:: subproject and html diff --git a/MAINTAINERS b/MAINTAINERS index 32d444476a90..e79eca22532d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1136,10 +1136,11 @@ F: Documentation/devicetree/bindings/display/arm,hdlcd.txt ARM MALI-DP DRM DRIVER M: Liviu Dudau M: Brian Starkey -M: Mali DP Maintainers +L: Mali DP Maintainers S: Supported F: drivers/gpu/drm/arm/ F: Documentation/devicetree/bindings/display/arm,malidp.txt +F: Documentation/gpu/afbc.rst ARM MFM AND FLOPPY DRIVERS M: Ian Molton diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0b44260a5ee9..df014ede4e57 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -572,6 +572,9 @@ extern "C" { * AFBC has several features which may be supported and/or used, which are * represented using bits in the modifier. Not all combinations are valid, * and different devices or use-cases may support different combinations. + * + * Further information on the use of AFBC modifiers can be found in + * Documentation/gpu/afbc.rst */ #define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) fourcc_mod_code(ARM, __afbc_mode) From 37fc9bb022c654e261c5a7d2ce600c6ce26c022d Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Wed, 5 Dec 2018 03:41:35 +0000 Subject: [PATCH 014/539] drm/arm: Delete redundant CONFIG_DRM_ARM Delete redundant CONFIG_DRM_ARM, and add a menu "ARM devices" to subclass ARM device drivers. Signed-off-by: James (Qian) Wang Signed-off-by: Liviu Dudau --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/arm/Kconfig | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index ce8d1d384319..f0c1f8731a27 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/ obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o -obj-$(CONFIG_DRM_ARM) += arm/ +obj-y += arm/ obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_SCHED) += scheduler/ obj-$(CONFIG_DRM_TDFX) += tdfx/ diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig index 9a18e1bd57b4..f9f7761cb2f4 100644 --- a/drivers/gpu/drm/arm/Kconfig +++ b/drivers/gpu/drm/arm/Kconfig @@ -1,13 +1,10 @@ -config DRM_ARM - bool - help - Choose this option to select drivers for ARM's devices +# SPDX-License-Identifier: GPL-2.0 +menu "ARM devices" config DRM_HDLCD tristate "ARM HDLCD" depends on DRM && OF && (ARM || ARM64) depends on COMMON_CLK - select DRM_ARM select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER help @@ -29,7 +26,6 @@ config DRM_MALI_DISPLAY tristate "ARM Mali Display Processor" depends on DRM && OF && (ARM || ARM64) depends on COMMON_CLK - select DRM_ARM select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER @@ -40,3 +36,5 @@ config DRM_MALI_DISPLAY of the hardware. If compiled as a module it will be called mali-dp. + +endmenu From bd628c1bed7902ec1f24ba0fe70758949146abbe Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:39:48 +0000 Subject: [PATCH 015/539] drm/komeda: komeda_dev/pipeline/component definition and initialzation 1. Added a brief definition of komeda_dev/pipeline/component, this change didn't add the detailed component features and capabilities, which will be added in the following changes. 2. Corresponding resources discovery and initialzation functions. Changes in v4: - Deleted unnecessary headers Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Changes in v2: - Unified abbreviation of "pipeline" to "pipe". Signed-off-by: James Qian Wang (Arm Technology China) Reviewed-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/Kconfig | 2 + drivers/gpu/drm/arm/Makefile | 1 + drivers/gpu/drm/arm/display/Kbuild | 3 + drivers/gpu/drm/arm/display/Kconfig | 14 + .../drm/arm/display/include/malidp_product.h | 23 ++ .../drm/arm/display/include/malidp_utils.h | 16 + drivers/gpu/drm/arm/display/komeda/Makefile | 11 + .../gpu/drm/arm/display/komeda/komeda_dev.c | 114 ++++++ .../gpu/drm/arm/display/komeda/komeda_dev.h | 98 +++++ .../drm/arm/display/komeda/komeda_pipeline.c | 196 ++++++++++ .../drm/arm/display/komeda/komeda_pipeline.h | 348 ++++++++++++++++++ 11 files changed, 826 insertions(+) create mode 100644 drivers/gpu/drm/arm/display/Kbuild create mode 100644 drivers/gpu/drm/arm/display/Kconfig create mode 100644 drivers/gpu/drm/arm/display/include/malidp_product.h create mode 100644 drivers/gpu/drm/arm/display/include/malidp_utils.h create mode 100644 drivers/gpu/drm/arm/display/komeda/Makefile create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_dev.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_dev.h create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig index f9f7761cb2f4..a204103b3efb 100644 --- a/drivers/gpu/drm/arm/Kconfig +++ b/drivers/gpu/drm/arm/Kconfig @@ -37,4 +37,6 @@ config DRM_MALI_DISPLAY If compiled as a module it will be called mali-dp. +source "drivers/gpu/drm/arm/display/Kconfig" + endmenu diff --git a/drivers/gpu/drm/arm/Makefile b/drivers/gpu/drm/arm/Makefile index 3bf31d1a4722..120bef801fcf 100644 --- a/drivers/gpu/drm/arm/Makefile +++ b/drivers/gpu/drm/arm/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_DRM_HDLCD) += hdlcd.o mali-dp-y := malidp_drv.o malidp_hw.o malidp_planes.o malidp_crtc.o mali-dp-y += malidp_mw.o obj-$(CONFIG_DRM_MALI_DISPLAY) += mali-dp.o +obj-$(CONFIG_DRM_KOMEDA) += display/ diff --git a/drivers/gpu/drm/arm/display/Kbuild b/drivers/gpu/drm/arm/display/Kbuild new file mode 100644 index 000000000000..382f1ca831e4 --- /dev/null +++ b/drivers/gpu/drm/arm/display/Kbuild @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DRM_KOMEDA) += komeda/ diff --git a/drivers/gpu/drm/arm/display/Kconfig b/drivers/gpu/drm/arm/display/Kconfig new file mode 100644 index 000000000000..cec0639e3aa1 --- /dev/null +++ b/drivers/gpu/drm/arm/display/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +config DRM_KOMEDA + tristate "ARM Komeda display driver" + depends on DRM && OF + depends on COMMON_CLK + select DRM_KMS_HELPER + select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER + select VIDEOMODE_HELPERS + help + Choose this option if you want to compile the ARM Komeda display + Processor driver. It supports the D71 variants of the hardware. + + If compiled as a module it will be called komeda. diff --git a/drivers/gpu/drm/arm/display/include/malidp_product.h b/drivers/gpu/drm/arm/display/include/malidp_product.h new file mode 100644 index 000000000000..b35fc5db866b --- /dev/null +++ b/drivers/gpu/drm/arm/display/include/malidp_product.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _MALIDP_PRODUCT_H_ +#define _MALIDP_PRODUCT_H_ + +/* Product identification */ +#define MALIDP_CORE_ID(__product, __major, __minor, __status) \ + ((((__product) & 0xFFFF) << 16) | (((__major) & 0xF) << 12) | \ + (((__minor) & 0xF) << 8) | ((__status) & 0xFF)) + +#define MALIDP_CORE_ID_PRODUCT_ID(__core_id) ((__u32)(__core_id) >> 16) +#define MALIDP_CORE_ID_MAJOR(__core_id) (((__u32)(__core_id) >> 12) & 0xF) +#define MALIDP_CORE_ID_MINOR(__core_id) (((__u32)(__core_id) >> 8) & 0xF) +#define MALIDP_CORE_ID_STATUS(__core_id) (((__u32)(__core_id)) & 0xFF) + +/* Mali-display product IDs */ +#define MALIDP_D71_PRODUCT_ID 0x0071 + +#endif /* _MALIDP_PRODUCT_H_ */ diff --git a/drivers/gpu/drm/arm/display/include/malidp_utils.h b/drivers/gpu/drm/arm/display/include/malidp_utils.h new file mode 100644 index 000000000000..63cc47cefcf8 --- /dev/null +++ b/drivers/gpu/drm/arm/display/include/malidp_utils.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _MALIDP_UTILS_ +#define _MALIDP_UTILS_ + +#define has_bit(nr, mask) (BIT(nr) & (mask)) +#define has_bits(bits, mask) (((bits) & (mask)) == (bits)) + +#define dp_for_each_set_bit(bit, mask) \ + for_each_set_bit((bit), ((unsigned long *)&(mask)), sizeof(mask) * 8) + +#endif /* _MALIDP_UTILS_ */ diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile new file mode 100644 index 000000000000..5b44e36509b1 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y := \ + -I$(src)/../include \ + -I$(src) + +komeda-y := \ + komeda_dev.o \ + komeda_pipeline.o \ + +obj-$(CONFIG_DRM_KOMEDA) += komeda.o diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c new file mode 100644 index 000000000000..4dec259cecac --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include "komeda_dev.h" + +struct komeda_dev *komeda_dev_create(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + const struct komeda_product_data *product; + struct komeda_dev *mdev; + struct resource *io_res; + int err = 0; + + product = of_device_get_match_data(dev); + if (!product) + return ERR_PTR(-ENODEV); + + io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io_res) { + DRM_ERROR("No registers defined.\n"); + return ERR_PTR(-ENODEV); + } + + mdev = devm_kzalloc(dev, sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + mdev->dev = dev; + mdev->reg_base = devm_ioremap_resource(dev, io_res); + if (IS_ERR(mdev->reg_base)) { + DRM_ERROR("Map register space failed.\n"); + err = PTR_ERR(mdev->reg_base); + mdev->reg_base = NULL; + goto err_cleanup; + } + + mdev->pclk = devm_clk_get(dev, "pclk"); + if (IS_ERR(mdev->pclk)) { + DRM_ERROR("Get APB clk failed.\n"); + err = PTR_ERR(mdev->pclk); + mdev->pclk = NULL; + goto err_cleanup; + } + + /* Enable APB clock to access the registers */ + clk_prepare_enable(mdev->pclk); + + mdev->funcs = product->identify(mdev->reg_base, &mdev->chip); + if (!komeda_product_match(mdev, product->product_id)) { + DRM_ERROR("DT configured %x mismatch with real HW %x.\n", + product->product_id, + MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id)); + err = -ENODEV; + goto err_cleanup; + } + + DRM_INFO("Found ARM Mali-D%x version r%dp%d\n", + MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id), + MALIDP_CORE_ID_MAJOR(mdev->chip.core_id), + MALIDP_CORE_ID_MINOR(mdev->chip.core_id)); + + err = mdev->funcs->enum_resources(mdev); + if (err) { + DRM_ERROR("enumerate display resource failed.\n"); + goto err_cleanup; + } + + return mdev; + +err_cleanup: + komeda_dev_destroy(mdev); + return ERR_PTR(err); +} + +void komeda_dev_destroy(struct komeda_dev *mdev) +{ + struct device *dev = mdev->dev; + struct komeda_dev_funcs *funcs = mdev->funcs; + int i; + + for (i = 0; i < mdev->n_pipelines; i++) { + komeda_pipeline_destroy(mdev, mdev->pipelines[i]); + mdev->pipelines[i] = NULL; + } + + mdev->n_pipelines = 0; + + if (funcs && funcs->cleanup) + funcs->cleanup(mdev); + + if (mdev->reg_base) { + devm_iounmap(dev, mdev->reg_base); + mdev->reg_base = NULL; + } + + if (mdev->mclk) { + devm_clk_put(dev, mdev->mclk); + mdev->mclk = NULL; + } + + if (mdev->pclk) { + clk_disable_unprepare(mdev->pclk); + devm_clk_put(dev, mdev->pclk); + mdev->pclk = NULL; + } + + devm_kfree(dev, mdev); +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h new file mode 100644 index 000000000000..03b8703736c2 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _KOMEDA_DEV_H_ +#define _KOMEDA_DEV_H_ + +#include +#include +#include "komeda_pipeline.h" +#include "malidp_product.h" + +/* malidp device id */ +enum { + MALI_D71 = 0, +}; + +/* pipeline DT ports */ +enum { + KOMEDA_OF_PORT_OUTPUT = 0, + KOMEDA_OF_PORT_COPROC = 1, +}; + +struct komeda_chip_info { + u32 arch_id; + u32 core_id; + u32 core_info; + u32 bus_width; +}; + +struct komeda_product_data { + u32 product_id; + struct komeda_dev_funcs *(*identify)(u32 __iomem *reg, + struct komeda_chip_info *info); +}; + +struct komeda_dev; + +/** + * struct komeda_dev_funcs + * + * Supplied by chip level and returned by the chip entry function xxx_identify, + */ +struct komeda_dev_funcs { + /** + * @enum_resources: + * + * for CHIP to report or add pipeline and component resources to CORE + */ + int (*enum_resources)(struct komeda_dev *mdev); + /** @cleanup: call to chip to cleanup komeda_dev->chip data */ + void (*cleanup)(struct komeda_dev *mdev); +}; + +/** + * struct komeda_dev + * + * Pipeline and component are used to describe how to handle the pixel data. + * komeda_device is for describing the whole view of the device, and the + * control-abilites of device. + */ +struct komeda_dev { + struct device *dev; + u32 __iomem *reg_base; + + struct komeda_chip_info chip; + + /** @pclk: APB clock for register access */ + struct clk *pclk; + /** @mck: HW main engine clk */ + struct clk *mclk; + + int n_pipelines; + struct komeda_pipeline *pipelines[KOMEDA_MAX_PIPELINES]; + + /** @funcs: chip funcs to access to HW */ + struct komeda_dev_funcs *funcs; + /** + * @chip_data: + * + * chip data will be added by &komeda_dev_funcs.enum_resources() and + * destroyed by &komeda_dev_funcs.cleanup() + */ + void *chip_data; +}; + +static inline bool +komeda_product_match(struct komeda_dev *mdev, u32 target) +{ + return MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id) == target; +} + +struct komeda_dev *komeda_dev_create(struct device *dev); +void komeda_dev_destroy(struct komeda_dev *mdev); + +#endif /*_KOMEDA_DEV_H_*/ diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c new file mode 100644 index 000000000000..179122fc93ff --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include "komeda_dev.h" +#include "komeda_pipeline.h" + +/** komeda_pipeline_add - Add a pipeline to &komeda_dev */ +struct komeda_pipeline * +komeda_pipeline_add(struct komeda_dev *mdev, size_t size, + struct komeda_pipeline_funcs *funcs) +{ + struct komeda_pipeline *pipe; + + if (mdev->n_pipelines + 1 > KOMEDA_MAX_PIPELINES) { + DRM_ERROR("Exceed max support %d pipelines.\n", + KOMEDA_MAX_PIPELINES); + return NULL; + } + + if (size < sizeof(*pipe)) { + DRM_ERROR("Request pipeline size too small.\n"); + return NULL; + } + + pipe = devm_kzalloc(mdev->dev, size, GFP_KERNEL); + if (!pipe) + return NULL; + + pipe->mdev = mdev; + pipe->id = mdev->n_pipelines; + pipe->funcs = funcs; + + mdev->pipelines[mdev->n_pipelines] = pipe; + mdev->n_pipelines++; + + return pipe; +} + +void komeda_pipeline_destroy(struct komeda_dev *mdev, + struct komeda_pipeline *pipe) +{ + struct komeda_component *c; + int i; + + dp_for_each_set_bit(i, pipe->avail_comps) { + c = komeda_pipeline_get_component(pipe, i); + komeda_component_destroy(mdev, c); + } + + clk_put(pipe->pxlclk); + clk_put(pipe->aclk); + + devm_kfree(mdev->dev, pipe); +} + +struct komeda_component ** +komeda_pipeline_get_component_pos(struct komeda_pipeline *pipe, int id) +{ + struct komeda_dev *mdev = pipe->mdev; + struct komeda_pipeline *temp = NULL; + struct komeda_component **pos = NULL; + + switch (id) { + case KOMEDA_COMPONENT_LAYER0: + case KOMEDA_COMPONENT_LAYER1: + case KOMEDA_COMPONENT_LAYER2: + case KOMEDA_COMPONENT_LAYER3: + pos = to_cpos(pipe->layers[id - KOMEDA_COMPONENT_LAYER0]); + break; + case KOMEDA_COMPONENT_WB_LAYER: + pos = to_cpos(pipe->wb_layer); + break; + case KOMEDA_COMPONENT_COMPIZ0: + case KOMEDA_COMPONENT_COMPIZ1: + temp = mdev->pipelines[id - KOMEDA_COMPONENT_COMPIZ0]; + if (!temp) { + DRM_ERROR("compiz-%d doesn't exist.\n", id); + return NULL; + } + pos = to_cpos(temp->compiz); + break; + case KOMEDA_COMPONENT_SCALER0: + case KOMEDA_COMPONENT_SCALER1: + pos = to_cpos(pipe->scalers[id - KOMEDA_COMPONENT_SCALER0]); + break; + case KOMEDA_COMPONENT_IPS0: + case KOMEDA_COMPONENT_IPS1: + temp = mdev->pipelines[id - KOMEDA_COMPONENT_IPS0]; + if (!temp) { + DRM_ERROR("ips-%d doesn't exist.\n", id); + return NULL; + } + pos = to_cpos(temp->improc); + break; + case KOMEDA_COMPONENT_TIMING_CTRLR: + pos = to_cpos(pipe->ctrlr); + break; + default: + pos = NULL; + DRM_ERROR("Unknown pipeline resource ID: %d.\n", id); + break; + } + + return pos; +} + +struct komeda_component * +komeda_pipeline_get_component(struct komeda_pipeline *pipe, int id) +{ + struct komeda_component **pos = NULL; + struct komeda_component *c = NULL; + + pos = komeda_pipeline_get_component_pos(pipe, id); + if (pos) + c = *pos; + + return c; +} + +/** komeda_component_add - Add a component to &komeda_pipeline */ +struct komeda_component * +komeda_component_add(struct komeda_pipeline *pipe, + size_t comp_sz, u32 id, u32 hw_id, + struct komeda_component_funcs *funcs, + u8 max_active_inputs, u32 supported_inputs, + u8 max_active_outputs, u32 __iomem *reg, + const char *name_fmt, ...) +{ + struct komeda_component **pos; + struct komeda_component *c; + int idx, *num = NULL; + + if (max_active_inputs > KOMEDA_COMPONENT_N_INPUTS) { + WARN(1, "please large KOMEDA_COMPONENT_N_INPUTS to %d.\n", + max_active_inputs); + return NULL; + } + + pos = komeda_pipeline_get_component_pos(pipe, id); + if (!pos || (*pos)) + return NULL; + + if (has_bit(id, KOMEDA_PIPELINE_LAYERS)) { + idx = id - KOMEDA_COMPONENT_LAYER0; + num = &pipe->n_layers; + if (idx != pipe->n_layers) { + DRM_ERROR("please add Layer by id sequence.\n"); + return NULL; + } + } else if (has_bit(id, KOMEDA_PIPELINE_SCALERS)) { + idx = id - KOMEDA_COMPONENT_SCALER0; + num = &pipe->n_scalers; + if (idx != pipe->n_scalers) { + DRM_ERROR("please add Scaler by id sequence.\n"); + return NULL; + } + } + + c = devm_kzalloc(pipe->mdev->dev, comp_sz, GFP_KERNEL); + if (!c) + return NULL; + + c->id = id; + c->hw_id = hw_id; + c->reg = reg; + c->pipeline = pipe; + c->max_active_inputs = max_active_inputs; + c->max_active_outputs = max_active_outputs; + c->supported_inputs = supported_inputs; + c->funcs = funcs; + + if (name_fmt) { + va_list args; + + va_start(args, name_fmt); + vsnprintf(c->name, sizeof(c->name), name_fmt, args); + va_end(args); + } + + if (num) + *num = *num + 1; + + pipe->avail_comps |= BIT(c->id); + *pos = c; + + return c; +} + +void komeda_component_destroy(struct komeda_dev *mdev, + struct komeda_component *c) +{ + devm_kfree(mdev->dev, c); +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h new file mode 100644 index 000000000000..7daba0e1946b --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _KOMEDA_PIPELINE_H_ +#define _KOMEDA_PIPELINE_H_ + +#include +#include +#include +#include "malidp_utils.h" + +#define KOMEDA_MAX_PIPELINES 2 +#define KOMEDA_PIPELINE_MAX_LAYERS 4 +#define KOMEDA_PIPELINE_MAX_SCALERS 2 +#define KOMEDA_COMPONENT_N_INPUTS 5 + +/* pipeline component IDs */ +enum { + KOMEDA_COMPONENT_LAYER0 = 0, + KOMEDA_COMPONENT_LAYER1 = 1, + KOMEDA_COMPONENT_LAYER2 = 2, + KOMEDA_COMPONENT_LAYER3 = 3, + KOMEDA_COMPONENT_WB_LAYER = 7, /* write back layer */ + KOMEDA_COMPONENT_SCALER0 = 8, + KOMEDA_COMPONENT_SCALER1 = 9, + KOMEDA_COMPONENT_SPLITTER = 12, + KOMEDA_COMPONENT_MERGER = 14, + KOMEDA_COMPONENT_COMPIZ0 = 16, /* compositor */ + KOMEDA_COMPONENT_COMPIZ1 = 17, + KOMEDA_COMPONENT_IPS0 = 20, /* post image processor */ + KOMEDA_COMPONENT_IPS1 = 21, + KOMEDA_COMPONENT_TIMING_CTRLR = 22, /* timing controller */ +}; + +#define KOMEDA_PIPELINE_LAYERS (BIT(KOMEDA_COMPONENT_LAYER0) |\ + BIT(KOMEDA_COMPONENT_LAYER1) |\ + BIT(KOMEDA_COMPONENT_LAYER2) |\ + BIT(KOMEDA_COMPONENT_LAYER3)) + +#define KOMEDA_PIPELINE_SCALERS (BIT(KOMEDA_COMPONENT_SCALER0) |\ + BIT(KOMEDA_COMPONENT_SCALER1)) + +#define KOMEDA_PIPELINE_COMPIZS (BIT(KOMEDA_COMPONENT_COMPIZ0) |\ + BIT(KOMEDA_COMPONENT_COMPIZ1)) + +#define KOMEDA_PIPELINE_IMPROCS (BIT(KOMEDA_COMPONENT_IPS0) |\ + BIT(KOMEDA_COMPONENT_IPS1)) +struct komeda_component; +struct komeda_component_state; + +/** komeda_component_funcs - component control functions */ +struct komeda_component_funcs { + /** @validate: optional, + * component may has special requirements or limitations, this function + * supply HW the ability to do the further HW specific check. + */ + int (*validate)(struct komeda_component *c, + struct komeda_component_state *state); + /** @update: update is a active update */ + void (*update)(struct komeda_component *c, + struct komeda_component_state *state); + /** @disable: disable component */ + void (*disable)(struct komeda_component *c); + /** @dump_register: Optional, dump registers to seq_file */ + void (*dump_register)(struct komeda_component *c, struct seq_file *seq); +}; + +/** + * struct komeda_component + * + * struct komeda_component describe the data flow capabilities for how to link a + * component into the display pipeline. + * all specified components are subclass of this structure. + */ +struct komeda_component { + /** @obj: treat component as private obj */ + struct drm_private_obj obj; + /** @pipeline: the komeda pipeline this component belongs to */ + struct komeda_pipeline *pipeline; + /** @name: component name */ + char name[32]; + /** + * @reg: + * component register base, + * which is initialized by chip and used by chip only + */ + u32 __iomem *reg; + /** @id: component id */ + u32 id; + /** @hw_ic: component hw id, + * which is initialized by chip and used by chip only + */ + u32 hw_id; + + /** + * @max_active_inputs: + * @max_active_outpus: + * + * maximum number of inputs/outputs that can be active in the same time + * Note: + * the number isn't the bit number of @supported_inputs or + * @supported_outputs, but may be less than it, since component may not + * support enabling all @supported_inputs/outputs at the same time. + */ + u8 max_active_inputs; + u8 max_active_outputs; + /** + * @supported_inputs: + * @supported_outputs: + * + * bitmask of BIT(component->id) for the supported inputs/outputs + * describes the possibilities of how a component is linked into a + * pipeline. + */ + u32 supported_inputs; + u32 supported_outputs; + + /** + * @funcs: chip functions to access HW + */ + struct komeda_component_funcs *funcs; +}; + +/** + * struct komeda_component_output + * + * a component has multiple outputs, if want to know where the data + * comes from, only know the component is not enough, we still need to know + * its output port + */ +struct komeda_component_output { + /** @component: indicate which component the data comes from */ + struct komeda_component *component; + /** @output_port: + * the output port of the &komeda_component_output.component + */ + u8 output_port; +}; + +/** + * struct komeda_component_state + * + * component_state is the data flow configuration of the component, and it's + * the superclass of all specific component_state like @komeda_layer_state, + * @komeda_scaler_state + */ +struct komeda_component_state { + /** @obj: tracking component_state by drm_atomic_state */ + struct drm_private_state obj; + struct komeda_component *component; + /** + * @binding_user: + * currently bound user, the user can be crtc/plane/wb_conn, which is + * valid decided by @component and @inputs + * + * - Layer: its user always is plane. + * - compiz/improc/timing_ctrlr: the user is crtc. + * - wb_layer: wb_conn; + * - scaler: plane when input is layer, wb_conn if input is compiz. + */ + union { + struct drm_crtc *crtc; + struct drm_plane *plane; + struct drm_connector *wb_conn; + void *binding_user; + }; + /** + * @active_inputs: + * + * active_inputs is bitmask of @inputs index + * + * - active_inputs = changed_active_inputs + unchanged_active_inputs + * - affected_inputs = old->active_inputs + new->active_inputs; + * - disabling_inputs = affected_inputs ^ active_inputs; + * - changed_inputs = disabling_inputs + changed_active_inputs; + * + * NOTE: + * changed_inputs doesn't include all active_input but only + * @changed_active_inputs, and this bitmask can be used in chip + * level for dirty update. + */ + u16 active_inputs; + u16 changed_active_inputs; + u16 affected_inputs; + /** + * @inputs: + * + * the specific inputs[i] only valid on BIT(i) has been set in + * @active_inputs, if not the inputs[i] is undefined. + */ + struct komeda_component_output inputs[KOMEDA_COMPONENT_N_INPUTS]; +}; + +static inline u16 component_disabling_inputs(struct komeda_component_state *st) +{ + return st->affected_inputs ^ st->active_inputs; +} + +static inline u16 component_changed_inputs(struct komeda_component_state *st) +{ + return component_disabling_inputs(st) | st->changed_active_inputs; +} + +#define to_comp(__c) (((__c) == NULL) ? NULL : &((__c)->base)) +#define to_cpos(__c) ((struct komeda_component **)&(__c)) + +/* these structures are going to be filled in in uture patches */ +struct komeda_layer { + struct komeda_component base; + /* layer specific features and caps */ +}; + +struct komeda_layer_state { + struct komeda_component_state base; + /* layer specific configuration state */ +}; + +struct komeda_compiz { + struct komeda_component base; + /* compiz specific features and caps */ +}; + +struct komeda_compiz_state { + struct komeda_component_state base; + /* compiz specific configuration state */ +}; + +struct komeda_scaler { + struct komeda_component base; + /* scaler features and caps */ +}; + +struct komeda_scaler_state { + struct komeda_component_state base; +}; + +struct komeda_improc { + struct komeda_component base; +}; + +struct komeda_improc_state { + struct komeda_component_state base; +}; + +/* display timing controller */ +struct komeda_timing_ctrlr { + struct komeda_component base; +}; + +struct komeda_timing_ctrlr_state { + struct komeda_component_state base; +}; + +/** struct komeda_pipeline_funcs */ +struct komeda_pipeline_funcs { + /* dump_register: Optional, dump registers to seq_file */ + void (*dump_register)(struct komeda_pipeline *pipe, + struct seq_file *sf); +}; + +/** + * struct komeda_pipeline + * + * Represent a complete display pipeline and hold all functional components. + */ +struct komeda_pipeline { + /** @obj: link pipeline as private obj of drm_atomic_state */ + struct drm_private_obj obj; + /** @mdev: the parent komeda_dev */ + struct komeda_dev *mdev; + /** @pxlclk: pixel clock */ + struct clk *pxlclk; + /** @aclk: AXI clock */ + struct clk *aclk; + /** @id: pipeline id */ + int id; + /** @avail_comps: available components mask of pipeline */ + u32 avail_comps; + int n_layers; + struct komeda_layer *layers[KOMEDA_PIPELINE_MAX_LAYERS]; + int n_scalers; + struct komeda_scaler *scalers[KOMEDA_PIPELINE_MAX_SCALERS]; + struct komeda_compiz *compiz; + struct komeda_layer *wb_layer; + struct komeda_improc *improc; + struct komeda_timing_ctrlr *ctrlr; + struct komeda_pipeline_funcs *funcs; /* private pipeline functions */ +}; + +/** + * struct komeda_pipeline_state + * + * NOTE: + * Unlike the pipeline, pipeline_state doesn’t gather any component_state + * into it. It because all component will be managed by drm_atomic_state. + */ +struct komeda_pipeline_state { + /** @obj: tracking pipeline_state by drm_atomic_state */ + struct drm_private_state obj; + struct komeda_pipeline *pipe; + /** @crtc: currently bound crtc */ + struct drm_crtc *crtc; + /** + * @active_comps: + * + * bitmask - BIT(component->id) of active components + */ + u32 active_comps; +}; + +#define to_layer(c) container_of(c, struct komeda_layer, base) +#define to_compiz(c) container_of(c, struct komeda_compiz, base) +#define to_scaler(c) container_of(c, struct komeda_scaler, base) +#define to_improc(c) container_of(c, struct komeda_improc, base) +#define to_ctrlr(c) container_of(c, struct komeda_timing_ctrlr, base) + +#define to_layer_st(c) container_of(c, struct komeda_layer_state, base) +#define to_compiz_st(c) container_of(c, struct komeda_compiz_state, base) +#define to_scaler_st(c) container_of(c, struct komeda_scaler_state, base) +#define to_improc_st(c) container_of(c, struct komeda_improc_state, base) +#define to_ctrlr_st(c) container_of(c, struct komeda_timing_ctrlr_state, base) + +/* pipeline APIs */ +struct komeda_pipeline * +komeda_pipeline_add(struct komeda_dev *mdev, size_t size, + struct komeda_pipeline_funcs *funcs); +void komeda_pipeline_destroy(struct komeda_dev *mdev, + struct komeda_pipeline *pipe); + +struct komeda_component * +komeda_pipeline_get_component(struct komeda_pipeline *pipe, int id); + +/* component APIs */ +struct komeda_component * +komeda_component_add(struct komeda_pipeline *pipe, + size_t comp_sz, u32 id, u32 hw_id, + struct komeda_component_funcs *funcs, + u8 max_active_inputs, u32 supported_inputs, + u8 max_active_outputs, u32 __iomem *reg, + const char *name_fmt, ...); + +void komeda_component_destroy(struct komeda_dev *mdev, + struct komeda_component *c); + +#endif /* _KOMEDA_PIPELINE_H_*/ From df766e4a419c77283485931070e0692f64aebf8a Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:40:04 +0000 Subject: [PATCH 016/539] dt/bindings: drm/komeda: Add DT bindings for ARM display processor D71 Add DT bindings documentation for the ARM display processor D71 and later IPs. Changes in v4: - Deleted unnecessary address-cells, size-cells [Liviu Dudau] Changes in v3: - Deleted unnecessary property: interrupt-names. - Dropped 'ports' and moving 'port' up a level. Signed-off-by: James Qian Wang (Arm Technology China) Reviewed-by: Liviu Dudau Reviewed-by: Rob Herring [moved in the same directory as the other Arm display bindings] Signed-off-by: Liviu Dudau --- .../bindings/display/arm,komeda.txt | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/arm,komeda.txt diff --git a/Documentation/devicetree/bindings/display/arm,komeda.txt b/Documentation/devicetree/bindings/display/arm,komeda.txt new file mode 100644 index 000000000000..02b226532ebd --- /dev/null +++ b/Documentation/devicetree/bindings/display/arm,komeda.txt @@ -0,0 +1,73 @@ +Device Tree bindings for Arm Komeda display driver + +Required properties: +- compatible: Should be "arm,mali-d71" +- reg: Physical base address and length of the registers in the system +- interrupts: the interrupt line number of the device in the system +- clocks: A list of phandle + clock-specifier pairs, one for each entry + in 'clock-names' +- clock-names: A list of clock names. It should contain: + - "mclk": for the main processor clock + - "pclk": for the APB interface clock +- #address-cells: Must be 1 +- #size-cells: Must be 0 + +Required properties for sub-node: pipeline@nq +Each device contains one or two pipeline sub-nodes (at least one), each +pipeline node should provide properties: +- reg: Zero-indexed identifier for the pipeline +- clocks: A list of phandle + clock-specifier pairs, one for each entry + in 'clock-names' +- clock-names: should contain: + - "pxclk": pixel clock + - "aclk": AXI interface clock + +- port: each pipeline connect to an encoder input port. The connection is + modeled using the OF graph bindings specified in + Documentation/devicetree/bindings/graph.txt + +Optional properties: + - memory-region: phandle to a node describing memory (see + Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt) + to be used for the framebuffer; if not present, the framebuffer may + be located anywhere in memory. + +Example: +/ { + ... + + dp0: display@c00000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "arm,mali-d71"; + reg = <0xc00000 0x20000>; + interrupts = <0 168 4>; + clocks = <&dpu_mclk>, <&dpu_aclk>; + clock-names = "mclk", "pclk"; + + dp0_pipe0: pipeline@0 { + clocks = <&fpgaosc2>, <&dpu_aclk>; + clock-names = "pxclk", "aclk"; + reg = <0>; + + port { + dp0_pipe0_out: endpoint { + remote-endpoint = <&db_dvi0_in>; + }; + }; + }; + + dp0_pipe1: pipeline@1 { + clocks = <&fpgaosc2>, <&dpu_aclk>; + clock-names = "pxclk", "aclk"; + reg = <1>; + + port { + dp0_pipe1_out: endpoint { + remote-endpoint = <&db_dvi1_in>; + }; + }; + }; + }; + ... +}; From 26bd43a759963981ad685b3ed917abd9c41c65f8 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:40:21 +0000 Subject: [PATCH 017/539] drm/komeda: Build komeda to be a platform module Implement a simple wrapper for platform module to build komeda to module, Also add a very simple D71 layer code to show how to discover a product. Komeda driver direct bind the product ENTRY function xxx_identity to DT compatible name like: d71_product = { .product_id = MALIDP_D71_PRODUCT_ID, .identify = d71_identify, }, const struct of_device_id komeda_of_match[] = { { .compatible = "arm,mali-d71", .data = &d71_product, }, {}, }; Then when linux found a matched DT node and call driver to probe, we can easily get the of data, and call into the product to do the identify: komeda_bind() { ... product = of_device_get_match_data(dev); product->identify(); ... } Changes in v4: - Replaced kzalloc with devm_kzalloc Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Signed-off-by: James Qian Wang (Arm Technology China) Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- .../gpu/drm/arm/display/include/malidp_io.h | 42 ++++++ drivers/gpu/drm/arm/display/komeda/Makefile | 6 +- .../gpu/drm/arm/display/komeda/d71/d71_dev.c | 33 +++++ .../gpu/drm/arm/display/komeda/komeda_dev.h | 3 + .../gpu/drm/arm/display/komeda/komeda_drv.c | 132 ++++++++++++++++++ 5 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/arm/display/include/malidp_io.h create mode 100644 drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_drv.c diff --git a/drivers/gpu/drm/arm/display/include/malidp_io.h b/drivers/gpu/drm/arm/display/include/malidp_io.h new file mode 100644 index 000000000000..4fb3caf864ce --- /dev/null +++ b/drivers/gpu/drm/arm/display/include/malidp_io.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _MALIDP_IO_H_ +#define _MALIDP_IO_H_ + +#include + +static inline u32 +malidp_read32(u32 __iomem *base, u32 offset) +{ + return readl((base + (offset >> 2))); +} + +static inline void +malidp_write32(u32 __iomem *base, u32 offset, u32 v) +{ + writel(v, (base + (offset >> 2))); +} + +static inline void +malidp_write32_mask(u32 __iomem *base, u32 offset, u32 m, u32 v) +{ + u32 tmp = malidp_read32(base, offset); + + tmp &= (~m); + malidp_write32(base, offset, v | tmp); +} + +static inline void +malidp_write_group(u32 __iomem *base, u32 offset, int num, const u32 *values) +{ + int i; + + for (i = 0; i < num; i++) + malidp_write32(base, offset + i * 4, values[i]); +} + +#endif /*_MALIDP_IO_H_*/ diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile index 5b44e36509b1..c03d6876ef75 100644 --- a/drivers/gpu/drm/arm/display/komeda/Makefile +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -5,7 +5,11 @@ ccflags-y := \ -I$(src) komeda-y := \ + komeda_drv.o \ komeda_dev.o \ - komeda_pipeline.o \ + komeda_pipeline.o + +komeda-y += \ + d71/d71_dev.o obj-$(CONFIG_DRM_KOMEDA) += komeda.o diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c new file mode 100644 index 000000000000..af3dabb499cd --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include "malidp_io.h" +#include "komeda_dev.h" + +static int d71_enum_resources(struct komeda_dev *mdev) +{ + /* TODO add enum resources */ + return -1; +} + +static struct komeda_dev_funcs d71_chip_funcs = { + .enum_resources = d71_enum_resources, + .cleanup = NULL, +}; + +#define GLB_ARCH_ID 0x000 +#define GLB_CORE_ID 0x004 +#define GLB_CORE_INFO 0x008 + +struct komeda_dev_funcs * +d71_identify(u32 __iomem *reg_base, struct komeda_chip_info *chip) +{ + chip->arch_id = malidp_read32(reg_base, GLB_ARCH_ID); + chip->core_id = malidp_read32(reg_base, GLB_CORE_ID); + chip->core_info = malidp_read32(reg_base, GLB_CORE_INFO); + + return &d71_chip_funcs; +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h index 03b8703736c2..b48d711071be 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h @@ -92,6 +92,9 @@ komeda_product_match(struct komeda_dev *mdev, u32 target) return MALIDP_CORE_ID_PRODUCT_ID(mdev->chip.core_id) == target; } +struct komeda_dev_funcs * +d71_identify(u32 __iomem *reg, struct komeda_chip_info *chip); + struct komeda_dev *komeda_dev_create(struct device *dev); void komeda_dev_destroy(struct komeda_dev *mdev); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c new file mode 100644 index 000000000000..dd386d82d143 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include +#include +#include "komeda_dev.h" + +struct komeda_drv { + struct komeda_dev *mdev; +}; + +static void komeda_unbind(struct device *dev) +{ + struct komeda_drv *mdrv = dev_get_drvdata(dev); + + if (!mdrv) + return; + + komeda_dev_destroy(mdrv->mdev); + + dev_set_drvdata(dev, NULL); + devm_kfree(dev, mdrv); +} + +static int komeda_bind(struct device *dev) +{ + struct komeda_drv *mdrv; + int err; + + mdrv = devm_kzalloc(dev, sizeof(*mdrv), GFP_KERNEL); + if (!mdrv) + return -ENOMEM; + + mdrv->mdev = komeda_dev_create(dev); + if (IS_ERR(mdrv->mdev)) { + err = PTR_ERR(mdrv->mdev); + goto free_mdrv; + } + + dev_set_drvdata(dev, mdrv); + + return 0; + +free_mdrv: + devm_kfree(dev, mdrv); + return err; +} + +static const struct component_master_ops komeda_master_ops = { + .bind = komeda_bind, + .unbind = komeda_unbind, +}; + +static int compare_of(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static void komeda_add_slave(struct device *master, + struct component_match **match, + struct device_node *np, int port) +{ + struct device_node *remote; + + remote = of_graph_get_remote_node(np, port, 0); + if (remote) { + drm_of_component_match_add(master, match, compare_of, remote); + of_node_put(remote); + } +} + +static int komeda_platform_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct component_match *match = NULL; + struct device_node *child; + + if (!dev->of_node) + return -ENODEV; + + for_each_available_child_of_node(dev->of_node, child) { + if (of_node_cmp(child->name, "pipeline") != 0) + continue; + + /* add connector */ + komeda_add_slave(dev, &match, child, KOMEDA_OF_PORT_OUTPUT); + } + + return component_master_add_with_match(dev, &komeda_master_ops, match); +} + +static int komeda_platform_remove(struct platform_device *pdev) +{ + component_master_del(&pdev->dev, &komeda_master_ops); + return 0; +} + +static const struct komeda_product_data komeda_products[] = { + [MALI_D71] = { + .product_id = MALIDP_D71_PRODUCT_ID, + .identify = d71_identify, + }, +}; + +const struct of_device_id komeda_of_match[] = { + { .compatible = "arm,mali-d71", .data = &komeda_products[MALI_D71], }, + {}, +}; + +MODULE_DEVICE_TABLE(of, komeda_of_match); + +static struct platform_driver komeda_platform_driver = { + .probe = komeda_platform_probe, + .remove = komeda_platform_remove, + .driver = { + .name = "komeda", + .of_match_table = komeda_of_match, + .pm = NULL, + }, +}; + +module_platform_driver(komeda_platform_driver); + +MODULE_AUTHOR("James.Qian.Wang "); +MODULE_DESCRIPTION("Komeda KMS driver"); +MODULE_LICENSE("GPL v2"); From 29e56aec911dd784c1a29f28254f74fcd683bfe0 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:40:38 +0000 Subject: [PATCH 018/539] drm/komeda: Add DT parsing Parse DT and initialize corresponding dev/pipeline attributes. Changes in v4: - Rebase. Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Changes in v2: - Unified abbreviation of "pipeline" to "pipe". Signed-off-by: James Qian Wang (Arm Technology China) Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- .../gpu/drm/arm/display/komeda/komeda_dev.c | 76 +++++++++++++++++++ .../gpu/drm/arm/display/komeda/komeda_dev.h | 3 + .../drm/arm/display/komeda/komeda_pipeline.c | 4 + .../drm/arm/display/komeda/komeda_pipeline.h | 7 ++ 4 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 4dec259cecac..7257248463bc 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -9,6 +9,76 @@ #include #include "komeda_dev.h" +static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np) +{ + struct komeda_pipeline *pipe; + struct clk *clk; + u32 pipe_id; + int ret = 0; + + ret = of_property_read_u32(np, "reg", &pipe_id); + if (ret != 0 || pipe_id >= mdev->n_pipelines) + return -EINVAL; + + pipe = mdev->pipelines[pipe_id]; + + clk = of_clk_get_by_name(np, "aclk"); + if (IS_ERR(clk)) { + DRM_ERROR("get aclk for pipeline %d failed!\n", pipe_id); + return PTR_ERR(clk); + } + pipe->aclk = clk; + + clk = of_clk_get_by_name(np, "pxclk"); + if (IS_ERR(clk)) { + DRM_ERROR("get pxclk for pipeline %d failed!\n", pipe_id); + return PTR_ERR(clk); + } + pipe->pxlclk = clk; + + /* enum ports */ + pipe->of_output_dev = + of_graph_get_remote_node(np, KOMEDA_OF_PORT_OUTPUT, 0); + pipe->of_output_port = + of_graph_get_port_by_id(np, KOMEDA_OF_PORT_OUTPUT); + + pipe->of_node = np; + + return 0; +} + +static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct device_node *child, *np = dev->of_node; + struct clk *clk; + int ret; + + clk = devm_clk_get(dev, "mclk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + mdev->mclk = clk; + mdev->irq = platform_get_irq(pdev, 0); + if (mdev->irq < 0) { + DRM_ERROR("could not get IRQ number.\n"); + return mdev->irq; + } + + for_each_available_child_of_node(np, child) { + if (of_node_cmp(child->name, "pipeline") == 0) { + ret = komeda_parse_pipe_dt(mdev, child); + if (ret) { + DRM_ERROR("parse pipeline dt error!\n"); + of_node_put(child); + break; + } + } + } + + return ret; +} + struct komeda_dev *komeda_dev_create(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -71,6 +141,12 @@ struct komeda_dev *komeda_dev_create(struct device *dev) goto err_cleanup; } + err = komeda_parse_dt(dev, mdev); + if (err) { + DRM_ERROR("parse device tree failed.\n"); + goto err_cleanup; + } + return mdev; err_cleanup: diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h index b48d711071be..9c43565fec2a 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h @@ -72,6 +72,9 @@ struct komeda_dev { /** @mck: HW main engine clk */ struct clk *mclk; + /** @irq: irq number */ + u32 irq; + int n_pipelines; struct komeda_pipeline *pipelines[KOMEDA_MAX_PIPELINES]; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c index 179122fc93ff..edb1cd7795f9 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c @@ -53,6 +53,10 @@ void komeda_pipeline_destroy(struct komeda_dev *mdev, clk_put(pipe->pxlclk); clk_put(pipe->aclk); + of_node_put(pipe->of_output_dev); + of_node_put(pipe->of_output_port); + of_node_put(pipe->of_node); + devm_kfree(mdev->dev, pipe); } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h index 7daba0e1946b..d2222e46b10b 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h @@ -288,6 +288,13 @@ struct komeda_pipeline { struct komeda_improc *improc; struct komeda_timing_ctrlr *ctrlr; struct komeda_pipeline_funcs *funcs; /* private pipeline functions */ + + /** @of_node: pipeline dt node */ + struct device_node *of_node; + /** @of_output_port: pipeline output port */ + struct device_node *of_output_port; + /** @of_output_dev: output connector device node */ + struct device_node *of_output_dev; }; /** From 981d29d2db7c96b942aa4b73fed31469140bcf9d Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:40:55 +0000 Subject: [PATCH 019/539] drm/komeda: Add komeda_format_caps for format handling komeda_format_caps is for describing ARM display specific features and limitations of a specific format, and format_caps will be linked into &komeda_framebuffer like a extension of &drm_format_info. And komed_format_caps_table will be initialized before the enum_resources, since the layer features description depend on this format_caps table, so we'd better initialize the table first. Changes in v4: - Rebase. Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Signed-off-by: James Qian Wang (Arm Technology China) Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/Makefile | 1 + .../gpu/drm/arm/display/komeda/d71/d71_dev.c | 78 ++++++++++++++++ .../gpu/drm/arm/display/komeda/komeda_dev.c | 2 + .../gpu/drm/arm/display/komeda/komeda_dev.h | 11 ++- .../arm/display/komeda/komeda_format_caps.c | 75 ++++++++++++++++ .../arm/display/komeda/komeda_format_caps.h | 89 +++++++++++++++++++ .../drm/arm/display/komeda/komeda_pipeline.h | 1 + 7 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile index c03d6876ef75..394fc2aa434a 100644 --- a/drivers/gpu/drm/arm/display/komeda/Makefile +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -7,6 +7,7 @@ ccflags-y := \ komeda-y := \ komeda_drv.o \ komeda_dev.o \ + komeda_format_caps.o \ komeda_pipeline.o komeda-y += \ diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c index af3dabb499cd..edbf9daa1545 100644 --- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c @@ -13,7 +13,85 @@ static int d71_enum_resources(struct komeda_dev *mdev) return -1; } +#define __HW_ID(__group, __format) \ + ((((__group) & 0x7) << 3) | ((__format) & 0x7)) + +#define RICH KOMEDA_FMT_RICH_LAYER +#define SIMPLE KOMEDA_FMT_SIMPLE_LAYER +#define RICH_SIMPLE (KOMEDA_FMT_RICH_LAYER | KOMEDA_FMT_SIMPLE_LAYER) +#define RICH_WB (KOMEDA_FMT_RICH_LAYER | KOMEDA_FMT_WB_LAYER) +#define RICH_SIMPLE_WB (RICH_SIMPLE | KOMEDA_FMT_WB_LAYER) + +#define Rot_0 DRM_MODE_ROTATE_0 +#define Flip_H_V (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y | Rot_0) +#define Rot_ALL_H_V (DRM_MODE_ROTATE_MASK | Flip_H_V) + +#define LYT_NM BIT(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16) +#define LYT_WB BIT(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8) +#define LYT_NM_WB (LYT_NM | LYT_WB) + +#define AFB_TH AFBC(_TILED | _SPARSE) +#define AFB_TH_SC_YTR AFBC(_TILED | _SC | _SPARSE | _YTR) +#define AFB_TH_SC_YTR_BS AFBC(_TILED | _SC | _SPARSE | _YTR | _SPLIT) + +static struct komeda_format_caps d71_format_caps_table[] = { + /* HW_ID | fourcc | tile_sz | layer_types | rots | afbc_layouts | afbc_features */ + /* ABGR_2101010*/ + {__HW_ID(0, 0), DRM_FORMAT_ARGB2101010, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(0, 1), DRM_FORMAT_ABGR2101010, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(0, 1), DRM_FORMAT_ABGR2101010, 1, RICH_SIMPLE, Rot_ALL_H_V, LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */ + {__HW_ID(0, 2), DRM_FORMAT_RGBA1010102, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(0, 3), DRM_FORMAT_BGRA1010102, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + /* ABGR_8888*/ + {__HW_ID(1, 0), DRM_FORMAT_ARGB8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(1, 1), DRM_FORMAT_ABGR8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(1, 1), DRM_FORMAT_ABGR8888, 1, RICH_SIMPLE, Rot_ALL_H_V, LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */ + {__HW_ID(1, 2), DRM_FORMAT_RGBA8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(1, 3), DRM_FORMAT_BGRA8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + /* XBGB_8888 */ + {__HW_ID(2, 0), DRM_FORMAT_XRGB8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(2, 1), DRM_FORMAT_XBGR8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(2, 2), DRM_FORMAT_RGBX8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + {__HW_ID(2, 3), DRM_FORMAT_BGRX8888, 1, RICH_SIMPLE_WB, Flip_H_V, 0, 0}, + /* BGR_888 */ /* none-afbc RGB888 doesn't support rotation and flip */ + {__HW_ID(3, 0), DRM_FORMAT_RGB888, 1, RICH_SIMPLE_WB, Rot_0, 0, 0}, + {__HW_ID(3, 1), DRM_FORMAT_BGR888, 1, RICH_SIMPLE_WB, Rot_0, 0, 0}, + {__HW_ID(3, 1), DRM_FORMAT_BGR888, 1, RICH_SIMPLE, Rot_ALL_H_V, LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */ + /* BGR 16bpp */ + {__HW_ID(4, 0), DRM_FORMAT_RGBA5551, 1, RICH_SIMPLE, Flip_H_V, 0, 0}, + {__HW_ID(4, 1), DRM_FORMAT_ABGR1555, 1, RICH_SIMPLE, Flip_H_V, 0, 0}, + {__HW_ID(4, 1), DRM_FORMAT_ABGR1555, 1, RICH_SIMPLE, Rot_ALL_H_V, LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */ + {__HW_ID(4, 2), DRM_FORMAT_RGB565, 1, RICH_SIMPLE, Flip_H_V, 0, 0}, + {__HW_ID(4, 3), DRM_FORMAT_BGR565, 1, RICH_SIMPLE, Flip_H_V, 0, 0}, + {__HW_ID(4, 3), DRM_FORMAT_BGR565, 1, RICH_SIMPLE, Rot_ALL_H_V, LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */ + {__HW_ID(4, 4), DRM_FORMAT_R8, 1, SIMPLE, Rot_0, 0, 0}, + /* YUV 444/422/420 8bit */ + {__HW_ID(5, 0), 0 /*XYUV8888*/, 1, 0, 0, 0, 0}, + /* XYUV unsupported*/ + {__HW_ID(5, 1), DRM_FORMAT_YUYV, 1, RICH, Rot_ALL_H_V, LYT_NM, AFB_TH}, /* afbc */ + {__HW_ID(5, 2), DRM_FORMAT_YUYV, 1, RICH, Flip_H_V, 0, 0}, + {__HW_ID(5, 3), DRM_FORMAT_UYVY, 1, RICH, Flip_H_V, 0, 0}, + {__HW_ID(5, 4), 0, /*X0L0 */ 2, 0, 0, 0}, /* Y0L0 unsupported */ + {__HW_ID(5, 6), DRM_FORMAT_NV12, 1, RICH, Flip_H_V, 0, 0}, + {__HW_ID(5, 6), 0/*DRM_FORMAT_YUV420_8BIT*/, 1, RICH, Rot_ALL_H_V, LYT_NM, AFB_TH}, /* afbc */ + {__HW_ID(5, 7), DRM_FORMAT_YUV420, 1, RICH, Flip_H_V, 0, 0}, + /* YUV 10bit*/ + {__HW_ID(6, 0), 0,/*XVYU2101010*/ 1, 0, 0, 0, 0},/* VYV30 unsupported */ + {__HW_ID(6, 6), 0/*DRM_FORMAT_X0L2*/, 2, RICH, Flip_H_V, 0, 0}, + {__HW_ID(6, 7), 0/*DRM_FORMAT_P010*/, 1, RICH, Flip_H_V, 0, 0}, + {__HW_ID(6, 7), 0/*DRM_FORMAT_YUV420_10BIT*/, 1, RICH, Rot_ALL_H_V, LYT_NM, AFB_TH}, +}; + +static void d71_init_fmt_tbl(struct komeda_dev *mdev) +{ + struct komeda_format_caps_table *table = &mdev->fmt_tbl; + + table->format_caps = d71_format_caps_table; + table->n_formats = ARRAY_SIZE(d71_format_caps_table); +} + static struct komeda_dev_funcs d71_chip_funcs = { + .init_format_table = d71_init_fmt_tbl, .enum_resources = d71_enum_resources, .cleanup = NULL, }; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 7257248463bc..84fdf707f210 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -135,6 +135,8 @@ struct komeda_dev *komeda_dev_create(struct device *dev) MALIDP_CORE_ID_MAJOR(mdev->chip.core_id), MALIDP_CORE_ID_MINOR(mdev->chip.core_id)); + mdev->funcs->init_format_table(mdev); + err = mdev->funcs->enum_resources(mdev); if (err) { DRM_ERROR("enumerate display resource failed.\n"); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h index 9c43565fec2a..a0bf7050037a 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h @@ -11,6 +11,7 @@ #include #include "komeda_pipeline.h" #include "malidp_product.h" +#include "komeda_format_caps.h" /* malidp device id */ enum { @@ -44,6 +45,13 @@ struct komeda_dev; * Supplied by chip level and returned by the chip entry function xxx_identify, */ struct komeda_dev_funcs { + /** + * @init_format_table: + * + * initialize &komeda_dev->format_table, this function should be called + * before the &enum_resource + */ + void (*init_format_table)(struct komeda_dev *mdev); /** * @enum_resources: * @@ -66,7 +74,8 @@ struct komeda_dev { u32 __iomem *reg_base; struct komeda_chip_info chip; - + /** @fmt_tbl: initialized by &komeda_dev_funcs->init_format_table */ + struct komeda_format_caps_table fmt_tbl; /** @pclk: APB clock for register access */ struct clk *pclk; /** @mck: HW main engine clk */ diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c new file mode 100644 index 000000000000..1e17bd6107a4 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ + +#include +#include "komeda_format_caps.h" +#include "malidp_utils.h" + +const struct komeda_format_caps * +komeda_get_format_caps(struct komeda_format_caps_table *table, + u32 fourcc, u64 modifier) +{ + const struct komeda_format_caps *caps; + u64 afbc_features = modifier & ~(AFBC_FORMAT_MOD_BLOCK_SIZE_MASK); + u32 afbc_layout = modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK; + int id; + + for (id = 0; id < table->n_formats; id++) { + caps = &table->format_caps[id]; + + if (fourcc != caps->fourcc) + continue; + + if ((modifier == 0ULL) && (caps->supported_afbc_layouts == 0)) + return caps; + + if (has_bits(afbc_features, caps->supported_afbc_features) && + has_bit(afbc_layout, caps->supported_afbc_layouts)) + return caps; + } + + return NULL; +} + +u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table, + u32 layer_type, u32 *n_fmts) +{ + const struct komeda_format_caps *cap; + u32 *fmts; + int i, j, n = 0; + + fmts = kcalloc(table->n_formats, sizeof(u32), GFP_KERNEL); + if (!fmts) + return NULL; + + for (i = 0; i < table->n_formats; i++) { + cap = &table->format_caps[i]; + if (!(layer_type & cap->supported_layer_types) || + (cap->fourcc == 0)) + continue; + + /* one fourcc may has two caps items in table (afbc/none-afbc), + * so check the existing list to avoid adding a duplicated one. + */ + for (j = n - 1; j >= 0; j--) + if (fmts[j] == cap->fourcc) + break; + + if (j < 0) + fmts[n++] = cap->fourcc; + } + + if (n_fmts) + *n_fmts = n; + + return fmts; +} + +void komeda_put_fourcc_list(u32 *fourcc_list) +{ + kfree(fourcc_list); +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h new file mode 100644 index 000000000000..60f39e77b098 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ + +#ifndef _KOMEDA_FORMAT_CAPS_H_ +#define _KOMEDA_FORMAT_CAPS_H_ + +#include +#include +#include + +#define AFBC(x) DRM_FORMAT_MOD_ARM_AFBC(x) + +/* afbc layerout */ +#define AFBC_16x16(x) AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | (x)) +#define AFBC_32x8(x) AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | (x)) + +/* afbc features */ +#define _YTR AFBC_FORMAT_MOD_YTR +#define _SPLIT AFBC_FORMAT_MOD_SPLIT +#define _SPARSE AFBC_FORMAT_MOD_SPARSE +#define _CBR AFBC_FORMAT_MOD_CBR +#define _TILED AFBC_FORMAT_MOD_TILED +#define _SC AFBC_FORMAT_MOD_SC + +/* layer_type */ +#define KOMEDA_FMT_RICH_LAYER BIT(0) +#define KOMEDA_FMT_SIMPLE_LAYER BIT(1) +#define KOMEDA_FMT_WB_LAYER BIT(2) + +#define AFBC_TH_LAYOUT_ALIGNMENT 8 +#define AFBC_HEADER_SIZE 16 +#define AFBC_SUPERBLK_ALIGNMENT 128 +#define AFBC_SUPERBLK_PIXELS 256 +#define AFBC_BODY_START_ALIGNMENT 1024 +#define AFBC_TH_BODY_START_ALIGNMENT 4096 + +/** + * struct komeda_format_caps + * + * komeda_format_caps is for describing ARM display specific features and + * limitations for a specific format, and format_caps will be linked into + * &komeda_framebuffer like a extension of &drm_format_info. + * + * NOTE: one fourcc may has two different format_caps items for fourcc and + * fourcc+modifier + * + * @hw_id: hw format id, hw specific value. + * @fourcc: drm fourcc format. + * @tile_size: format tiled size, used by ARM format X0L0/X0L2 + * @supported_layer_types: indicate which layer supports this format + * @supported_rots: allowed rotations for this format + * @supported_afbc_layouts: supported afbc layerout + * @supported_afbc_features: supported afbc features + */ +struct komeda_format_caps { + u32 hw_id; + u32 fourcc; + u32 tile_size; + u32 supported_layer_types; + u32 supported_rots; + u32 supported_afbc_layouts; + u64 supported_afbc_features; +}; + +/** + * struct komeda_format_caps_table - format_caps mananger + * + * @n_formats: the size of format_caps list. + * @format_caps: format_caps list. + */ +struct komeda_format_caps_table { + u32 n_formats; + const struct komeda_format_caps *format_caps; +}; + +const struct komeda_format_caps * +komeda_get_format_caps(struct komeda_format_caps_table *table, + u32 fourcc, u64 modifier); + +u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table, + u32 layer_type, u32 *n_fmts); + +void komeda_put_fourcc_list(u32 *fourcc_list); + +#endif diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h index d2222e46b10b..35dc71acd7ec 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h @@ -211,6 +211,7 @@ static inline u16 component_changed_inputs(struct komeda_component_state *st) struct komeda_layer { struct komeda_component base; /* layer specific features and caps */ + int layer_type; /* RICH, SIMPLE or WB */ }; struct komeda_layer_state { From c46c24bb6b115b0d74789cf089894384875189c7 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:41:13 +0000 Subject: [PATCH 020/539] drm/komeda: Add komeda_framebuffer komeda_framebuffer is for extending drm_framebuffer to add komeda own attributes and komeda specific fb handling. Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Signed-off-by: James Qian Wang (Arm Technology China) Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/Makefile | 3 +- .../arm/display/komeda/komeda_framebuffer.c | 165 ++++++++++++++++++ .../arm/display/komeda/komeda_framebuffer.h | 34 ++++ 3 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile index 394fc2aa434a..25beae900ed2 100644 --- a/drivers/gpu/drm/arm/display/komeda/Makefile +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -8,7 +8,8 @@ komeda-y := \ komeda_drv.o \ komeda_dev.o \ komeda_format_caps.o \ - komeda_pipeline.o + komeda_pipeline.o \ + komeda_framebuffer.o komeda-y += \ d71/d71_dev.o diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c new file mode 100644 index 000000000000..4ddd5314ca23 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include +#include "komeda_framebuffer.h" +#include "komeda_dev.h" + +static void komeda_fb_destroy(struct drm_framebuffer *fb) +{ + struct komeda_fb *kfb = to_kfb(fb); + u32 i; + + for (i = 0; i < fb->format->num_planes; i++) + drm_gem_object_put_unlocked(fb->obj[i]); + + drm_framebuffer_cleanup(fb); + kfree(kfb); +} + +static int komeda_fb_create_handle(struct drm_framebuffer *fb, + struct drm_file *file, u32 *handle) +{ + return drm_gem_handle_create(file, fb->obj[0], handle); +} + +static const struct drm_framebuffer_funcs komeda_fb_funcs = { + .destroy = komeda_fb_destroy, + .create_handle = komeda_fb_create_handle, +}; + +static int +komeda_fb_none_afbc_size_check(struct komeda_dev *mdev, struct komeda_fb *kfb, + struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_framebuffer *fb = &kfb->base; + struct drm_gem_object *obj; + u32 min_size = 0; + u32 i; + + for (i = 0; i < fb->format->num_planes; i++) { + obj = drm_gem_object_lookup(file, mode_cmd->handles[i]); + if (!obj) { + DRM_DEBUG_KMS("Failed to lookup GEM object\n"); + fb->obj[i] = NULL; + + return -ENOENT; + } + + kfb->aligned_w = fb->width / (i ? fb->format->hsub : 1); + kfb->aligned_h = fb->height / (i ? fb->format->vsub : 1); + + if (fb->pitches[i] % mdev->chip.bus_width) { + DRM_DEBUG_KMS("Pitch[%d]: 0x%x doesn't align to 0x%x\n", + i, fb->pitches[i], mdev->chip.bus_width); + drm_gem_object_put_unlocked(obj); + fb->obj[i] = NULL; + + return -EINVAL; + } + + min_size = ((kfb->aligned_h / kfb->format_caps->tile_size - 1) + * fb->pitches[i]) + + (kfb->aligned_w * fb->format->cpp[i] + * kfb->format_caps->tile_size) + + fb->offsets[i]; + + if (obj->size < min_size) { + DRM_DEBUG_KMS("Fail to check none afbc fb size.\n"); + drm_gem_object_put_unlocked(obj); + fb->obj[i] = NULL; + + return -EINVAL; + } + + fb->obj[i] = obj; + } + + if (fb->format->num_planes == 3) { + if (fb->pitches[1] != fb->pitches[2]) { + DRM_DEBUG_KMS("The pitch[1] and [2] are not same\n"); + return -EINVAL; + } + } + + return 0; +} + +struct drm_framebuffer * +komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct komeda_dev *mdev = dev->dev_private; + struct komeda_fb *kfb; + int ret = 0, i; + + kfb = kzalloc(sizeof(*kfb), GFP_KERNEL); + if (!kfb) + return ERR_PTR(-ENOMEM); + + kfb->format_caps = komeda_get_format_caps(&mdev->fmt_tbl, + mode_cmd->pixel_format, + mode_cmd->modifier[0]); + if (!kfb->format_caps) { + DRM_DEBUG_KMS("FMT %x is not supported.\n", + mode_cmd->pixel_format); + kfree(kfb); + return ERR_PTR(-EINVAL); + } + + drm_helper_mode_fill_fb_struct(dev, &kfb->base, mode_cmd); + + ret = komeda_fb_none_afbc_size_check(mdev, kfb, file, mode_cmd); + if (ret < 0) + goto err_cleanup; + + ret = drm_framebuffer_init(dev, &kfb->base, &komeda_fb_funcs); + if (ret < 0) { + DRM_DEBUG_KMS("failed to initialize fb\n"); + + goto err_cleanup; + } + + return &kfb->base; + +err_cleanup: + for (i = 0; i < kfb->base.format->num_planes; i++) + drm_gem_object_put_unlocked(kfb->base.obj[i]); + + kfree(kfb); + return ERR_PTR(ret); +} + +dma_addr_t +komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane) +{ + struct drm_framebuffer *fb = &kfb->base; + const struct drm_gem_cma_object *obj; + u32 plane_x, plane_y, cpp, pitch, offset; + + if (plane > fb->format->num_planes) { + DRM_DEBUG_KMS("Out of max plane num.\n"); + return -EINVAL; + } + + obj = drm_fb_cma_get_gem_obj(fb, plane); + + offset = fb->offsets[plane]; + if (!fb->modifier) { + plane_x = x / (plane ? fb->format->hsub : 1); + plane_y = y / (plane ? fb->format->vsub : 1); + cpp = fb->format->cpp[plane]; + pitch = fb->pitches[plane]; + offset += plane_x * cpp * kfb->format_caps->tile_size + + (plane_y * pitch) / kfb->format_caps->tile_size; + } + + return obj->paddr + offset; +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h new file mode 100644 index 000000000000..0de2e4a2afd2 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _KOMEDA_FRAMEBUFFER_H_ +#define _KOMEDA_FRAMEBUFFER_H_ + +#include +#include "komeda_format_caps.h" + +/** struct komeda_fb - entend drm_framebuffer with komeda attribute */ +struct komeda_fb { + /** @base: &drm_framebuffer */ + struct drm_framebuffer base; + /* @format_caps: &komeda_format_caps */ + const struct komeda_format_caps *format_caps; + /** @aligned_w: aligned frame buffer width */ + u32 aligned_w; + /** @aligned_h: aligned frame buffer height */ + u32 aligned_h; +}; + +#define to_kfb(dfb) container_of(dfb, struct komeda_fb, base) + +struct drm_framebuffer * +komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd); +dma_addr_t +komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane); +bool komeda_fb_is_layer_supported(struct komeda_fb *kfb, u32 layer_type); + +#endif From 61f1c4a8ab7575c0147ae6c0d220f9719fdc741e Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:41:30 +0000 Subject: [PATCH 021/539] drm/komeda: Attach komeda_dev to DRM-KMS Add komeda_kms abstracton to attach komeda_dev to DRM-KMS CRTC: according to the komeda_pipeline PLANE: according to komeda_layer (layer input pipeline) PRIVATE_OBJS: komeda_pipeline/component all will be treat as private_objs komeda_kms is for connecting DRM-KMS and komeda_dev, like reporting the kms object properties according to the komeda_dev, and pass/convert KMS's requirement to komeda_dev. Changes in v4: - Set drm_atomic_helper_check as mode_config->atomic_check. Changes in v3: - Fixed style problem found by checkpatch.pl --strict. Changes in v2: - Unified abbreviation of "pipeline" to "pipe". Signed-off-by: James Qian Wang (Arm Technology China) Reviewed-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/Makefile | 6 +- .../gpu/drm/arm/display/komeda/komeda_crtc.c | 106 +++++++++++ .../gpu/drm/arm/display/komeda/komeda_drv.c | 12 ++ .../gpu/drm/arm/display/komeda/komeda_kms.c | 169 ++++++++++++++++++ .../gpu/drm/arm/display/komeda/komeda_kms.h | 113 ++++++++++++ .../drm/arm/display/komeda/komeda_pipeline.h | 3 + .../gpu/drm/arm/display/komeda/komeda_plane.c | 109 +++++++++++ .../arm/display/komeda/komeda_private_obj.c | 88 +++++++++ 8 files changed, 605 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_crtc.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_kms.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_kms.h create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_plane.c create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile index 25beae900ed2..1b875e5dc0f6 100644 --- a/drivers/gpu/drm/arm/display/komeda/Makefile +++ b/drivers/gpu/drm/arm/display/komeda/Makefile @@ -9,7 +9,11 @@ komeda-y := \ komeda_dev.o \ komeda_format_caps.o \ komeda_pipeline.o \ - komeda_framebuffer.o + komeda_framebuffer.o \ + komeda_kms.o \ + komeda_crtc.o \ + komeda_plane.o \ + komeda_private_obj.o komeda-y += \ d71/d71_dev.o diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c new file mode 100644 index 000000000000..5bb5a55f6b31 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include +#include +#include +#include +#include "komeda_dev.h" +#include "komeda_kms.h" + +struct drm_crtc_helper_funcs komeda_crtc_helper_funcs = { +}; + +static const struct drm_crtc_funcs komeda_crtc_funcs = { +}; + +int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms, + struct komeda_dev *mdev) +{ + struct komeda_crtc *crtc; + struct komeda_pipeline *master; + char str[16]; + int i; + + kms->n_crtcs = 0; + + for (i = 0; i < mdev->n_pipelines; i++) { + crtc = &kms->crtcs[kms->n_crtcs]; + master = mdev->pipelines[i]; + + crtc->master = master; + crtc->slave = NULL; + + if (crtc->slave) + sprintf(str, "pipe-%d", crtc->slave->id); + else + sprintf(str, "None"); + + DRM_INFO("crtc%d: master(pipe-%d) slave(%s) output: %s.\n", + kms->n_crtcs, master->id, str, + master->of_output_dev ? + master->of_output_dev->full_name : "None"); + + kms->n_crtcs++; + } + + return 0; +} + +static struct drm_plane * +get_crtc_primary(struct komeda_kms_dev *kms, struct komeda_crtc *crtc) +{ + struct komeda_plane *kplane; + struct drm_plane *plane; + + drm_for_each_plane(plane, &kms->base) { + if (plane->type != DRM_PLANE_TYPE_PRIMARY) + continue; + + kplane = to_kplane(plane); + /* only master can be primary */ + if (kplane->layer->base.pipeline == crtc->master) + return plane; + } + + return NULL; +} + +static int komeda_crtc_add(struct komeda_kms_dev *kms, + struct komeda_crtc *kcrtc) +{ + struct drm_crtc *crtc = &kcrtc->base; + int err; + + err = drm_crtc_init_with_planes(&kms->base, crtc, + get_crtc_primary(kms, kcrtc), NULL, + &komeda_crtc_funcs, NULL); + if (err) + return err; + + drm_crtc_helper_add(crtc, &komeda_crtc_helper_funcs); + drm_crtc_vblank_reset(crtc); + + crtc->port = kcrtc->master->of_output_port; + + return 0; +} + +int komeda_kms_add_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev) +{ + int i, err; + + for (i = 0; i < kms->n_crtcs; i++) { + err = komeda_crtc_add(kms, &kms->crtcs[i]); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c index dd386d82d143..2bdd189b041d 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c @@ -10,9 +10,11 @@ #include #include #include "komeda_dev.h" +#include "komeda_kms.h" struct komeda_drv { struct komeda_dev *mdev; + struct komeda_kms_dev *kms; }; static void komeda_unbind(struct device *dev) @@ -22,6 +24,7 @@ static void komeda_unbind(struct device *dev) if (!mdrv) return; + komeda_kms_detach(mdrv->kms); komeda_dev_destroy(mdrv->mdev); dev_set_drvdata(dev, NULL); @@ -43,10 +46,19 @@ static int komeda_bind(struct device *dev) goto free_mdrv; } + mdrv->kms = komeda_kms_attach(mdrv->mdev); + if (IS_ERR(mdrv->kms)) { + err = PTR_ERR(mdrv->kms); + goto destroy_mdev; + } + dev_set_drvdata(dev, mdrv); return 0; +destroy_mdev: + komeda_dev_destroy(mdrv->mdev); + free_mdrv: devm_kfree(dev, mdrv); return err; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c new file mode 100644 index 000000000000..f41b20235130 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include +#include +#include +#include +#include "komeda_dev.h" +#include "komeda_kms.h" +#include "komeda_framebuffer.h" + +DEFINE_DRM_GEM_CMA_FOPS(komeda_cma_fops); + +static int komeda_gem_cma_dumb_create(struct drm_file *file, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + u32 alignment = 16; /* TODO get alignment from dev */ + + args->pitch = ALIGN(DIV_ROUND_UP(args->width * args->bpp, 8), + alignment); + + return drm_gem_cma_dumb_create_internal(file, dev, args); +} + +static struct drm_driver komeda_kms_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | + DRIVER_PRIME, + .lastclose = drm_fb_helper_lastclose, + .gem_free_object_unlocked = drm_gem_cma_free_object, + .gem_vm_ops = &drm_gem_cma_vm_ops, + .dumb_create = komeda_gem_cma_dumb_create, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, + .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_vmap = drm_gem_cma_prime_vmap, + .gem_prime_vunmap = drm_gem_cma_prime_vunmap, + .gem_prime_mmap = drm_gem_cma_prime_mmap, + .fops = &komeda_cma_fops, + .name = "komeda", + .desc = "Arm Komeda Display Processor driver", + .date = "20181101", + .major = 0, + .minor = 1, +}; + +static void komeda_kms_commit_tail(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + + drm_atomic_helper_commit_modeset_disables(dev, old_state); + + drm_atomic_helper_commit_planes(dev, old_state, 0); + + drm_atomic_helper_commit_modeset_enables(dev, old_state); + + drm_atomic_helper_wait_for_flip_done(dev, old_state); + + drm_atomic_helper_commit_hw_done(old_state); + + drm_atomic_helper_cleanup_planes(dev, old_state); +} + +static const struct drm_mode_config_helper_funcs komeda_mode_config_helpers = { + .atomic_commit_tail = komeda_kms_commit_tail, +}; + +static const struct drm_mode_config_funcs komeda_mode_config_funcs = { + .fb_create = komeda_fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void komeda_kms_mode_config_init(struct komeda_kms_dev *kms, + struct komeda_dev *mdev) +{ + struct drm_mode_config *config = &kms->base.mode_config; + + drm_mode_config_init(&kms->base); + + komeda_kms_setup_crtcs(kms, mdev); + + /* Get value from dev */ + config->min_width = 0; + config->min_height = 0; + config->max_width = 4096; + config->max_height = 4096; + config->allow_fb_modifiers = false; + + config->funcs = &komeda_mode_config_funcs; + config->helper_private = &komeda_mode_config_helpers; +} + +struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) +{ + struct komeda_kms_dev *kms = kzalloc(sizeof(*kms), GFP_KERNEL); + struct drm_device *drm; + int err; + + if (!kms) + return ERR_PTR(-ENOMEM); + + drm = &kms->base; + err = drm_dev_init(drm, &komeda_kms_driver, mdev->dev); + if (err) + goto free_kms; + + drm->dev_private = mdev; + + komeda_kms_mode_config_init(kms, mdev); + + err = komeda_kms_add_private_objs(kms, mdev); + if (err) + goto cleanup_mode_config; + + err = komeda_kms_add_planes(kms, mdev); + if (err) + goto cleanup_mode_config; + + err = drm_vblank_init(drm, kms->n_crtcs); + if (err) + goto cleanup_mode_config; + + err = komeda_kms_add_crtcs(kms, mdev); + if (err) + goto cleanup_mode_config; + + err = component_bind_all(mdev->dev, kms); + if (err) + goto cleanup_mode_config; + + drm_mode_config_reset(drm); + + err = drm_dev_register(drm, 0); + if (err) + goto uninstall_irq; + + return kms; + +uninstall_irq: + drm_irq_uninstall(drm); +cleanup_mode_config: + drm_mode_config_cleanup(drm); +free_kms: + kfree(kms); + return ERR_PTR(err); +} + +void komeda_kms_detach(struct komeda_kms_dev *kms) +{ + struct drm_device *drm = &kms->base; + struct komeda_dev *mdev = drm->dev_private; + + drm_dev_unregister(drm); + component_unbind_all(mdev->dev, drm); + komeda_kms_cleanup_private_objs(mdev); + drm_mode_config_cleanup(drm); + drm->dev_private = NULL; + drm_dev_put(drm); +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h new file mode 100644 index 000000000000..f13666004a42 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#ifndef _KOMEDA_KMS_H_ +#define _KOMEDA_KMS_H_ + +#include +#include +#include +#include + +/** struct komeda_plane - komeda instance of drm_plane */ +struct komeda_plane { + /** @base: &drm_plane */ + struct drm_plane base; + /** + * @layer: + * + * represents available layer input pipelines for this plane. + * + * NOTE: + * the layer is not for a specific Layer, but indicate a group of + * Layers with same capabilities. + */ + struct komeda_layer *layer; +}; + +/** + * struct komeda_plane_state + * + * The plane_state can be split into two data flow (left/right) and handled + * by two layers &komeda_plane.layer and &komeda_plane.layer.right + */ +struct komeda_plane_state { + /** @base: &drm_plane_state */ + struct drm_plane_state base; + + /* private properties */ +}; + +/** + * struct komeda_wb_connector + */ +struct komeda_wb_connector { + /** @base: &drm_writeback_connector */ + struct drm_writeback_connector base; + + /** @wb_layer: represents associated writeback pipeline of komeda */ + struct komeda_layer *wb_layer; +}; + +/** + * struct komeda_crtc + */ +struct komeda_crtc { + /** @base: &drm_crtc */ + struct drm_crtc base; + /** @master: only master has display output */ + struct komeda_pipeline *master; + /** + * @slave: optional + * + * Doesn't have its own display output, the handled data flow will + * merge into the master. + */ + struct komeda_pipeline *slave; +}; + +/** struct komeda_crtc_state */ +struct komeda_crtc_state { + /** @base: &drm_crtc_state */ + struct drm_crtc_state base; + + /* private properties */ + + /* computed state which are used by validate/check */ + u32 affected_pipes; + u32 active_pipes; +}; + +/** struct komeda_kms_dev - for gather KMS related things */ +struct komeda_kms_dev { + /** @base: &drm_device */ + struct drm_device base; + + /** @n_crtcs: valid numbers of crtcs in &komeda_kms_dev.crtcs */ + int n_crtcs; + /** @crtcs: crtcs list */ + struct komeda_crtc crtcs[KOMEDA_MAX_PIPELINES]; +}; + +#define to_kplane(p) container_of(p, struct komeda_plane, base) +#define to_kplane_st(p) container_of(p, struct komeda_plane_state, base) +#define to_kconn(p) container_of(p, struct komeda_wb_connector, base) +#define to_kcrtc(p) container_of(p, struct komeda_crtc, base) +#define to_kcrtc_st(p) container_of(p, struct komeda_crtc_state, base) +#define to_kdev(p) container_of(p, struct komeda_kms_dev, base) + +int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev); + +int komeda_kms_add_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev); +int komeda_kms_add_planes(struct komeda_kms_dev *kms, struct komeda_dev *mdev); +int komeda_kms_add_private_objs(struct komeda_kms_dev *kms, + struct komeda_dev *mdev); +void komeda_kms_cleanup_private_objs(struct komeda_dev *mdev); + +struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev); +void komeda_kms_detach(struct komeda_kms_dev *kms); + +#endif /*_KOMEDA_KMS_H_*/ diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h index 35dc71acd7ec..8c950bc8ae96 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h @@ -331,6 +331,9 @@ struct komeda_pipeline_state { #define to_improc_st(c) container_of(c, struct komeda_improc_state, base) #define to_ctrlr_st(c) container_of(c, struct komeda_timing_ctrlr_state, base) +#define priv_to_comp_st(o) container_of(o, struct komeda_component_state, obj) +#define priv_to_pipe_st(o) container_of(o, struct komeda_pipeline_state, obj) + /* pipeline APIs */ struct komeda_pipeline * komeda_pipeline_add(struct komeda_dev *mdev, size_t size, diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c new file mode 100644 index 000000000000..0a4953a9a909 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include +#include +#include +#include "komeda_dev.h" +#include "komeda_kms.h" + +static const struct drm_plane_helper_funcs komeda_plane_helper_funcs = { +}; + +static void komeda_plane_destroy(struct drm_plane *plane) +{ + drm_plane_cleanup(plane); + + kfree(to_kplane(plane)); +} + +static const struct drm_plane_funcs komeda_plane_funcs = { +}; + +/* for komeda, which is pipeline can be share between crtcs */ +static u32 get_possible_crtcs(struct komeda_kms_dev *kms, + struct komeda_pipeline *pipe) +{ + struct komeda_crtc *crtc; + u32 possible_crtcs = 0; + int i; + + for (i = 0; i < kms->n_crtcs; i++) { + crtc = &kms->crtcs[i]; + + if ((pipe == crtc->master) || (pipe == crtc->slave)) + possible_crtcs |= BIT(i); + } + + return possible_crtcs; +} + +/* use Layer0 as primary */ +static u32 get_plane_type(struct komeda_kms_dev *kms, + struct komeda_component *c) +{ + bool is_primary = (c->id == KOMEDA_COMPONENT_LAYER0); + + return is_primary ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; +} + +static int komeda_plane_add(struct komeda_kms_dev *kms, + struct komeda_layer *layer) +{ + struct komeda_dev *mdev = kms->base.dev_private; + struct komeda_component *c = &layer->base; + struct komeda_plane *kplane; + struct drm_plane *plane; + u32 *formats, n_formats = 0; + int err; + + kplane = kzalloc(sizeof(*kplane), GFP_KERNEL); + if (!kplane) + return -ENOMEM; + + plane = &kplane->base; + kplane->layer = layer; + + formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, + layer->layer_type, &n_formats); + + err = drm_universal_plane_init(&kms->base, plane, + get_possible_crtcs(kms, c->pipeline), + &komeda_plane_funcs, + formats, n_formats, NULL, + get_plane_type(kms, c), + "%s", c->name); + + komeda_put_fourcc_list(formats); + + if (err) + goto cleanup; + + drm_plane_helper_add(plane, &komeda_plane_helper_funcs); + + return 0; +cleanup: + komeda_plane_destroy(plane); + return err; +} + +int komeda_kms_add_planes(struct komeda_kms_dev *kms, struct komeda_dev *mdev) +{ + struct komeda_pipeline *pipe; + int i, j, err; + + for (i = 0; i < mdev->n_pipelines; i++) { + pipe = mdev->pipelines[i]; + + for (j = 0; j < pipe->n_layers; j++) { + err = komeda_plane_add(kms, pipe->layers[j]); + if (err) + return err; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c new file mode 100644 index 000000000000..9edfd6ab0c12 --- /dev/null +++ b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * Author: James.Qian.Wang + * + */ +#include "komeda_dev.h" +#include "komeda_kms.h" + +static struct drm_private_state * +komeda_pipeline_atomic_duplicate_state(struct drm_private_obj *obj) +{ + struct komeda_pipeline_state *st; + + st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL); + if (!st) + return NULL; + + st->active_comps = 0; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &st->obj); + + return &st->obj; +} + +static void +komeda_pipeline_atomic_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + kfree(priv_to_pipe_st(state)); +} + +static const struct drm_private_state_funcs komeda_pipeline_obj_funcs = { + .atomic_duplicate_state = komeda_pipeline_atomic_duplicate_state, + .atomic_destroy_state = komeda_pipeline_atomic_destroy_state, +}; + +static int komeda_pipeline_obj_add(struct komeda_kms_dev *kms, + struct komeda_pipeline *pipe) +{ + struct komeda_pipeline_state *st; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + st->pipe = pipe; + drm_atomic_private_obj_init(&pipe->obj, &st->obj, + &komeda_pipeline_obj_funcs); + + return 0; +} + +int komeda_kms_add_private_objs(struct komeda_kms_dev *kms, + struct komeda_dev *mdev) +{ + struct komeda_pipeline *pipe; + int i, err; + + for (i = 0; i < mdev->n_pipelines; i++) { + pipe = mdev->pipelines[i]; + + err = komeda_pipeline_obj_add(kms, pipe); + if (err) + return err; + + /* Add component */ + } + + return 0; +} + +void komeda_kms_cleanup_private_objs(struct komeda_dev *mdev) +{ + struct komeda_pipeline *pipe; + struct komeda_component *c; + int i, id; + + for (i = 0; i < mdev->n_pipelines; i++) { + pipe = mdev->pipelines[i]; + dp_for_each_set_bit(id, pipe->avail_comps) { + c = komeda_pipeline_get_component(pipe, id); + + drm_atomic_private_obj_fini(&c->obj); + } + drm_atomic_private_obj_fini(&pipe->obj); + } +} From 557c37360eca864eba692be7b7b72ec937b330af Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:41:48 +0000 Subject: [PATCH 022/539] drm/doc: Add initial komeda driver documentation v2: Some editing changes according to Randy Dunlap's comments Signed-off-by: James Qian Wang (Arm Technology China) Signed-off-by: Liviu Dudau --- Documentation/gpu/drivers.rst | 1 + Documentation/gpu/komeda-kms.rst | 488 +++++++++++++++++++++++++++++++ 2 files changed, 489 insertions(+) create mode 100644 Documentation/gpu/komeda-kms.rst diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index c176b34ae9c8..044a7025477c 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -18,6 +18,7 @@ GPU Driver Documentation bridge/dw-hdmi xen-front afbc + komeda-kms .. only:: subproject and html diff --git a/Documentation/gpu/komeda-kms.rst b/Documentation/gpu/komeda-kms.rst new file mode 100644 index 000000000000..b08da1cffecc --- /dev/null +++ b/Documentation/gpu/komeda-kms.rst @@ -0,0 +1,488 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== + drm/komeda Arm display driver +============================== + +The drm/komeda driver supports the Arm display processor D71 and later products, +this document gives a brief overview of driver design: how it works and why +design it like that. + +Overview of D71 like display IPs +================================ + +From D71, Arm display IP begins to adopt a flexible and modularized +architecture. A display pipeline is made up of multiple individual and +functional pipeline stages called components, and every component has some +specific capabilities that can give the flowed pipeline pixel data a +particular processing. + +Typical D71 components: + +Layer +----- +Layer is the first pipeline stage, which prepares the pixel data for the next +stage. It fetches the pixel from memory, decodes it if it's AFBC, rotates the +source image, unpacks or converts YUV pixels to the device internal RGB pixels, +then adjusts the color_space of pixels if needed. + +Scaler +------ +As its name suggests, scaler takes responsibility for scaling, and D71 also +supports image enhancements by scaler. +The usage of scaler is very flexible and can be connected to layer output +for layer scaling, or connected to compositor and scale the whole display +frame and then feed the output data into wb_layer which will then write it +into memory. + +Compositor (compiz) +------------------- +Compositor blends multiple layers or pixel data flows into one single display +frame. its output frame can be fed into post image processor for showing it on +the monitor or fed into wb_layer and written to memory at the same time. +user can also insert a scaler between compositor and wb_layer to down scale +the display frame first and and then write to memory. + +Writeback Layer (wb_layer) +-------------------------- +Writeback layer does the opposite things of Layer, which connects to compiz +and writes the composition result to memory. + +Post image processor (improc) +----------------------------- +Post image processor adjusts frame data like gamma and color space to fit the +requirements of the monitor. + +Timing controller (timing_ctrlr) +-------------------------------- +Final stage of display pipeline, Timing controller is not for the pixel +handling, but only for controlling the display timing. + +Merger +------ +D71 scaler mostly only has the half horizontal input/output capabilities +compared with Layer, like if Layer supports 4K input size, the scaler only can +support 2K input/output in the same time. To achieve the ful frame scaling, D71 +introduces Layer Split, which splits the whole image to two half parts and feeds +them to two Layers A and B, and does the scaling independently. After scaling +the result need to be fed to merger to merge two part images together, and then +output merged result to compiz. + +Splitter +-------- +Similar to Layer Split, but Splitter is used for writeback, which splits the +compiz result to two parts and then feed them to two scalers. + +Possible D71 Pipeline usage +=========================== + +Benefitting from the modularized architecture, D71 pipelines can be easily +adjusted to fit different usages. And D71 has two pipelines, which support two +types of working mode: + +- Dual display mode + Two pipelines work independently and separately to drive two display outputs. + +- Single display mode + Two pipelines work together to drive only one display output. + + On this mode, pipeline_B doesn't work indenpendently, but outputs its + composition result into pipeline_A, and its pixel timing also derived from + pipeline_A.timing_ctrlr. The pipeline_B works just like a "slave" of + pipeline_A(master) + +Single pipeline data flow +------------------------- + +.. kernel-render:: DOT + :alt: Single pipeline digraph + :caption: Single pipeline data flow + + digraph single_ppl { + rankdir=LR; + + subgraph { + "Memory"; + "Monitor"; + } + + subgraph cluster_pipeline { + style=dashed + node [shape=box] + { + node [bgcolor=grey style=dashed] + "Scaler-0"; + "Scaler-1"; + "Scaler-0/1" + } + + node [bgcolor=grey style=filled] + "Layer-0" -> "Scaler-0" + "Layer-1" -> "Scaler-0" + "Layer-2" -> "Scaler-1" + "Layer-3" -> "Scaler-1" + + "Layer-0" -> "Compiz" + "Layer-1" -> "Compiz" + "Layer-2" -> "Compiz" + "Layer-3" -> "Compiz" + "Scaler-0" -> "Compiz" + "Scaler-1" -> "Compiz" + + "Compiz" -> "Scaler-0/1" -> "Wb_layer" + "Compiz" -> "Improc" -> "Timing Controller" + } + + "Wb_layer" -> "Memory" + "Timing Controller" -> "Monitor" + } + +Dual pipeline with Slave enabled +-------------------------------- + +.. kernel-render:: DOT + :alt: Slave pipeline digraph + :caption: Slave pipeline enabled data flow + + digraph slave_ppl { + rankdir=LR; + + subgraph { + "Memory"; + "Monitor"; + } + node [shape=box] + subgraph cluster_pipeline_slave { + style=dashed + label="Slave Pipeline_B" + node [shape=box] + { + node [bgcolor=grey style=dashed] + "Slave.Scaler-0"; + "Slave.Scaler-1"; + } + + node [bgcolor=grey style=filled] + "Slave.Layer-0" -> "Slave.Scaler-0" + "Slave.Layer-1" -> "Slave.Scaler-0" + "Slave.Layer-2" -> "Slave.Scaler-1" + "Slave.Layer-3" -> "Slave.Scaler-1" + + "Slave.Layer-0" -> "Slave.Compiz" + "Slave.Layer-1" -> "Slave.Compiz" + "Slave.Layer-2" -> "Slave.Compiz" + "Slave.Layer-3" -> "Slave.Compiz" + "Slave.Scaler-0" -> "Slave.Compiz" + "Slave.Scaler-1" -> "Slave.Compiz" + } + + subgraph cluster_pipeline_master { + style=dashed + label="Master Pipeline_A" + node [shape=box] + { + node [bgcolor=grey style=dashed] + "Scaler-0"; + "Scaler-1"; + "Scaler-0/1" + } + + node [bgcolor=grey style=filled] + "Layer-0" -> "Scaler-0" + "Layer-1" -> "Scaler-0" + "Layer-2" -> "Scaler-1" + "Layer-3" -> "Scaler-1" + + "Slave.Compiz" -> "Compiz" + "Layer-0" -> "Compiz" + "Layer-1" -> "Compiz" + "Layer-2" -> "Compiz" + "Layer-3" -> "Compiz" + "Scaler-0" -> "Compiz" + "Scaler-1" -> "Compiz" + + "Compiz" -> "Scaler-0/1" -> "Wb_layer" + "Compiz" -> "Improc" -> "Timing Controller" + } + + "Wb_layer" -> "Memory" + "Timing Controller" -> "Monitor" + } + +Sub-pipelines for input and output +---------------------------------- + +A complete display pipeline can be easily divided into three sub-pipelines +according to the in/out usage. + +Layer(input) pipeline +~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-render:: DOT + :alt: Layer data digraph + :caption: Layer (input) data flow + + digraph layer_data_flow { + rankdir=LR; + node [shape=box] + + { + node [bgcolor=grey style=dashed] + "Scaler-n"; + } + + "Layer-n" -> "Scaler-n" -> "Compiz" + } + +.. kernel-render:: DOT + :alt: Layer Split digraph + :caption: Layer Split pipeline + + digraph layer_data_flow { + rankdir=LR; + node [shape=box] + + "Layer-0/1" -> "Scaler-0" -> "Merger" + "Layer-2/3" -> "Scaler-1" -> "Merger" + "Merger" -> "Compiz" + } + +Writeback(output) pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. kernel-render:: DOT + :alt: writeback digraph + :caption: Writeback(output) data flow + + digraph writeback_data_flow { + rankdir=LR; + node [shape=box] + + { + node [bgcolor=grey style=dashed] + "Scaler-n"; + } + + "Compiz" -> "Scaler-n" -> "Wb_layer" + } + +.. kernel-render:: DOT + :alt: split writeback digraph + :caption: Writeback(output) Split data flow + + digraph writeback_data_flow { + rankdir=LR; + node [shape=box] + + "Compiz" -> "Splitter" + "Splitter" -> "Scaler-0" -> "Merger" + "Splitter" -> "Scaler-1" -> "Merger" + "Merger" -> "Wb_layer" + } + +Display output pipeline +~~~~~~~~~~~~~~~~~~~~~~~ +.. kernel-render:: DOT + :alt: display digraph + :caption: display output data flow + + digraph single_ppl { + rankdir=LR; + node [shape=box] + + "Compiz" -> "Improc" -> "Timing Controller" + } + +In the following section we'll see these three sub-pipelines will be handled +by KMS-plane/wb_conn/crtc respectively. + +Komeda Resource abstraction +=========================== + +struct komeda_pipeline/component +-------------------------------- + +To fully utilize and easily access/configure the HW, the driver side also uses +a similar architecture: Pipeline/Component to describe the HW features and +capabilities, and a specific component includes two parts: + +- Data flow controlling. +- Specific component capabilities and features. + +So the driver defines a common header struct komeda_component to describe the +data flow control and all specific components are a subclass of this base +structure. + +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h + :internal: + +Resource discovery and initialization +===================================== + +Pipeline and component are used to describe how to handle the pixel data. We +still need a @struct komeda_dev to describe the whole view of the device, and +the control-abilites of device. + +We have &komeda_dev, &komeda_pipeline, &komeda_component. Now fill devices with +pipelines. Since komeda is not for D71 only but also intended for later products, +of course we’d better share as much as possible between different products. To +achieve this, split the komeda device into two layers: CORE and CHIP. + +- CORE: for common features and capabilities handling. +- CHIP: for register programing and HW specific feature (limitation) handling. + +CORE can access CHIP by three chip function structures: + +- struct komeda_dev_funcs +- struct komeda_pipeline_funcs +- struct komeda_component_funcs + +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_dev.h + :internal: + +Format handling +=============== + +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h + :internal: +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h + :internal: + +Attach komeda_dev to DRM-KMS +============================ + +Komeda abstracts resources by pipeline/component, but DRM-KMS uses +crtc/plane/connector. One KMS-obj cannot represent only one single component, +since the requirements of a single KMS object cannot simply be achieved by a +single component, usually that needs multiple components to fit the requirement. +Like set mode, gamma, ctm for KMS all target on CRTC-obj, but komeda needs +compiz, improc and timing_ctrlr to work together to fit these requirements. +And a KMS-Plane may require multiple komeda resources: layer/scaler/compiz. + +So, one KMS-Obj represents a sub-pipeline of komeda resources. + +- Plane: `Layer(input) pipeline`_ +- Wb_connector: `Writeback(output) pipeline`_ +- Crtc: `Display output pipeline`_ + +So, for komeda, we treat KMS crtc/plane/connector as users of pipeline and +component, and at any one time a pipeline/component only can be used by one +user. And pipeline/component will be treated as private object of DRM-KMS; the +state will be managed by drm_atomic_state as well. + +How to map plane to Layer(input) pipeline +----------------------------------------- + +Komeda has multiple Layer input pipelines, see: +- `Single pipeline data flow`_ +- `Dual pipeline with Slave enabled`_ + +The easiest way is binding a plane to a fixed Layer pipeline, but consider the +komeda capabilities: + +- Layer Split, See `Layer(input) pipeline`_ + + Layer_Split is quite complicated feature, which splits a big image into two + parts and handles it by two layers and two scalers individually. But it + imports an edge problem or effect in the middle of the image after the split. + To avoid such a problem, it needs a complicated Split calculation and some + special configurations to the layer and scaler. We'd better hide such HW + related complexity to user mode. + +- Slave pipeline, See `Dual pipeline with Slave enabled`_ + + Since the compiz component doesn't output alpha value, the slave pipeline + only can be used for bottom layers composition. The komeda driver wants to + hide this limitation to the user. The way to do this is to pick a suitable + Layer according to plane_state->zpos. + +So for komeda, the KMS-plane doesn't represent a fixed komeda layer pipeline, +but multiple Layers with same capabilities. Komeda will select one or more +Layers to fit the requirement of one KMS-plane. + +Make component/pipeline to be drm_private_obj +--------------------------------------------- + +Add :c:type:`drm_private_obj` to :c:type:`komeda_component`, :c:type:`komeda_pipeline` + +.. code-block:: c + + struct komeda_component { + struct drm_private_obj obj; + ... + } + + struct komeda_pipeline { + struct drm_private_obj obj; + ... + } + +Tracking component_state/pipeline_state by drm_atomic_state +----------------------------------------------------------- + +Add :c:type:`drm_private_state` and user to :c:type:`komeda_component_state`, +:c:type:`komeda_pipeline_state` + +.. code-block:: c + + struct komeda_component_state { + struct drm_private_state obj; + void *binding_user; + ... + } + + struct komeda_pipeline_state { + struct drm_private_state obj; + struct drm_crtc *crtc; + ... + } + +komeda component validation +--------------------------- + +Komeda has multiple types of components, but the process of validation are +similar, usually including the following steps: + +.. code-block:: c + + int komeda_xxxx_validate(struct komeda_component_xxx xxx_comp, + struct komeda_component_output *input_dflow, + struct drm_plane/crtc/connector *user, + struct drm_plane/crtc/connector_state, *user_state) + { + setup 1: check if component is needed, like the scaler is optional depending + on the user_state; if unneeded, just return, and the caller will + put the data flow into next stage. + Setup 2: check user_state with component features and capabilities to see + if requirements can be met; if not, return fail. + Setup 3: get component_state from drm_atomic_state, and try set to set + user to component; fail if component has been assigned to another + user already. + Setup 3: configure the component_state, like set its input component, + convert user_state to component specific state. + Setup 4: adjust the input_dflow and prepare it for the next stage. + } + +komeda_kms Abstraction +---------------------- + +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_kms.h + :internal: + +komde_kms Functions +------------------- +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_crtc.c + :internal: +.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_plane.c + :internal: + +Build komeda to be a Linux module driver +======================================== + +Now we have two level devices: + +- komeda_dev: describes the real display hardware. +- komeda_kms_dev: attachs or connects komeda_dev to DRM-KMS. + +All komeda operations are supplied or operated by komeda_dev or komeda_kms_dev, +the module driver is only a simple wrapper to pass the Linux command +(probe/remove/pm) into komeda_dev or komeda_kms_dev. From ab6911b73493fac9cdcc3b2c77ddca6b1016e057 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Thu, 3 Jan 2019 11:42:05 +0000 Subject: [PATCH 023/539] MAINTAINERS: Add maintainer for Arm komeda driver v4: Added git tree [Daniel Vetter] v2: Adjusted the position of KOMEDA by alphabetical order Signed-off-by: James Qian Wang (Arm Technology China) Acked-by: Liviu Dudau [Updated entry to show that Mali Developers is a mailing list and to add myself as maintainer] Signed-off-by: Liviu Dudau --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e79eca22532d..e5e741714462 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1133,11 +1133,23 @@ S: Supported F: drivers/gpu/drm/arm/hdlcd_* F: Documentation/devicetree/bindings/display/arm,hdlcd.txt +ARM KOMEDA DRM-KMS DRIVER +M: James (Qian) Wang +M: Liviu Dudau +L: Mali DP Maintainers +S: Supported +T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp +F: drivers/gpu/drm/arm/display/include/ +F: drivers/gpu/drm/arm/display/komeda/ +F: Documentation/devicetree/bindings/display/arm/arm,komeda.txt +F: Documentation/gpu/komeda-kms.rst + ARM MALI-DP DRM DRIVER M: Liviu Dudau M: Brian Starkey L: Mali DP Maintainers S: Supported +T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp F: drivers/gpu/drm/arm/ F: Documentation/devicetree/bindings/display/arm,malidp.txt F: Documentation/gpu/afbc.rst From bd780f37a3617d3dda74b97013ae8aa9b07a1d91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:09 +0000 Subject: [PATCH 024/539] drm/i915: Track all held rpm wakerefs Everytime we take a wakeref, record the stack trace of where it was taken; clearing the set if we ever drop back to no owners. For debugging a rpm leak, we can look at all the current wakerefs and check if they have a matching rpm_put. v2: Use skip=0 for unwinding the stack as it appears our noinline function doesn't appear on the stack (nor does save_stack_trace itself!) v3: Allow rpm->debug_count to disappear between inspections and so avoid calling krealloc(0) as that may return a ZERO_PTR not NULL! (Mika) v4: Show who last acquire/released the runtime pm Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Tested-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 6 + drivers/gpu/drm/i915/i915_drv.c | 8 +- drivers/gpu/drm/i915/i915_drv.h | 20 ++ drivers/gpu/drm/i915/intel_drv.h | 44 ++- drivers/gpu/drm/i915/intel_runtime_pm.c | 285 ++++++++++++++++-- .../gpu/drm/i915/selftests/mock_gem_device.c | 8 +- 7 files changed, 324 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 9e36ffb5eb7c..ad4d71161dda 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -21,11 +21,11 @@ config DRM_I915_DEBUG select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV + select STACKDEPOT select DRM_DP_AUX_CHARDEV select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y - select STACKDEPOT if DRM=y # for DRM_DEBUG_MM select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS @@ -173,6 +173,7 @@ config DRM_I915_DEBUG_RUNTIME_PM bool "Enable extra state checking for runtime PM" depends on DRM_I915 default n + select STACKDEPOT help Choose this option to turn on extra state checking for the runtime PM functionality. This may introduce overhead during diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 95813e21ae02..050cf8abd426 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) pci_power_name(pdev->current_state), pdev->current_state); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) { + struct drm_printer p = drm_seq_file_printer(m); + + print_intel_runtime_pm_wakeref(dev_priv, &p); + } + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 75652dc1e24c..5731f992cf44 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -905,6 +905,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->pps_mutex); i915_memcpy_init_early(dev_priv); + intel_runtime_pm_init_early(dev_priv); ret = i915_workqueues_init(dev_priv); if (ret < 0) @@ -1807,8 +1808,7 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); enable_rpm_wakeref_asserts(dev_priv); - - WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count)); + intel_runtime_pm_cleanup(dev_priv); } static void i915_driver_release(struct drm_device *dev) @@ -2010,6 +2010,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) out: enable_rpm_wakeref_asserts(dev_priv); + if (!dev_priv->uncore.user_forcewake.count) + intel_runtime_pm_cleanup(dev_priv); return ret; } @@ -2965,7 +2967,7 @@ static int intel_runtime_suspend(struct device *kdev) } enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); + intel_runtime_pm_cleanup(dev_priv); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5df26ccda8a4..7e3566a0ba72 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include /* for struct drm_dma_handle */ @@ -1156,6 +1157,25 @@ struct i915_runtime_pm { atomic_t wakeref_count; bool suspended; bool irqs_enabled; + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + /* + * To aide detection of wakeref leaks and general misuse, we + * track all wakeref holders. With manual markup (i.e. returning + * a cookie to each rpm_get caller which they then supply to their + * paired rpm_put) we can remove corresponding pairs of and keep + * the array trimmed to active wakerefs. + */ + struct intel_runtime_pm_debug { + spinlock_t lock; + + depot_stack_handle_t last_acquire; + depot_stack_handle_t last_release; + + depot_stack_handle_t *owners; + unsigned long count; + } debug; +#endif }; enum intel_pipe_crc_source { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1a11c2beb7f3..ac513fd70315 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -41,6 +41,8 @@ #include #include +struct drm_printer; + /** * __wait_for - magic wait macro * @@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp); void intel_init_quirks(struct drm_i915_private *dev_priv); /* intel_runtime_pm.c */ +void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv); int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_cleanup(struct drm_i915_private *dev_priv); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); @@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); void intel_runtime_pm_disable(struct drm_i915_private *dev_priv); +void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv); const char * intel_display_power_domain_str(enum intel_display_power_domain domain); @@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices); static inline void -assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) +assert_rpm_device_not_suspended(struct drm_i915_private *i915) { - WARN_ONCE(dev_priv->runtime_pm.suspended, + WARN_ONCE(i915->runtime_pm.suspended, "Device suspended during HW access\n"); } static inline void -assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) +assert_rpm_wakelock_held(struct drm_i915_private *i915) { - assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), + assert_rpm_device_not_suspended(i915); + WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } /** * disable_rpm_wakeref_asserts - disable the RPM assert checks - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function disable asserts that check if we hold an RPM wakelock * reference, while keeping the device-not-suspended checks still enabled. @@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) * enable_rpm_wakeref_asserts(). */ static inline void -disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) +disable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_inc(&dev_priv->runtime_pm.wakeref_count); + atomic_inc(&i915->runtime_pm.wakeref_count); } /** * enable_rpm_wakeref_asserts - re-enable the RPM assert checks - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function re-enables the RPM assert checks after disabling them with * disable_rpm_wakeref_asserts. It's meant to be used only in special @@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) * disable_rpm_wakeref_asserts(). */ static inline void -enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) +enable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_dec(&dev_priv->runtime_pm.wakeref_count); + atomic_dec(&i915->runtime_pm.wakeref_count); } -void intel_runtime_pm_get(struct drm_i915_private *dev_priv); -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); -void intel_runtime_pm_put(struct drm_i915_private *dev_priv); +void intel_runtime_pm_get(struct drm_i915_private *i915); +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915); +void intel_runtime_pm_put(struct drm_i915_private *i915); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p); +#else +static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p) +{ +} +#endif void chv_phy_powergate_lanes(struct intel_encoder *encoder, bool override, unsigned int mask); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9e9501f82f06..08f809371bbd 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -29,6 +29,8 @@ #include #include +#include + #include "i915_drv.h" #include "intel_drv.h" @@ -49,6 +51,218 @@ * present for a given platform. */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) + +#include + +#define STACKDEPTH 8 + +static noinline depot_stack_handle_t __save_depot_stack(void) +{ + unsigned long entries[STACKDEPTH]; + struct stack_trace trace = { + .entries = entries, + .max_entries = ARRAY_SIZE(entries), + .skip = 1, + }; + + save_stack_trace(&trace); + if (trace.nr_entries && + trace.entries[trace.nr_entries - 1] == ULONG_MAX) + trace.nr_entries--; + + return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN); +} + +static void __print_depot_stack(depot_stack_handle_t stack, + char *buf, int sz, int indent) +{ + unsigned long entries[STACKDEPTH]; + struct stack_trace trace = { + .entries = entries, + .max_entries = ARRAY_SIZE(entries), + }; + + depot_fetch_stack(stack, &trace); + snprint_stack_trace(buf, sz, &trace, indent); +} + +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + + spin_lock_init(&rpm->debug.lock); +} + +static noinline void +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + depot_stack_handle_t stack, *stacks; + unsigned long flags; + + atomic_inc(&rpm->wakeref_count); + assert_rpm_wakelock_held(i915); + + if (!HAS_RUNTIME_PM(i915)) + return; + + stack = __save_depot_stack(); + if (!stack) + return; + + spin_lock_irqsave(&rpm->debug.lock, flags); + + if (!rpm->debug.count) + rpm->debug.last_acquire = stack; + + stacks = krealloc(rpm->debug.owners, + (rpm->debug.count + 1) * sizeof(*stacks), + GFP_NOWAIT | __GFP_NOWARN); + if (stacks) { + stacks[rpm->debug.count++] = stack; + rpm->debug.owners = stacks; + } + + spin_unlock_irqrestore(&rpm->debug.lock, flags); +} + +static int cmphandle(const void *_a, const void *_b) +{ + const depot_stack_handle_t * const a = _a, * const b = _b; + + if (*a < *b) + return -1; + else if (*a > *b) + return 1; + else + return 0; +} + +static void +__print_intel_runtime_pm_wakeref(struct drm_printer *p, + const struct intel_runtime_pm_debug *dbg) +{ + unsigned long i; + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return; + + if (dbg->last_acquire) { + __print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref last acquired:\n%s", buf); + } + + if (dbg->last_release) { + __print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref last released:\n%s", buf); + } + + drm_printf(p, "Wakeref count: %lu\n", dbg->count); + + sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL); + + for (i = 0; i < dbg->count; i++) { + depot_stack_handle_t stack = dbg->owners[i]; + unsigned long rep; + + rep = 1; + while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) + rep++, i++; + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); + } + + kfree(buf); +} + +static noinline void +untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + struct intel_runtime_pm_debug dbg = {}; + struct drm_printer p; + unsigned long flags; + + assert_rpm_wakelock_held(i915); + if (atomic_dec_and_lock_irqsave(&rpm->wakeref_count, + &rpm->debug.lock, + flags)) { + dbg = rpm->debug; + + rpm->debug.owners = NULL; + rpm->debug.count = 0; + rpm->debug.last_release = __save_depot_stack(); + + spin_unlock_irqrestore(&rpm->debug.lock, flags); + } + if (!dbg.count) + return; + + p = drm_debug_printer("i915"); + __print_intel_runtime_pm_wakeref(&p, &dbg); + + kfree(dbg.owners); +} + +void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + struct drm_printer *p) +{ + struct intel_runtime_pm_debug dbg = {}; + + do { + struct i915_runtime_pm *rpm = &i915->runtime_pm; + unsigned long alloc = dbg.count; + depot_stack_handle_t *s; + + spin_lock_irq(&rpm->debug.lock); + dbg.count = rpm->debug.count; + if (dbg.count <= alloc) { + memcpy(dbg.owners, + rpm->debug.owners, + dbg.count * sizeof(*s)); + } + dbg.last_acquire = rpm->debug.last_acquire; + dbg.last_release = rpm->debug.last_release; + spin_unlock_irq(&rpm->debug.lock); + if (dbg.count <= alloc) + break; + + s = krealloc(dbg.owners, dbg.count * sizeof(*s), GFP_KERNEL); + if (!s) + goto out; + + dbg.owners = s; + } while (1); + + __print_intel_runtime_pm_wakeref(p, &dbg); + +out: + kfree(dbg.owners); +} + +#else + +static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ +} + +static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + atomic_inc(&i915->runtime_pm.wakeref_count); + assert_rpm_wakelock_held(i915); +} + +static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +{ + assert_rpm_wakelock_held(i915); + atomic_dec(&i915->runtime_pm.wakeref_count); +} + +#endif + bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); @@ -3986,7 +4200,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) /** * intel_runtime_pm_get - grab a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference (mostly used for GEM * code to ensure the GTT or GT is on) and ensures that it is powered up. @@ -3994,22 +4208,21 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. */ -void intel_runtime_pm_get(struct drm_i915_private *dev_priv) +void intel_runtime_pm_get(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; int ret; ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->runtime_pm.wakeref_count); - assert_rpm_wakelock_held(dev_priv); + track_intel_runtime_pm_wakeref(i915); } /** * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference if the device is * already in use and ensures that it is powered up. It is illegal to try @@ -4020,10 +4233,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) * * Returns: True if the wakeref was acquired, or False otherwise. */ -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) +bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) { if (IS_ENABLED(CONFIG_PM)) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* @@ -4036,15 +4249,14 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) return false; } - atomic_inc(&dev_priv->runtime_pm.wakeref_count); - assert_rpm_wakelock_held(dev_priv); + track_intel_runtime_pm_wakeref(i915); return true; } /** * intel_runtime_pm_get_noresume - grab a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function grabs a device-level runtime pm reference (mostly used for GEM * code to ensure the GTT or GT is on). @@ -4059,32 +4271,31 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. */ -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) +void intel_runtime_pm_get_noresume(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - assert_rpm_wakelock_held(dev_priv); + assert_rpm_wakelock_held(i915); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->runtime_pm.wakeref_count); + track_intel_runtime_pm_wakeref(i915); } /** * intel_runtime_pm_put - release a runtime pm reference - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function drops the device-level runtime pm reference obtained by * intel_runtime_pm_get() and might power down the corresponding * hardware block right away if this is the last reference. */ -void intel_runtime_pm_put(struct drm_i915_private *dev_priv) +void intel_runtime_pm_put(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->runtime_pm.wakeref_count); + untrack_intel_runtime_pm_wakeref(i915); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); @@ -4092,7 +4303,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) /** * intel_runtime_pm_enable - enable runtime pm - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function enables runtime pm at the end of the driver load sequence. * @@ -4100,9 +4311,9 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) * subordinate display power domains. That is done by * intel_power_domains_enable(). */ -void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) +void intel_runtime_pm_enable(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* @@ -4124,7 +4335,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * so the driver's own RPM reference tracking asserts also work on * platforms without RPM support. */ - if (!HAS_RUNTIME_PM(dev_priv)) { + if (!HAS_RUNTIME_PM(i915)) { int ret; pm_runtime_dont_use_autosuspend(kdev); @@ -4142,17 +4353,35 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) pm_runtime_put_autosuspend(kdev); } -void intel_runtime_pm_disable(struct drm_i915_private *dev_priv) +void intel_runtime_pm_disable(struct drm_i915_private *i915) { - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; /* Transfer rpm ownership back to core */ - WARN(pm_runtime_get_sync(&dev_priv->drm.pdev->dev) < 0, + WARN(pm_runtime_get_sync(kdev) < 0, "Failed to pass rpm ownership back to core\n"); pm_runtime_dont_use_autosuspend(kdev); - if (!HAS_RUNTIME_PM(dev_priv)) + if (!HAS_RUNTIME_PM(i915)) pm_runtime_put(kdev); } + +void intel_runtime_pm_cleanup(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + int count; + + count = atomic_fetch_inc(&rpm->wakeref_count); /* balance untrack */ + WARN(count, + "i915->runtime_pm.wakeref_count=%d on cleanup\n", + count); + + untrack_intel_runtime_pm_wakeref(i915); +} + +void intel_runtime_pm_init_early(struct drm_i915_private *i915) +{ + init_intel_runtime_pm_wakeref(i915); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index baa3c38919de..082809569681 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -154,15 +154,17 @@ struct drm_i915_private *mock_gem_device(void) pdev->dev.archdata.iommu = (void *)-1; #endif + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + intel_runtime_pm_init_early(i915); + dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); if (pm_runtime_enabled(&pdev->dev)) WARN_ON(pm_runtime_get_sync(&pdev->dev)); - i915 = (struct drm_i915_private *)(pdev + 1); - pci_set_drvdata(pdev, i915); - err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); if (err) { pr_err("Failed to initialise mock GEM device: err=%d\n", err); From 16e4dd0342a804090fd0958bb271d3a6b57056ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:10 +0000 Subject: [PATCH 025/539] drm/i915: Markup paired operations on wakerefs The majority of runtime-pm operations are bounded and scoped within a function; these are easy to verify that the wakeref are handled correctly. We can employ the compiler to help us, and reduce the number of wakerefs tracked when debugging, by passing around cookies provided by the various rpm_get functions to their rpm_put counterpart. This makes the pairing explicit, and given the required wakeref cookie the compiler can verify that we pass an initialised value to the rpm_put (quite handy for double checking error paths). For regular builds, the compiler should be able to eliminate the unused local variables and the program growth should be minimal. Fwiw, it came out as a net improvement as gcc was able to refactor rpm_get and rpm_get_if_in_use together, v2: Just s/rpm_put/rpm_put_unchecked/ everywhere, leaving the manual mark up for smaller more targeted patches. v3: Mention the cookie in Returns Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 8 +- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/sched_policy.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 54 +++++------ drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 20 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 10 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- drivers/gpu/drm/i915/i915_sysfs.c | 12 +-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 15 ++- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/intel_fbdev.c | 4 +- drivers/gpu/drm/i915/intel_guc_log.c | 6 +- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_huc.c | 2 +- drivers/gpu/drm/i915/intel_panel.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 97 +++++++++++++++---- drivers/gpu/drm/i915/intel_uncore.c | 2 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 10 +- .../drm/i915/selftests/i915_gem_coherency.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_context.c | 10 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 6 +- drivers/gpu/drm/i915/selftests/i915_request.c | 8 +- drivers/gpu/drm/i915/selftests/intel_guc.c | 4 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 6 +- drivers/gpu/drm/i915/selftests/intel_lrc.c | 10 +- .../drm/i915/selftests/intel_workarounds.c | 10 +- 37 files changed, 209 insertions(+), 139 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 359d37d5c958..1fa2f65c3cd1 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -180,7 +180,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) } mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } static int alloc_vgpu_fence(struct intel_vgpu *vgpu) @@ -206,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; out_free_fence: gvt_vgpu_err("Failed to alloc fences\n"); @@ -219,7 +219,7 @@ out_free_fence: vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return -ENOSPC; } @@ -317,7 +317,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu) intel_runtime_pm_get(dev_priv); _clear_vgpu_fence(vgpu); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index b4ab1dad0143..435c746c3f73 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -597,7 +597,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv) static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv) { - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } /** diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index c32e7d5e8629..f04b3b965bfc 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) } } spin_unlock_bh(&scheduler->mmio_context_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&vgpu->gvt->sched_lock); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1ad8c5e1455d..3816dcae2185 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -997,7 +997,7 @@ complete: intel_uncore_forcewake_put(gvt->dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(gvt->dev_priv); + intel_runtime_pm_put_unchecked(gvt->dev_priv); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } @@ -1451,7 +1451,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } if (ret && (vgpu_is_vm_unhealthy(ret))) { diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 050cf8abd426..6818079669a7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -877,7 +877,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -953,7 +953,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) intel_runtime_pm_get(i915); gpu = i915_capture_gpu_state(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -1226,7 +1226,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret; } @@ -1292,7 +1292,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) seq_printf(m, "Hangcheck active, timer fires in %dms\n", @@ -1579,7 +1579,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) else err = ironlake_drpc_info(m); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return err; } @@ -1632,7 +1632,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) } mutex_unlock(&fbc->lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -1695,7 +1695,7 @@ static int i915_ips_status(struct seq_file *m, void *unused) seq_puts(m, "Currently: disabled\n"); } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -1723,7 +1723,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled)); @@ -1756,7 +1756,7 @@ static int i915_emon_status(struct seq_file *m, void *unused) seq_printf(m, "GFX power: %ld\n", gfx); seq_printf(m, "Total power: %ld\n", chipset + gfx); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -1805,7 +1805,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) mutex_unlock(&dev_priv->pcu_lock); out: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret; } @@ -2017,7 +2017,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) seq_puts(m, "L-shaped memory detected\n"); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -2067,7 +2067,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } seq_printf(m, "RPS enabled? %d\n", rps->enabled); @@ -2160,7 +2160,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -2192,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -2601,7 +2601,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) dev_priv->psr.last_exit); } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -2632,7 +2632,7 @@ retry: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret; } @@ -2665,7 +2665,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data) intel_runtime_pm_get(dev_priv); if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) { - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return -ENODEV; } @@ -2673,7 +2673,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data) power = I915_READ(MCH_SECP_NRG_STTS); power = (1000000 * power) >> units; /* convert to uJ */ - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); seq_printf(m, "%llu", power); @@ -2775,7 +2775,7 @@ out: seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE)); seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL)); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -3114,7 +3114,7 @@ static int i915_display_info(struct seq_file *m, void *unused) drm_connector_list_iter_end(&conn_iter); mutex_unlock(&dev->mode_config.mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -3139,7 +3139,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) for_each_engine(engine, dev_priv, id) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -3265,7 +3265,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, dev_priv->wm.distrust_bios_wm = true; dev_priv->ipc_enabled = enable; intel_enable_ipc(dev_priv); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return len; } @@ -4090,7 +4090,7 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_drain_freed_objects(i915); out: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return ret; } @@ -4112,7 +4112,7 @@ i915_cache_sharing_get(void *data, u64 *val) snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; @@ -4140,7 +4140,7 @@ i915_cache_sharing_set(void *data, u64 val) snpcr |= (val << GEN6_MBC_SNPCR_SHIFT); I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return 0; } @@ -4388,7 +4388,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) gen10_sseu_device_status(dev_priv, &sseu); } - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); i915_print_sseu_info(m, false, &sseu); @@ -4416,7 +4416,7 @@ static int i915_forcewake_release(struct inode *inode, struct file *file) return 0; intel_uncore_forcewake_user_put(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e3566a0ba72..e9c909c43759 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -131,6 +131,8 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) +typedef depot_stack_handle_t intel_wakeref_t; + enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 74710e5d946e..640e6361dda3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -175,7 +175,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return i915->gt.epoch; } @@ -814,7 +814,7 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } static void @@ -1149,7 +1149,7 @@ out_unpin: i915_vma_unpin(vma); } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1356,7 +1356,7 @@ out_unpin: i915_vma_unpin(vma); } out_rpm: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1968,7 +1968,7 @@ err_unpin: err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); i915_gem_object_unpin_pages(obj); err: switch (ret) { @@ -2068,7 +2068,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) @@ -4765,7 +4765,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (on) cond_resched(); } - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) @@ -4901,7 +4901,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_engines_sanitize(i915, false); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -4965,12 +4965,12 @@ int i915_gem_suspend(struct drm_i915_private *i915) if (WARN_ON(!intel_engines_are_idle(i915))) i915_gem_set_wedged(i915); /* no hope, discard everything */ - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return 0; err_unlock: mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e7994505d850..c80943698ca2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2424,7 +2424,7 @@ err_vma: eb_release_vmas(&eb); mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(eb.i915); + intel_runtime_pm_put_unchecked(eb.i915); i915_gem_context_put(eb.ctx); err_destroy: eb_destroy(&eb); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d67c07cdd0b8..b3391070acf7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -258,7 +258,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, */ if (intel_runtime_pm_get_if_in_use(fence->i915)) { fence_write(fence, vma); - intel_runtime_pm_put(fence->i915); + intel_runtime_pm_put_unchecked(fence->i915); } if (vma) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a8807fbed0aa..51f80ddd938d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2536,7 +2536,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -2556,7 +2556,7 @@ static void ggtt_unbind_vma(struct i915_vma *vma) intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static int aliasing_gtt_bind_vma(struct i915_vma *vma, @@ -2590,7 +2590,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } return 0; @@ -2603,7 +2603,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } if (vma->flags & I915_VMA_LOCAL_BIND) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6cc2b964c955..2bef02d0883d 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -265,7 +265,7 @@ i915_gem_shrink(struct drm_i915_private *i915, } if (flags & I915_SHRINK_BOUND) - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); i915_retire_requests(i915); @@ -299,7 +299,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return freed; } @@ -377,7 +377,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } shrinker_unlock(i915, unlock); @@ -397,7 +397,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not @@ -451,7 +451,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_VMAPS); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 288b0662f7b7..787a9ed1ef7d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3374,7 +3374,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, wake_up_all(&dev_priv->gpu_error.reset_queue); out: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } /* Called from drm generic code, passed 'crtc' which diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5b1ae5ed97b3..e4dfd1477c78 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1365,7 +1365,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(dev_priv); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); if (stream->ctx) oa_put_render_ctx_id(stream); @@ -2123,7 +2123,7 @@ err_oa_buf_alloc: put_oa_config(dev_priv, stream->oa_config); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); err_config: if (stream->ctx) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d6c8f8fdfda5..c99fcfce79d5 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -210,7 +210,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) if (fw) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } static void @@ -231,7 +231,7 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) intel_runtime_pm_get_if_in_use(dev_priv)) { val = intel_get_cagf(dev_priv, I915_READ_NOTRACE(GEN6_RPSTAT1)); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], @@ -448,7 +448,7 @@ static u64 get_rc6(struct drm_i915_private *i915) if (intel_runtime_pm_get_if_in_use(i915)) { val = __get_rc6(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); /* * If we are coming back from being runtime suspended we must diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index c0cfe7ae2ba5..53c20e103d56 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -46,7 +46,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, intel_runtime_pm_get(dev_priv); res = intel_rc6_residency_us(dev_priv, reg); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -274,7 +274,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, } mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return snprintf(buf, PAGE_SIZE, "%d\n", ret); } @@ -371,7 +371,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, val > rps->max_freq || val < rps->min_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return -EINVAL; } @@ -392,7 +392,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret ?: count; } @@ -429,7 +429,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, val > rps->max_freq || val > rps->max_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return -EINVAL; } @@ -446,7 +446,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret ?: count; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1cc441f06c73..a980d5d1e601 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2101,7 +2101,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, err: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return vma; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ac513fd70315..a1e4e1033289 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "i915_drv.h" #include @@ -2182,10 +2183,16 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *i915) atomic_dec(&i915->runtime_pm.wakeref_count); } -void intel_runtime_pm_get(struct drm_i915_private *i915); -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915); -void intel_runtime_pm_put(struct drm_i915_private *i915); +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915); +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915); + +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref); +#else +#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915) +#endif #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 5990f8500bca..2e60463f2468 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -928,7 +928,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) idle = false; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return idle; } @@ -1485,7 +1485,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (intel_runtime_pm_get_if_in_use(engine->i915)) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(engine->i915); + intel_runtime_pm_put_unchecked(engine->i915); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index a0c5046e170c..215e5894842d 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -276,7 +276,7 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma = vma; ifbdev->vma_flags = flags; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -284,7 +284,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_unpin: intel_unpin_fb_vma(vma, flags); out_unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index d3ebdbc0182e..1b1581a42aa1 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -445,7 +445,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log) */ intel_runtime_pm_get(dev_priv); guc_action_flush_log_complete(guc); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } int intel_guc_log_create(struct intel_guc_log *log) @@ -528,7 +528,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level), GUC_LOG_LEVEL_IS_ENABLED(level), GUC_LOG_LEVEL_TO_VERBOSITY(level)); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); if (ret) { DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); goto out_unlock; @@ -610,7 +610,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) intel_runtime_pm_get(i915); guc_action_flush_log(guc); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index ae92d6560165..b1a9cb960ca4 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -261,7 +261,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } bool intel_encoder_hotplug(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index bc27b691d824..c2b076e9bada 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -122,7 +122,7 @@ int intel_huc_check_status(struct intel_huc *huc) intel_runtime_pm_get(dev_priv); status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return status; } diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index ee3e0842d542..c2b7455a023e 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1213,7 +1213,7 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd) ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness); drm_modeset_unlock(&dev->mode_config.connection_mutex); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 08f809371bbd..c29577d7a35a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -94,7 +94,7 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) spin_lock_init(&rpm->debug.lock); } -static noinline void +static noinline depot_stack_handle_t track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) { struct i915_runtime_pm *rpm = &i915->runtime_pm; @@ -105,11 +105,11 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) assert_rpm_wakelock_held(i915); if (!HAS_RUNTIME_PM(i915)) - return; + return -1; stack = __save_depot_stack(); if (!stack) - return; + return -1; spin_lock_irqsave(&rpm->debug.lock, flags); @@ -122,9 +122,57 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) if (stacks) { stacks[rpm->debug.count++] = stack; rpm->debug.owners = stacks; + } else { + stack = -1; } spin_unlock_irqrestore(&rpm->debug.lock, flags); + + return stack; +} + +static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915, + depot_stack_handle_t stack) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + unsigned long flags, n; + bool found = false; + + if (unlikely(stack == -1)) + return; + + spin_lock_irqsave(&rpm->debug.lock, flags); + for (n = rpm->debug.count; n--; ) { + if (rpm->debug.owners[n] == stack) { + memmove(rpm->debug.owners + n, + rpm->debug.owners + n + 1, + (--rpm->debug.count - n) * sizeof(stack)); + found = true; + break; + } + } + spin_unlock_irqrestore(&rpm->debug.lock, flags); + + if (WARN(!found, + "Unmatched wakeref (tracking %lu), count %u\n", + rpm->debug.count, atomic_read(&rpm->wakeref_count))) { + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return; + + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); + + stack = READ_ONCE(rpm->debug.last_release); + if (stack) { + __print_depot_stack(stack, buf, PAGE_SIZE, 2); + DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); + } + + kfree(buf); + } } static int cmphandle(const void *_a, const void *_b) @@ -249,10 +297,12 @@ static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) { } -static void track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) +static depot_stack_handle_t +track_intel_runtime_pm_wakeref(struct drm_i915_private *i915) { atomic_inc(&i915->runtime_pm.wakeref_count); assert_rpm_wakelock_held(i915); + return -1; } static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915) @@ -1852,7 +1902,7 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, mutex_unlock(&power_domains->lock); if (!is_enabled) - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return is_enabled; } @@ -1886,7 +1936,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, mutex_unlock(&power_domains->lock); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); } #define I830_PIPES_POWER_DOMAINS ( \ @@ -3994,7 +4044,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) { /* Keep the power well enabled, but cancel its rpm wakeref. */ - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); /* Remove the refcount we took to keep power well support disabled. */ if (!i915_modparams.disable_power_well) @@ -4207,8 +4257,10 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. + * + * Returns: the wakeref cookie to pass to intel_runtime_pm_put() */ -void intel_runtime_pm_get(struct drm_i915_private *i915) +intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915) { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; @@ -4217,7 +4269,7 @@ void intel_runtime_pm_get(struct drm_i915_private *i915) ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - track_intel_runtime_pm_wakeref(i915); + return track_intel_runtime_pm_wakeref(i915); } /** @@ -4231,9 +4283,10 @@ void intel_runtime_pm_get(struct drm_i915_private *i915) * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. * - * Returns: True if the wakeref was acquired, or False otherwise. + * Returns: the wakeref cookie to pass to intel_runtime_pm_put(), evaluates + * as True if the wakeref was acquired, or False otherwise. */ -bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) { if (IS_ENABLED(CONFIG_PM)) { struct pci_dev *pdev = i915->drm.pdev; @@ -4246,12 +4299,10 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) * atm to the late/early system suspend/resume handlers. */ if (pm_runtime_get_if_in_use(kdev) <= 0) - return false; + return 0; } - track_intel_runtime_pm_wakeref(i915); - - return true; + return track_intel_runtime_pm_wakeref(i915); } /** @@ -4270,8 +4321,10 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915) * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. + * + * Returns: the wakeref cookie to pass to intel_runtime_pm_put() */ -void intel_runtime_pm_get_noresume(struct drm_i915_private *i915) +intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915) { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; @@ -4279,7 +4332,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915) assert_rpm_wakelock_held(i915); pm_runtime_get_noresume(kdev); - track_intel_runtime_pm_wakeref(i915); + return track_intel_runtime_pm_wakeref(i915); } /** @@ -4290,7 +4343,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *i915) * intel_runtime_pm_get() and might power down the corresponding * hardware block right away if this is the last reference. */ -void intel_runtime_pm_put(struct drm_i915_private *i915) +void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915) { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; @@ -4301,6 +4354,14 @@ void intel_runtime_pm_put(struct drm_i915_private *i915) pm_runtime_put_autosuspend(kdev); } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref) +{ + cancel_intel_runtime_pm_wakeref(i915, wref); + intel_runtime_pm_put_unchecked(i915); +} +#endif + /** * intel_runtime_pm_enable - enable runtime pm * @i915: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fff468f17d2d..8d4c76ac0e7d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1709,7 +1709,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, reg->val = I915_READ8(entry->offset_ldw); else ret = -EINVAL; - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 6c10734e948d..a4d8b12be12c 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1785,7 +1785,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); mock_file_free(dev_priv, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bdcc53e15e75..762e1a7125f5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -32,7 +32,7 @@ static int switch_to_context(struct drm_i915_private *i915, i915_request_add(rq); } - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return err; } @@ -76,7 +76,7 @@ static void simulate_hibernate(struct drm_i915_private *i915) */ trash_stolen(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static int pm_prepare(struct drm_i915_private *i915) @@ -98,7 +98,7 @@ static void pm_suspend(struct drm_i915_private *i915) i915_gem_suspend_gtt_mappings(i915); i915_gem_suspend_late(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static void pm_hibernate(struct drm_i915_private *i915) @@ -110,7 +110,7 @@ static void pm_hibernate(struct drm_i915_private *i915) i915_gem_freeze(i915); i915_gem_freeze_late(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static void pm_resume(struct drm_i915_private *i915) @@ -125,7 +125,7 @@ static void pm_resume(struct drm_i915_private *i915) i915_gem_sanitize(i915); i915_gem_resume(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } static int igt_gem_suspend(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f7392c1ffe75..eea4fc2445ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -376,7 +376,7 @@ static int igt_gem_coherency(void *arg) } } unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); kfree(offsets); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index d00cdf3c2939..6e1a0711d201 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -243,7 +243,7 @@ static int live_nop_switch(void *arg) } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; @@ -609,7 +609,7 @@ static int igt_ctx_exec(void *arg) intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -715,7 +715,7 @@ static int igt_ctx_readonly(void *arg) intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -1067,7 +1067,7 @@ static int igt_vm_isolation(void *arg) count, RUNTIME_INFO(i915)->num_rings); out_rpm: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); out_unlock: if (end_live_test(&t)) err = -EIO; @@ -1200,7 +1200,7 @@ out_unlock: if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); kernel_context_close(ctx); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 4365979d8222..8d22f73a9b63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -464,7 +464,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index a9ed0ecc94e2..87cb0602a5fc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -295,7 +295,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, intel_runtime_pm_get(i915); vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } count = n; @@ -1216,7 +1216,7 @@ static int igt_ggtt_page(void *arg) kfree(order); out_remove: ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); drm_mm_remove_node(&tmp); out_unpin: i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index be7ecb66ad11..b03890c590d7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -444,7 +444,7 @@ next_tiling: ; } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: @@ -508,7 +508,7 @@ static void disable_retire_worker(struct drm_i915_private *i915) if (!i915->gt.active_requests++) { intel_runtime_pm_get(i915); i915_gem_unpark(i915); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); } mutex_unlock(&i915->drm.struct_mutex); cancel_delayed_work_sync(&i915->gt.retire_work); @@ -590,7 +590,7 @@ static int igt_mmap_offset_exhaustion(void *arg) mutex_lock(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); err = make_obj_busy(obj); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 07e557815308..e8880cabd5c7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -403,7 +403,7 @@ static int live_nop_request(void *arg) } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -553,7 +553,7 @@ out_batch: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -731,7 +731,7 @@ out_request: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -860,7 +860,7 @@ out_request: i915_request_put(request[id]); } out_unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 32cba4cae31a..3590ba3d8897 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -225,7 +225,7 @@ out: guc_clients_create(guc); guc_clients_enable(guc); unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } @@ -337,7 +337,7 @@ out: guc_client_free(clients[i]); } unlock: - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put_unchecked(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 797cf5e6d6d4..58cba8188bd2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -402,7 +402,7 @@ static int igt_wedged_reset(void *arg) i915_reset(i915, ALL_ENGINES, NULL); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1636,7 +1636,7 @@ out: force_reset(i915); unlock: - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1679,7 +1679,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 00caaa00f02f..ac1b18a17f3c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -65,7 +65,7 @@ err_spin: igt_spinner_fini(&spin); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -158,7 +158,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -251,7 +251,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -374,7 +374,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -627,7 +627,7 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); err_unlock: - intel_runtime_pm_put(smoke.i915); + intel_runtime_pm_put_unchecked(smoke.i915); mutex_unlock(&smoke.i915->drm.struct_mutex); kfree(smoke.contexts); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 8b3f3200a3bd..b1b39c70c702 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -94,7 +94,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) intel_runtime_pm_get(engine->i915); rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put(engine->i915); + intel_runtime_pm_put_unchecked(engine->i915); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -241,7 +241,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, else rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put(engine->i915); + intel_runtime_pm_put_unchecked(engine->i915); kernel_context_close(ctx); @@ -300,7 +300,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, intel_runtime_pm_get(i915); err = reset(engine); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); if (want_spin) { igt_spinner_end(&spin); @@ -414,7 +414,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg) out: reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); igt_global_reset_unlock(i915); return ok ? 0 : -ESRCH; @@ -496,7 +496,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) err: reference_lists_fini(i915, &lists); - intel_runtime_pm_put(i915); + intel_runtime_pm_put_unchecked(i915); igt_global_reset_unlock(i915); kernel_context_close(ctx); From 506d1f62454b27535591cdc20e4148a736d0da66 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:11 +0000 Subject: [PATCH 026/539] drm/i915: Track GT wakeref Record the wakeref used for keeping the device awake as the GPU is executing requests and be sure to cancel the tracking upon parking. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e9c909c43759..f33dc8a1fd1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1979,7 +1979,7 @@ struct drm_i915_private { * In order to reduce the effect on performance, there * is a slight delay before we do so. */ - bool awake; + intel_wakeref_t awake; /** * The number of times we have woken up. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 640e6361dda3..abd5d83fb0e5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -138,6 +138,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) static u32 __i915_gem_park(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; + GEM_TRACE("\n"); lockdep_assert_held(&i915->drm.struct_mutex); @@ -168,14 +170,15 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) i915_pmu_gt_parked(i915); i915_vma_parked(i915); - i915->gt.awake = false; + wakeref = fetch_and_zero(&i915->gt.awake); + GEM_BUG_ON(!wakeref); if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return i915->gt.epoch; } @@ -204,7 +207,8 @@ void i915_gem_unpark(struct drm_i915_private *i915) if (i915->gt.awake) return; - intel_runtime_pm_get_noresume(i915); + i915->gt.awake = intel_runtime_pm_get_noresume(i915); + GEM_BUG_ON(!i915->gt.awake); /* * It seems that the DMC likes to transition between the DC states a lot @@ -219,7 +223,6 @@ void i915_gem_unpark(struct drm_i915_private *i915) */ intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - i915->gt.awake = true; if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ i915->gt.epoch = 1; From 183e260ba2467be4c790781af0eb8a834bda65a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:12 +0000 Subject: [PATCH 027/539] drm/i915: Track the rpm wakerefs for error handling Keep hold of the local wakeref used in error handling, to cancel the tracking upon release so that leaks can be identified. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 787a9ed1ef7d..94187e68d39a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3291,6 +3291,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, const char *fmt, ...) { struct intel_engine_cs *engine; + intel_wakeref_t wakeref; unsigned int tmp; char error_msg[80]; char *msg = NULL; @@ -3312,7 +3313,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); engine_mask &= INTEL_INFO(dev_priv)->ring_mask; @@ -3374,7 +3375,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, wake_up_all(&dev_priv->gpu_error.reset_queue); out: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } /* Called from drm generic code, passed 'crtc' which From 48d1c812160712429c8eacbe7ca7d72c2d06ae26 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:13 +0000 Subject: [PATCH 028/539] drm/i915: Mark up sysfs with rpm wakeref tracking As sysfs has a simple pattern of taking a rpm wakeref around the user access, we can track the local reference and drop it as soon as possible. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sysfs.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 53c20e103d56..2cbbf165d179 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -42,11 +42,12 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { + intel_wakeref_t wakeref; u64 res; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); res = intel_rc6_residency_us(dev_priv, reg); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -258,9 +259,10 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + intel_wakeref_t wakeref; int ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -274,7 +276,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, } mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return snprintf(buf, PAGE_SIZE, "%d\n", ret); } @@ -354,6 +356,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; ssize_t ret; @@ -361,7 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); @@ -371,7 +374,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, val > rps->max_freq || val < rps->min_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return -EINVAL; } @@ -392,7 +395,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; } @@ -412,6 +415,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; ssize_t ret; @@ -419,7 +423,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->pcu_lock); @@ -429,7 +433,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, val > rps->max_freq || val > rps->max_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return -EINVAL; } @@ -446,7 +450,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; } From a037121c3c7fb7e3d88f9a27d3d77581404f9b1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:14 +0000 Subject: [PATCH 029/539] drm/i915: Mark up debugfs with rpm wakeref tracking As debugfs has a simple pattern of taking a rpm wakeref around the user access, we can track the local reference and drop it as soon as possible. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 135 +++++++++++++++++----------- 1 file changed, 82 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6818079669a7..66c520ba0df8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -674,9 +674,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int i, pipe; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_CHERRYVIEW(dev_priv)) { seq_printf(m, "Master Interrupt Control:\t%08x\n", @@ -877,7 +878,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } } - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -950,10 +951,11 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; struct i915_gpu_state *gpu; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); gpu = i915_capture_gpu_state(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -1012,9 +1014,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; int ret = 0; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_GEN(dev_priv, 5)) { u16 rgvswctl = I915_READ16(MEMSWCTL); @@ -1226,7 +1229,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -1265,6 +1268,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) u64 acthd[I915_NUM_ENGINES]; u32 seqno[I915_NUM_ENGINES]; struct intel_instdone instdone; + intel_wakeref_t wakeref; enum intel_engine_id id; if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) @@ -1283,7 +1287,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) return 0; } - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); for_each_engine(engine, dev_priv, id) { acthd[id] = intel_engine_get_active_head(engine); @@ -1292,7 +1296,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) seq_printf(m, "Hangcheck active, timer fires in %dms\n", @@ -1568,9 +1572,10 @@ static int gen6_drpc_info(struct seq_file *m) static int i915_drpc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; int err; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) err = vlv_drpc_info(m); @@ -1579,7 +1584,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused) else err = ironlake_drpc_info(m); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return err; } @@ -1601,11 +1606,12 @@ static int i915_fbc_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_fbc *fbc = &dev_priv->fbc; + intel_wakeref_t wakeref; if (!HAS_FBC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&fbc->lock); if (intel_fbc_is_active(dev_priv)) @@ -1632,7 +1638,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) } mutex_unlock(&fbc->lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -1677,11 +1683,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops, static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; if (!HAS_IPS(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "Enabled by kernel parameter: %s\n", yesno(i915_modparams.enable_ips)); @@ -1695,7 +1702,7 @@ static int i915_ips_status(struct seq_file *m, void *unused) seq_puts(m, "Currently: disabled\n"); } - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -1703,9 +1710,10 @@ static int i915_ips_status(struct seq_file *m, void *unused) static int i915_sr_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; bool sr_enabled = false; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); if (INTEL_GEN(dev_priv) >= 9) @@ -1723,7 +1731,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled)); @@ -1735,29 +1743,30 @@ static int i915_emon_status(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; unsigned long temp, chipset, gfx; + intel_wakeref_t wakeref; int ret; if (!IS_GEN(dev_priv, 5)) return -ENODEV; - intel_runtime_pm_get(dev_priv); - ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; + wakeref = intel_runtime_pm_get(dev_priv); + temp = i915_mch_val(dev_priv); chipset = i915_chipset_val(dev_priv); gfx = i915_gfx_val(dev_priv); mutex_unlock(&dev->struct_mutex); + intel_runtime_pm_put(dev_priv, wakeref); + seq_printf(m, "GMCH temp: %ld\n", temp); seq_printf(m, "Chipset power: %ld\n", chipset); seq_printf(m, "GFX power: %ld\n", gfx); seq_printf(m, "Total power: %ld\n", chipset + gfx); - intel_runtime_pm_put_unchecked(dev_priv); - return 0; } @@ -1766,13 +1775,14 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; unsigned int max_gpu_freq, min_gpu_freq; + intel_wakeref_t wakeref; int gpu_freq, ia_freq; int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) @@ -1805,7 +1815,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) mutex_unlock(&dev_priv->pcu_lock); out: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -1978,8 +1988,9 @@ static const char *swizzle_string(unsigned swizzle) static int i915_swizzle_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", swizzle_string(dev_priv->mm.bit_6_swizzle_x)); @@ -2017,7 +2028,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) seq_puts(m, "L-shaped memory detected\n"); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -2054,9 +2065,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 act_freq = rps->cur_freq; + intel_wakeref_t wakeref; struct drm_file *file; - if (intel_runtime_pm_get_if_in_use(dev_priv)) { + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); act_freq = vlv_punit_read(dev_priv, @@ -2067,7 +2080,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } seq_printf(m, "RPS enabled? %d\n", rps->enabled); @@ -2150,6 +2163,7 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_huc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct drm_printer p; if (!HAS_HUC(dev_priv)) @@ -2158,9 +2172,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->huc.fw, &p); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -2168,6 +2182,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct drm_printer p; u32 tmp, i; @@ -2177,7 +2192,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->guc.fw, &p); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); tmp = I915_READ(GUC_STATUS); @@ -2192,7 +2207,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -2550,6 +2565,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; u32 psrperf = 0; bool enabled = false; bool sink_support; @@ -2562,7 +2578,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) if (!sink_support) return 0; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->psr.lock); seq_printf(m, "PSR mode: %s\n", @@ -2601,7 +2617,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) dev_priv->psr.last_exit); } - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -2610,6 +2626,7 @@ i915_edp_psr_debug_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; struct drm_modeset_acquire_ctx ctx; + intel_wakeref_t wakeref; int ret; if (!CAN_PSR(dev_priv)) @@ -2617,7 +2634,7 @@ i915_edp_psr_debug_set(void *data, u64 val) DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); @@ -2632,7 +2649,7 @@ retry: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } @@ -2657,15 +2674,16 @@ static int i915_energy_uJ(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); unsigned long long power; + intel_wakeref_t wakeref; u32 units; if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) { - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return -ENODEV; } @@ -2673,7 +2691,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data) power = I915_READ(MCH_SECP_NRG_STTS); power = (1000000 * power) >> units; /* convert to uJ */ - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); seq_printf(m, "%llu", power); @@ -2742,6 +2760,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) static int i915_dmc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + intel_wakeref_t wakeref; struct intel_csr *csr; if (!HAS_CSR(dev_priv)) @@ -2749,7 +2768,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) csr = &dev_priv->csr; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL)); seq_printf(m, "path: %s\n", csr->fw_path); @@ -2775,7 +2794,7 @@ out: seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE)); seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL)); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3065,8 +3084,10 @@ static int i915_display_info(struct seq_file *m, void *unused) struct intel_crtc *crtc; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(dev_priv); - intel_runtime_pm_get(dev_priv); seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); for_each_intel_crtc(dev, crtc) { @@ -3114,7 +3135,7 @@ static int i915_display_info(struct seq_file *m, void *unused) drm_connector_list_iter_end(&conn_iter); mutex_unlock(&dev->mode_config.mutex); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3123,10 +3144,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; + intel_wakeref_t wakeref; enum intel_engine_id id; struct drm_printer p; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); seq_printf(m, "GT awake? %s (epoch %u)\n", yesno(dev_priv->gt.awake), dev_priv->gt.epoch); @@ -3139,7 +3161,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) for_each_engine(engine, dev_priv, id) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -3252,6 +3274,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; + intel_wakeref_t wakeref; int ret; bool enable; @@ -3259,13 +3282,15 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, if (ret < 0) return ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); + if (!dev_priv->ipc_enabled && enable) DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); dev_priv->wm.distrust_bios_wm = true; dev_priv->ipc_enabled = enable; intel_enable_ipc(dev_priv); - intel_runtime_pm_put_unchecked(dev_priv); + + intel_runtime_pm_put(dev_priv, wakeref); return len; } @@ -4031,11 +4056,12 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + intel_wakeref_t wakeref; int ret = 0; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) i915_gem_set_wedged(i915); @@ -4090,7 +4116,7 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_drain_freed_objects(i915); out: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return ret; } @@ -4103,16 +4129,17 @@ static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; + intel_wakeref_t wakeref; u32 snpcr; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; @@ -4123,6 +4150,7 @@ static int i915_cache_sharing_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + intel_wakeref_t wakeref; u32 snpcr; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) @@ -4131,7 +4159,7 @@ i915_cache_sharing_set(void *data, u64 val) if (val > 3) return -EINVAL; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val); /* Update the cache sharing policy here as well */ @@ -4140,7 +4168,7 @@ i915_cache_sharing_set(void *data, u64 val) snpcr |= (val << GEN6_MBC_SNPCR_SHIFT); I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -4362,6 +4390,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct sseu_dev_info sseu; + intel_wakeref_t wakeref; if (INTEL_GEN(dev_priv) < 8) return -ENODEV; @@ -4376,7 +4405,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) sseu.max_eus_per_subslice = RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (IS_CHERRYVIEW(dev_priv)) { cherryview_sseu_device_status(dev_priv, &sseu); @@ -4388,7 +4417,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused) gen10_sseu_device_status(dev_priv, &sseu); } - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); i915_print_sseu_info(m, false, &sseu); From 6619c0075f784d7720fc9810279c956d51b22aaf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:15 +0000 Subject: [PATCH 030/539] drm/i915/perf: Track the rpm wakeref Keep track of our wakeref used to keep the device awake so we can catch any leak. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_perf.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f33dc8a1fd1b..b6d0cd890a19 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1333,6 +1333,8 @@ struct i915_perf_stream { */ struct list_head link; + intel_wakeref_t wakeref; + /** * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` * properties given when opening a stream, representing the contents diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e4dfd1477c78..faff6cf1aaa1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1365,7 +1365,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) free_oa_buffer(dev_priv); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, stream->wakeref); if (stream->ctx) oa_put_render_ctx_id(stream); @@ -2087,7 +2087,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - intel_runtime_pm_get(dev_priv); + stream->wakeref = intel_runtime_pm_get(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = alloc_oa_buffer(dev_priv); @@ -2123,7 +2123,7 @@ err_oa_buf_alloc: put_oa_config(dev_priv, stream->oa_config); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, stream->wakeref); err_config: if (stream->ctx) From 00e27cbe4c8960b79324ccd48a6077e251e42bd7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:16 +0000 Subject: [PATCH 031/539] drm/i915/pmu: Track rpm wakeref Track the wakeref used for temporary access to the device, and discard it upon release so that leaks can be identified. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index c99fcfce79d5..3d43fc9dd25d 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -167,6 +167,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; bool fw = false; if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) @@ -175,7 +176,8 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) if (!dev_priv->gt.awake) return; - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return; for_each_engine(engine, dev_priv, id) { @@ -210,7 +212,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) if (fw) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } static void @@ -227,11 +229,15 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) u32 val; val = dev_priv->gt_pm.rps.cur_freq; - if (dev_priv->gt.awake && - intel_runtime_pm_get_if_in_use(dev_priv)) { - val = intel_get_cagf(dev_priv, - I915_READ_NOTRACE(GEN6_RPSTAT1)); - intel_runtime_pm_put_unchecked(dev_priv); + if (dev_priv->gt.awake) { + intel_wakeref_t wakeref = + intel_runtime_pm_get_if_in_use(dev_priv); + + if (wakeref) { + val = intel_get_cagf(dev_priv, + I915_READ_NOTRACE(GEN6_RPSTAT1)); + intel_runtime_pm_put(dev_priv, wakeref); + } } add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], @@ -443,12 +449,14 @@ static u64 __get_rc6(struct drm_i915_private *i915) static u64 get_rc6(struct drm_i915_private *i915) { #if IS_ENABLED(CONFIG_PM) + intel_wakeref_t wakeref; unsigned long flags; u64 val; - if (intel_runtime_pm_get_if_in_use(i915)) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (wakeref) { val = __get_rc6(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); /* * If we are coming back from being runtime suspended we must From 3055f0cd782fe3ef556227fc153c2f66fe47b721 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:17 +0000 Subject: [PATCH 032/539] drm/i915/guc: Track the rpm wakeref Keep track of our acquired wakeref for interacting with the guc, so that we can cancel it upon release and so clearly identify leaks. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_log.c | 15 +++++++++------ drivers/gpu/drm/i915/intel_huc.c | 5 +++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 1b1581a42aa1..20c0b36d748e 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -436,6 +436,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); + intel_wakeref_t wakeref; guc_read_update_log_buffer(log); @@ -443,9 +444,9 @@ static void guc_log_capture_logs(struct intel_guc_log *log) * Generally device is expected to be active only at this * time, so get/put should be really quick. */ - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); guc_action_flush_log_complete(guc); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } int intel_guc_log_create(struct intel_guc_log *log) @@ -505,6 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); + intel_wakeref_t wakeref; int ret; BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); @@ -524,11 +526,11 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) goto out_unlock; } - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level), GUC_LOG_LEVEL_IS_ENABLED(level), GUC_LOG_LEVEL_TO_VERBOSITY(level)); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); if (ret) { DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); goto out_unlock; @@ -601,6 +603,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_i915(guc); + intel_wakeref_t wakeref; /* * Before initiating the forceful flush, wait for any pending/ongoing @@ -608,9 +611,9 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); guc_action_flush_log(guc); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c2b076e9bada..3e8c18b6a42d 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -115,14 +115,15 @@ fail: int intel_huc_check_status(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); + intel_wakeref_t wakeref; bool status; if (!HAS_HUC(dev_priv)) return -ENODEV; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return status; } From 538ef96b9dae7fe0c021e1c0bfc86f370d3b1488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:18 +0000 Subject: [PATCH 033/539] drm/i915/gem: Track the rpm wakerefs Keep track of the temporary rpm wakerefs used for user access to the device, so that we can cancel them upon release and clearly identify any leaks. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 47 +++++++++++++--------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 6 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 22 ++++++---- drivers/gpu/drm/i915/i915_gem_shrinker.c | 32 +++++++++------ drivers/gpu/drm/i915/intel_engine_cs.c | 12 ++++-- drivers/gpu/drm/i915/intel_uncore.c | 5 ++- 7 files changed, 81 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index abd5d83fb0e5..3186859ff378 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -785,6 +785,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { + intel_wakeref_t wakeref; + /* * No actual flushing is required for the GTT write domain for reads * from the GTT domain. Writes to it "immediately" go to main memory @@ -811,13 +813,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) i915_gem_chipset_flush(dev_priv); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); spin_lock_irq(&dev_priv->uncore.lock); POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } static void @@ -1069,6 +1071,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; + intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; void __user *user_data; @@ -1079,7 +1082,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONFAULT | @@ -1152,7 +1155,7 @@ out_unpin: i915_vma_unpin(vma); } out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1253,6 +1256,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; + intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; u64 remain, offset; @@ -1271,13 +1275,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * This easily dwarfs any performance advantage from * using the cache bypass of indirect GGTT access. */ - if (!intel_runtime_pm_get_if_in_use(i915)) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (!wakeref) { ret = -EFAULT; goto out_unlock; } } else { /* No backing pages, no fallback, we must force GGTT access */ - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); } vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -1359,7 +1364,7 @@ out_unpin: i915_vma_unpin(vma); } out_rpm: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; @@ -1864,6 +1869,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; bool write = area->vm_flags & VM_WRITE; + intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; int ret; @@ -1893,7 +1899,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); if (ret) @@ -1971,7 +1977,7 @@ err_unpin: err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); i915_gem_object_unpin_pages(obj); err: switch (ret) { @@ -2044,6 +2050,7 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user @@ -2054,7 +2061,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) * wakeref. */ lockdep_assert_held(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (!obj->userfault_count) goto out; @@ -2071,7 +2078,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) @@ -4706,8 +4713,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { struct drm_i915_gem_object *obj, *on; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_vma *vma, *vn; @@ -4768,7 +4776,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (on) cond_resched(); } - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) @@ -4877,11 +4885,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) void i915_gem_sanitize(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; + GEM_TRACE("\n"); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); /* @@ -4904,7 +4914,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_engines_sanitize(i915, false); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -4912,11 +4922,12 @@ void i915_gem_sanitize(struct drm_i915_private *i915) int i915_gem_suspend(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; int ret; GEM_TRACE("\n"); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); intel_suspend_gt_powersave(i915); mutex_lock(&i915->drm.struct_mutex); @@ -4968,12 +4979,12 @@ int i915_gem_suspend(struct drm_i915_private *i915) if (WARN_ON(!intel_engines_are_idle(i915))) i915_gem_set_wedged(i915); /* no hope, discard everything */ - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return 0; err_unlock: mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c80943698ca2..f250109e1f66 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2202,6 +2202,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; + intel_wakeref_t wakeref; int out_fence_fd = -1; int err; @@ -2272,7 +2273,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - intel_runtime_pm_get(eb.i915); + wakeref = intel_runtime_pm_get(eb.i915); err = i915_mutex_lock_interruptible(dev); if (err) @@ -2424,7 +2425,7 @@ err_vma: eb_release_vmas(&eb); mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put_unchecked(eb.i915); + intel_runtime_pm_put(eb.i915, wakeref); i915_gem_context_put(eb.ctx); err_destroy: eb_destroy(&eb); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index b3391070acf7..f7947d89cf45 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -209,6 +209,7 @@ static void fence_write(struct drm_i915_fence_reg *fence, static int fence_update(struct drm_i915_fence_reg *fence, struct i915_vma *vma) { + intel_wakeref_t wakeref; int ret; if (vma) { @@ -256,9 +257,10 @@ static int fence_update(struct drm_i915_fence_reg *fence, * If the device is currently powered down, we will defer the write * to the runtime resume, see i915_gem_restore_fences(). */ - if (intel_runtime_pm_get_if_in_use(fence->i915)) { + wakeref = intel_runtime_pm_get_if_in_use(fence->i915); + if (wakeref) { fence_write(fence, vma); - intel_runtime_pm_put_unchecked(fence->i915); + intel_runtime_pm_put(fence->i915, wakeref); } if (vma) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 51f80ddd938d..e2c61633e95d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2527,6 +2527,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; + intel_wakeref_t wakeref; u32 pte_flags; /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ @@ -2534,9 +2535,9 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -2553,10 +2554,11 @@ static int ggtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static int aliasing_gtt_bind_vma(struct i915_vma *vma, @@ -2588,9 +2590,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } if (flags & I915_VMA_GLOBAL_BIND) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } return 0; @@ -2601,9 +2605,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; if (vma->flags & I915_VMA_GLOBAL_BIND) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } if (vma->flags & I915_VMA_LOCAL_BIND) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 2bef02d0883d..f01489b05b5e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -153,6 +153,7 @@ i915_gem_shrink(struct drm_i915_private *i915, { &i915->mm.bound_list, I915_SHRINK_BOUND }, { NULL, 0 }, }, *phase; + intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; bool unlock; @@ -182,9 +183,11 @@ i915_gem_shrink(struct drm_i915_private *i915, * device just to recover a little memory. If absolutely necessary, * we will force the wake during oom-notifier. */ - if ((flags & I915_SHRINK_BOUND) && - !intel_runtime_pm_get_if_in_use(i915)) - flags &= ~I915_SHRINK_BOUND; + if (flags & I915_SHRINK_BOUND) { + wakeref = intel_runtime_pm_get_if_in_use(i915); + if (!wakeref) + flags &= ~I915_SHRINK_BOUND; + } /* * As we may completely rewrite the (un)bound list whilst unbinding @@ -265,7 +268,7 @@ i915_gem_shrink(struct drm_i915_private *i915, } if (flags & I915_SHRINK_BOUND) - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); i915_retire_requests(i915); @@ -292,14 +295,15 @@ i915_gem_shrink(struct drm_i915_private *i915, */ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; unsigned long freed; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return freed; } @@ -370,14 +374,16 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } shrinker_unlock(i915, unlock); @@ -392,12 +398,13 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; + intel_wakeref_t wakeref; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); freed_pages = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not @@ -435,6 +442,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr container_of(nb, struct drm_i915_private, mm.vmap_notifier); struct i915_vma *vma, *next; unsigned long freed_pages = 0; + intel_wakeref_t wakeref; bool unlock; if (!shrinker_lock(i915, 0, &unlock)) @@ -446,12 +454,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr MAX_SCHEDULE_TIMEOUT)) goto out; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_VMAPS); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2e60463f2468..45e33eee76f9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -913,10 +913,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + intel_wakeref_t wakeref; bool idle = true; /* If the whole device is asleep, the engine must be idle */ - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return true; /* First check that no commands are left in the ring */ @@ -928,7 +930,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) idle = false; - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return idle; } @@ -1425,6 +1427,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq, *last; + intel_wakeref_t wakeref; unsigned long flags; struct rb_node *rb; int count; @@ -1483,9 +1486,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, rcu_read_unlock(); - if (intel_runtime_pm_get_if_in_use(engine->i915)) { + wakeref = intel_runtime_pm_get_if_in_use(engine->i915); + if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put_unchecked(engine->i915); + intel_runtime_pm_put(engine->i915, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8d4c76ac0e7d..d494d92da02c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1670,6 +1670,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_reg_read *reg = data; struct reg_whitelist const *entry; + intel_wakeref_t wakeref; unsigned int flags; int remain; int ret = 0; @@ -1695,7 +1696,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, flags = reg->offset & (entry->size - 1); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); if (entry->size == 8 && flags == I915_REG_READ_8B_WA) reg->val = I915_READ64_2x32(entry->offset_ldw, entry->offset_udw); @@ -1709,7 +1710,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, reg->val = I915_READ8(entry->offset_ldw); else ret = -EINVAL; - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } From 1d264d91befc3131e68edc225fa49acb2f315f73 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:19 +0000 Subject: [PATCH 034/539] drm/i915/fb: Track rpm wakerefs Keep track of the rpm wakeref used for framebuffer access so that we can cancel upon release and so more clearly identify leaks. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 5 +++-- drivers/gpu/drm/i915/intel_fbdev.c | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a980d5d1e601..b0b8f9ffd873 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2023,6 +2023,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); + intel_wakeref_t wakeref; struct i915_vma *vma; unsigned int pinctl; u32 alignment; @@ -2046,7 +2047,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * intel_runtime_pm_put(), so it is correct to wrap only the * pin/unpin/fence and not more. */ - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); atomic_inc(&dev_priv->gpu_error.pending_fb_pin); @@ -2101,7 +2102,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, err: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return vma; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 215e5894842d..3036d835bc2b 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -177,8 +177,9 @@ static int intelfb_create(struct drm_fb_helper *helper, const struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_NORMAL, }; - struct fb_info *info; struct drm_framebuffer *fb; + intel_wakeref_t wakeref; + struct fb_info *info; struct i915_vma *vma; unsigned long flags = 0; bool prealloc = false; @@ -209,7 +210,7 @@ static int intelfb_create(struct drm_fb_helper *helper, } mutex_lock(&dev->struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); /* Pin the GGTT vma for our access via info->screen_base. * This also validates that any existing fb inherited from the @@ -276,7 +277,7 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma = vma; ifbdev->vma_flags = flags; - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -284,7 +285,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_unpin: intel_unpin_fb_vma(vma, flags); out_unlock: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev->struct_mutex); return ret; } From 6a712a20bff467e82c3507c4286f04499af1c52d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:20 +0000 Subject: [PATCH 035/539] drm/i915/hotplug: Track temporary rpm wakeref Keep track of the temporary rpm wakeref inside hotplug detection, so that we can cancel it immediately upon release and so clearly identify leaks. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_hotplug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b1a9cb960ca4..e027d2b4abe5 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -226,9 +226,10 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) container_of(work, typeof(*dev_priv), hotplug.reenable_work.work); struct drm_device *dev = &dev_priv->drm; + intel_wakeref_t wakeref; enum hpd_pin pin; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); spin_lock_irq(&dev_priv->irq_lock); for_each_hpd_pin(pin) { @@ -261,7 +262,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); } bool intel_encoder_hotplug(struct intel_encoder *encoder, From 2cb2cb5ff41abd92d6a7bfb1459b25974fa6d509 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:21 +0000 Subject: [PATCH 036/539] drm/i915/panel: Track temporary rpm wakeref Keep track of the temporary rpm wakeref used for panel backlight access, so that we can cancel it immediately upon release and so more clearly identify leaks. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_panel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index c2b7455a023e..93a2e4b5c54c 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1203,17 +1203,18 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd) struct intel_connector *connector = bl_get_data(bd); struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + intel_wakeref_t wakeref; u32 hw_level; int ret; - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); hw_level = intel_panel_get_backlight(connector); ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness); drm_modeset_unlock(&dev->mode_config.connection_mutex); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); return ret; } From c9d08cc3e3393e19162cb2cfaa1f454baf2aaffe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:22 +0000 Subject: [PATCH 037/539] drm/i915/selftests: Mark up rpm wakerefs Track the temporary wakerefs used within the selftests so that leaks are clear. v2: Add a couple of coarse annotations for mock selftests as we now loudly warn about the errors. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/huge_pages.c | 5 ++-- drivers/gpu/drm/i915/selftests/i915_gem.c | 29 ++++++++++++------- .../drm/i915/selftests/i915_gem_coherency.c | 5 ++-- .../gpu/drm/i915/selftests/i915_gem_context.c | 27 ++++++++++------- .../gpu/drm/i915/selftests/i915_gem_evict.c | 16 +++++++--- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 10 ++++--- .../gpu/drm/i915/selftests/i915_gem_object.c | 18 ++++++++---- drivers/gpu/drm/i915/selftests/i915_request.c | 27 +++++++++++------ drivers/gpu/drm/i915/selftests/intel_guc.c | 10 ++++--- .../gpu/drm/i915/selftests/intel_hangcheck.c | 15 ++++++---- drivers/gpu/drm/i915/selftests/intel_lrc.c | 25 +++++++++------- .../drm/i915/selftests/intel_workarounds.c | 29 ++++++++++++------- 12 files changed, 138 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index a4d8b12be12c..a52450111802 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1756,6 +1756,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) }; struct drm_file *file; struct i915_gem_context *ctx; + intel_wakeref_t wakeref; int err; if (!HAS_PPGTT(dev_priv)) { @@ -1771,7 +1772,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return PTR_ERR(file); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); ctx = live_context(dev_priv, file); if (IS_ERR(ctx)) { @@ -1785,7 +1786,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); mock_file_free(dev_priv, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 762e1a7125f5..01a46c46fe25 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -16,9 +16,10 @@ static int switch_to_context(struct drm_i915_private *i915, { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *rq; @@ -32,7 +33,7 @@ static int switch_to_context(struct drm_i915_private *i915, i915_request_add(rq); } - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return err; } @@ -65,7 +66,9 @@ static void trash_stolen(struct drm_i915_private *i915) static void simulate_hibernate(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); /* * As a final sting in the tail, invalidate stolen. Under a real S4, @@ -76,7 +79,7 @@ static void simulate_hibernate(struct drm_i915_private *i915) */ trash_stolen(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static int pm_prepare(struct drm_i915_private *i915) @@ -93,39 +96,45 @@ static int pm_prepare(struct drm_i915_private *i915) static void pm_suspend(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); i915_gem_suspend_gtt_mappings(i915); i915_gem_suspend_late(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static void pm_hibernate(struct drm_i915_private *i915) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); i915_gem_suspend_gtt_mappings(i915); i915_gem_freeze(i915); i915_gem_freeze_late(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static void pm_resume(struct drm_i915_private *i915) { + intel_wakeref_t wakeref; + /* * Both suspend and hibernate follow the same wakeup path and assume * that runtime-pm just works. */ - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); intel_engines_sanitize(i915, false); i915_gem_sanitize(i915); i915_gem_resume(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } static int igt_gem_suspend(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index eea4fc2445ae..fd89a5a33c1a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -279,6 +279,7 @@ static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -298,7 +299,7 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -376,7 +377,7 @@ static int igt_gem_coherency(void *arg) } } unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); kfree(offsets); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 6e1a0711d201..7a9b1f20b019 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -119,6 +119,7 @@ static int live_nop_switch(void *arg) struct intel_engine_cs *engine; struct i915_gem_context **ctx; enum intel_engine_id id; + intel_wakeref_t wakeref; struct drm_file *file; struct live_test t; unsigned long n; @@ -140,7 +141,7 @@ static int live_nop_switch(void *arg) return PTR_ERR(file); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { @@ -243,7 +244,7 @@ static int live_nop_switch(void *arg) } out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; @@ -593,6 +594,8 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + if (!engine->context_size) continue; /* No logical context support in HW */ @@ -607,9 +610,9 @@ static int igt_ctx_exec(void *arg) } } - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -699,6 +702,8 @@ static int igt_ctx_readonly(void *arg) unsigned int id; for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + if (!intel_engine_can_store_dword(engine)) continue; @@ -713,9 +718,9 @@ static int igt_ctx_readonly(void *arg) i915_gem_object_set_readonly(obj); } - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -976,6 +981,7 @@ static int igt_vm_isolation(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; struct intel_engine_cs *engine; + intel_wakeref_t wakeref; struct drm_file *file; I915_RND_STATE(prng); unsigned long count; @@ -1022,7 +1028,7 @@ static int igt_vm_isolation(void *arg) GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); count = 0; for_each_engine(engine, i915, id) { @@ -1067,7 +1073,7 @@ static int igt_vm_isolation(void *arg) count, RUNTIME_INFO(i915)->num_rings); out_rpm: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); out_unlock: if (end_live_test(&t)) err = -EIO; @@ -1165,6 +1171,7 @@ static int igt_switch_to_kernel_context(void *arg) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; + intel_wakeref_t wakeref; int err; /* @@ -1175,7 +1182,7 @@ static int igt_switch_to_kernel_context(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -1200,7 +1207,7 @@ out_unlock: if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); kernel_context_close(ctx); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8d22f73a9b63..bbcbf11c72b3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -336,6 +336,7 @@ static int igt_evict_contexts(void *arg) struct drm_mm_node node; struct reserved *next; } *reserved = NULL; + intel_wakeref_t wakeref; struct drm_mm_node hole; unsigned long count; int err; @@ -355,7 +356,7 @@ static int igt_evict_contexts(void *arg) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); @@ -400,8 +401,10 @@ static int igt_evict_contexts(void *arg) struct drm_file *file; file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); + if (IS_ERR(file)) { + err = PTR_ERR(file); + break; + } count = 0; mutex_lock(&i915->drm.struct_mutex); @@ -464,7 +467,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -480,6 +483,7 @@ int i915_gem_evict_mock_selftests(void) SUBTEST(igt_overcommit), }; struct drm_i915_private *i915; + intel_wakeref_t wakeref; int err; i915 = mock_gem_device(); @@ -487,7 +491,11 @@ int i915_gem_evict_mock_selftests(void) return -ENOMEM; mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + err = i915_subtests(tests, i915); + + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 87cb0602a5fc..fea8ab14e79d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -275,6 +275,7 @@ static int lowlevel_hole(struct drm_i915_private *i915, for (n = 0; n < count; n++) { u64 addr = hole_start + order[n] * BIT_ULL(size); + intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); @@ -293,9 +294,9 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } count = n; @@ -1144,6 +1145,7 @@ static int igt_ggtt_page(void *arg) struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; struct drm_mm_node tmp; unsigned int *order, n; int err; @@ -1169,7 +1171,7 @@ static int igt_ggtt_page(void *arg) if (err) goto out_unpin; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + n * PAGE_SIZE; @@ -1216,7 +1218,7 @@ static int igt_ggtt_page(void *arg) kfree(order); out_remove: ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); drm_mm_remove_node(&tmp); out_unpin: i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index b03890c590d7..3575e1387c3f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -308,6 +308,7 @@ static int igt_partial_tiling(void *arg) const unsigned int nreal = 1 << 12; /* largest tile row x2 */ struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; int tiling; int err; @@ -333,7 +334,7 @@ static int igt_partial_tiling(void *arg) } mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (1) { IGT_TIMEOUT(end); @@ -444,7 +445,7 @@ next_tiling: ; } out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: @@ -506,11 +507,14 @@ static void disable_retire_worker(struct drm_i915_private *i915) mutex_lock(&i915->drm.struct_mutex); if (!i915->gt.active_requests++) { - intel_runtime_pm_get(i915); + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); i915_gem_unpark(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); } mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(&i915->gt.retire_work); cancel_delayed_work_sync(&i915->gt.idle_work); } @@ -578,6 +582,8 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { + intel_wakeref_t wakeref; + if (i915_terminally_wedged(&i915->gpu_error)) break; @@ -588,9 +594,9 @@ static int igt_mmap_offset_exhaustion(void *arg) } mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = make_obj_busy(obj); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index e8880cabd5c7..9f705ff9423f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -255,13 +255,18 @@ int i915_request_mock_selftests(void) SUBTEST(igt_request_rewind), }; struct drm_i915_private *i915; + intel_wakeref_t wakeref; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; + wakeref = intel_runtime_pm_get(i915); + err = i915_subtests(tests, i915); + + intel_runtime_pm_put(i915, wakeref); drm_dev_put(&i915->drm); return err; @@ -332,6 +337,7 @@ static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; + intel_wakeref_t wakeref; struct live_test t; unsigned int id; int err = -ENODEV; @@ -342,7 +348,7 @@ static int live_nop_request(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); for_each_engine(engine, i915, id) { struct i915_request *request = NULL; @@ -403,7 +409,7 @@ static int live_nop_request(void *arg) } out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -478,8 +484,9 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - struct live_test t; + intel_wakeref_t wakeref; struct i915_vma *batch; + struct live_test t; unsigned int id; int err = 0; @@ -489,7 +496,7 @@ static int live_empty_request(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); batch = empty_batch(i915); if (IS_ERR(batch)) { @@ -553,7 +560,7 @@ out_batch: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -637,6 +644,7 @@ static int live_all_engines(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_request *request[I915_NUM_ENGINES]; + intel_wakeref_t wakeref; struct i915_vma *batch; struct live_test t; unsigned int id; @@ -648,7 +656,7 @@ static int live_all_engines(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = begin_live_test(&t, i915, __func__, ""); if (err) @@ -731,7 +739,7 @@ out_request: i915_vma_unpin(batch); i915_vma_put(batch); out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -742,6 +750,7 @@ static int live_sequential_engines(void *arg) struct i915_request *request[I915_NUM_ENGINES] = {}; struct i915_request *prev = NULL; struct intel_engine_cs *engine; + intel_wakeref_t wakeref; struct live_test t; unsigned int id; int err; @@ -753,7 +762,7 @@ static int live_sequential_engines(void *arg) */ mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = begin_live_test(&t, i915, __func__, ""); if (err) @@ -860,7 +869,7 @@ out_request: i915_request_put(request[id]); } out_unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 3590ba3d8897..c5e0a0e98fcb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -137,12 +137,13 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client) static int igt_guc_clients(void *args) { struct drm_i915_private *dev_priv = args; + intel_wakeref_t wakeref; struct intel_guc *guc; int err = 0; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); guc = &dev_priv->guc; if (!guc) { @@ -225,7 +226,7 @@ out: guc_clients_create(guc); guc_clients_enable(guc); unlock: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } @@ -238,13 +239,14 @@ unlock: static int igt_guc_doorbells(void *arg) { struct drm_i915_private *dev_priv = arg; + intel_wakeref_t wakeref; struct intel_guc *guc; int i, err = 0; u16 db_id; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); + wakeref = intel_runtime_pm_get(dev_priv); guc = &dev_priv->guc; if (!guc) { @@ -337,7 +339,7 @@ out: guc_client_free(clients[i]); } unlock: - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put(dev_priv, wakeref); mutex_unlock(&dev_priv->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 58cba8188bd2..edd53d18077b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -388,12 +388,13 @@ static int igt_global_reset(void *arg) static int igt_wedged_reset(void *arg) { struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); i915_gem_set_wedged(i915); GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); @@ -402,7 +403,7 @@ static int igt_wedged_reset(void *arg) i915_reset(i915, ALL_ENGINES, NULL); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1600,6 +1601,7 @@ static int igt_atomic_reset(void *arg) { } }; struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ @@ -1609,7 +1611,7 @@ static int igt_atomic_reset(void *arg) igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); /* Flush any requests before we get started and check basics */ force_reset(i915); @@ -1636,7 +1638,7 @@ out: force_reset(i915); unlock: - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1660,6 +1662,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_handle_error), SUBTEST(igt_atomic_reset), }; + intel_wakeref_t wakeref; bool saved_hangcheck; int err; @@ -1669,7 +1672,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (i915_terminally_wedged(&i915->gpu_error)) return -EIO; /* we're long past hope of a successful reset */ - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); err = i915_subtests(tests, i915); @@ -1679,7 +1682,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index ac1b18a17f3c..e6073cd4719c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -18,13 +18,14 @@ static int live_sanitycheck(void *arg) struct i915_gem_context *ctx; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin, i915)) goto err_unlock; @@ -65,7 +66,7 @@ err_spin: igt_spinner_fini(&spin); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -77,13 +78,14 @@ static int live_preempt(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -158,7 +160,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -171,13 +173,14 @@ static int live_late_preempt(void *arg) struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -251,7 +254,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -270,6 +273,7 @@ static int live_preempt_hang(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -279,7 +283,7 @@ static int live_preempt_hang(void *arg) return 0; mutex_lock(&i915->drm.struct_mutex); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); if (igt_spinner_init(&spin_hi, i915)) goto err_unlock; @@ -374,7 +378,7 @@ err_spin_hi: igt_spinner_fini(&spin_hi); err_unlock: igt_flush_test(i915, I915_WAIT_LOCKED); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -562,6 +566,7 @@ static int live_preempt_smoke(void *arg) .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; + intel_wakeref_t wakeref; int err = -ENOMEM; u32 *cs; int n; @@ -576,7 +581,7 @@ static int live_preempt_smoke(void *arg) return -ENOMEM; mutex_lock(&smoke.i915->drm.struct_mutex); - intel_runtime_pm_get(smoke.i915); + wakeref = intel_runtime_pm_get(smoke.i915); smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { @@ -627,7 +632,7 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); err_unlock: - intel_runtime_pm_put_unchecked(smoke.i915); + intel_runtime_pm_put(smoke.i915, wakeref); mutex_unlock(&smoke.i915->drm.struct_mutex); kfree(smoke.contexts); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index b1b39c70c702..75324b6249e3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -60,10 +60,11 @@ reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) static struct drm_i915_gem_object * read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; + intel_wakeref_t wakeref; struct i915_request *rq; struct i915_vma *vma; - const u32 base = engine->mmio_base; u32 srm, *cs; int err; int i; @@ -92,9 +93,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - intel_runtime_pm_get(engine->i915); + wakeref = intel_runtime_pm_get(engine->i915); rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put_unchecked(engine->i915); + intel_runtime_pm_put(engine->i915, wakeref); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -228,20 +229,21 @@ switch_to_scratch_context(struct intel_engine_cs *engine, { struct i915_gem_context *ctx; struct i915_request *rq; + intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - intel_runtime_pm_get(engine->i915); + wakeref = intel_runtime_pm_get(engine->i915); if (spin) rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); else rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put_unchecked(engine->i915); + intel_runtime_pm_put(engine->i915, wakeref); kernel_context_close(ctx); @@ -273,6 +275,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, bool want_spin = reset == do_engine_reset; struct i915_gem_context *ctx; struct igt_spinner spin; + intel_wakeref_t wakeref; int err; pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", @@ -298,9 +301,9 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out; - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); err = reset(engine); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); if (want_spin) { igt_spinner_end(&spin); @@ -391,6 +394,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg) { struct drm_i915_private *i915 = arg; struct i915_gpu_error *error = &i915->gpu_error; + intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -400,7 +404,8 @@ live_gpu_reset_gt_engine_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(i915); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); + reference_lists_init(i915, &lists); ok = verify_gt_engine_wa(i915, &lists, "before reset"); @@ -414,7 +419,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg) out: reference_lists_fini(i915, &lists); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); return ok ? 0 : -ESRCH; @@ -429,6 +434,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) struct igt_spinner spin; enum intel_engine_id id; struct i915_request *rq; + intel_wakeref_t wakeref; struct wa_lists lists; int ret = 0; @@ -440,7 +446,8 @@ live_engine_reset_gt_engine_workarounds(void *arg) return PTR_ERR(ctx); igt_global_reset_lock(i915); - intel_runtime_pm_get(i915); + wakeref = intel_runtime_pm_get(i915); + reference_lists_init(i915, &lists); for_each_engine(engine, i915, id) { @@ -496,7 +503,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) err: reference_lists_fini(i915, &lists); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); kernel_context_close(ctx); From d4225a535b3b086868ce1f82dc0593d85d04dae8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:23 +0000 Subject: [PATCH 038/539] drm/i915: Syntatic sugar for using intel_runtime_pm Frequently, we use intel_runtime_pm_get/_put around a small block. Formalise that usage by providing a macro to define such a block with an automatic closure to scope the intel_runtime_pm wakeref to that block, i.e. macro abuse smelling of python. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 158 ++++++++---------- drivers/gpu/drm/i915/i915_gem.c | 10 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 51 +++--- drivers/gpu/drm/i915/i915_pmu.c | 7 +- drivers/gpu/drm/i915/i915_sysfs.c | 7 +- drivers/gpu/drm/i915/intel_drv.h | 8 + drivers/gpu/drm/i915/intel_guc_log.c | 26 ++- drivers/gpu/drm/i915/intel_huc.c | 7 +- drivers/gpu/drm/i915/intel_panel.c | 18 +- drivers/gpu/drm/i915/intel_uncore.c | 30 ++-- drivers/gpu/drm/i915/selftests/i915_gem.c | 34 ++-- .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 8 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 11 +- drivers/gpu/drm/i915/selftests/i915_request.c | 8 +- .../drm/i915/selftests/intel_workarounds.c | 28 ++-- 17 files changed, 207 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 66c520ba0df8..1c7913b40bb7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -953,9 +953,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) struct i915_gpu_state *gpu; intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - gpu = i915_capture_gpu_state(i915); - intel_runtime_pm_put(i915, wakeref); + gpu = NULL; + with_intel_runtime_pm(i915, wakeref) + gpu = i915_capture_gpu_state(i915); if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -1287,17 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) return 0; } - wakeref = intel_runtime_pm_get(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) { + for_each_engine(engine, dev_priv, id) { + acthd[id] = intel_engine_get_active_head(engine); + seqno[id] = intel_engine_get_seqno(engine); + } - for_each_engine(engine, dev_priv, id) { - acthd[id] = intel_engine_get_active_head(engine); - seqno[id] = intel_engine_get_seqno(engine); + intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); } - intel_engine_get_instdone(dev_priv->engine[RCS], &instdone); - - intel_runtime_pm_put(dev_priv, wakeref); - if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - @@ -1573,18 +1571,16 @@ static int i915_drpc_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); intel_wakeref_t wakeref; - int err; + int err = -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - err = vlv_drpc_info(m); - else if (INTEL_GEN(dev_priv) >= 6) - err = gen6_drpc_info(m); - else - err = ironlake_drpc_info(m); - - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + err = vlv_drpc_info(m); + else if (INTEL_GEN(dev_priv) >= 6) + err = gen6_drpc_info(m); + else + err = ironlake_drpc_info(m); + } return err; } @@ -2068,8 +2064,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_wakeref_t wakeref; struct drm_file *file; - wakeref = intel_runtime_pm_get_if_in_use(dev_priv); - if (wakeref) { + with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); act_freq = vlv_punit_read(dev_priv, @@ -2080,7 +2075,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - intel_runtime_pm_put(dev_priv, wakeref); } seq_printf(m, "RPS enabled? %d\n", rps->enabled); @@ -2172,9 +2166,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->huc.fw, &p); - wakeref = intel_runtime_pm_get(dev_priv); - seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); return 0; } @@ -2184,7 +2177,6 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); intel_wakeref_t wakeref; struct drm_printer p; - u32 tmp, i; if (!HAS_GUC(dev_priv)) return -ENODEV; @@ -2192,22 +2184,23 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->guc.fw, &p); - wakeref = intel_runtime_pm_get(dev_priv); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 tmp = I915_READ(GUC_STATUS); + u32 i; - tmp = I915_READ(GUC_STATUS); - - seq_printf(m, "\nGuC status 0x%08x:\n", tmp); - seq_printf(m, "\tBootrom status = 0x%x\n", - (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); - seq_printf(m, "\tuKernel status = 0x%x\n", - (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); - seq_printf(m, "\tMIA Core status = 0x%x\n", - (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); - seq_puts(m, "\nScratch registers:\n"); - for (i = 0; i < 16; i++) - seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); - - intel_runtime_pm_put(dev_priv, wakeref); + seq_printf(m, "\nGuC status 0x%08x:\n", tmp); + seq_printf(m, "\tBootrom status = 0x%x\n", + (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + seq_printf(m, "\tuKernel status = 0x%x\n", + (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + seq_printf(m, "\tMIA Core status = 0x%x\n", + (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); + seq_puts(m, "\nScratch registers:\n"); + for (i = 0; i < 16; i++) { + seq_printf(m, "\t%2d: \t0x%x\n", + i, I915_READ(SOFT_SCRATCH(i))); + } + } return 0; } @@ -2680,19 +2673,14 @@ static int i915_energy_uJ(struct seq_file *m, void *data) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) { - intel_runtime_pm_put(dev_priv, wakeref); + if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) return -ENODEV; - } units = (power & 0x1f00) >> 8; - power = I915_READ(MCH_SECP_NRG_STTS); + with_intel_runtime_pm(dev_priv, wakeref) + power = I915_READ(MCH_SECP_NRG_STTS); + power = (1000000 * power) >> units; /* convert to uJ */ - - intel_runtime_pm_put(dev_priv, wakeref); - seq_printf(m, "%llu", power); return 0; @@ -3275,22 +3263,20 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; intel_wakeref_t wakeref; - int ret; bool enable; + int ret; ret = kstrtobool_from_user(ubuf, len, &enable); if (ret < 0) return ret; - wakeref = intel_runtime_pm_get(dev_priv); - - if (!dev_priv->ipc_enabled && enable) - DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); - dev_priv->wm.distrust_bios_wm = true; - dev_priv->ipc_enabled = enable; - intel_enable_ipc(dev_priv); - - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) { + if (!dev_priv->ipc_enabled && enable) + DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); + dev_priv->wm.distrust_bios_wm = true; + dev_priv->ipc_enabled = enable; + intel_enable_ipc(dev_priv); + } return len; } @@ -4130,16 +4116,13 @@ i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; intel_wakeref_t wakeref; - u32 snpcr; + u32 snpcr = 0; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; @@ -4151,7 +4134,6 @@ i915_cache_sharing_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; intel_wakeref_t wakeref; - u32 snpcr; if (!(IS_GEN_RANGE(dev_priv, 6, 7))) return -ENODEV; @@ -4159,16 +4141,17 @@ i915_cache_sharing_set(void *data, u64 val) if (val > 3) return -EINVAL; - wakeref = intel_runtime_pm_get(dev_priv); DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 snpcr; - /* Update the cache sharing policy here as well */ - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - snpcr &= ~GEN6_MBC_SNPCR_MASK; - snpcr |= (val << GEN6_MBC_SNPCR_SHIFT); - I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + /* Update the cache sharing policy here as well */ + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= val << GEN6_MBC_SNPCR_SHIFT; + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + } - intel_runtime_pm_put(dev_priv, wakeref); return 0; } @@ -4405,20 +4388,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused) sseu.max_eus_per_subslice = RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; - wakeref = intel_runtime_pm_get(dev_priv); - - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_sseu_device_status(dev_priv, &sseu); - } else if (IS_BROADWELL(dev_priv)) { - broadwell_sseu_device_status(dev_priv, &sseu); - } else if (IS_GEN(dev_priv, 9)) { - gen9_sseu_device_status(dev_priv, &sseu); - } else if (INTEL_GEN(dev_priv) >= 10) { - gen10_sseu_device_status(dev_priv, &sseu); + with_intel_runtime_pm(dev_priv, wakeref) { + if (IS_CHERRYVIEW(dev_priv)) + cherryview_sseu_device_status(dev_priv, &sseu); + else if (IS_BROADWELL(dev_priv)) + broadwell_sseu_device_status(dev_priv, &sseu); + else if (IS_GEN(dev_priv, 9)) + gen9_sseu_device_status(dev_priv, &sseu); + else if (INTEL_GEN(dev_priv) >= 10) + gen10_sseu_device_status(dev_priv, &sseu); } - intel_runtime_pm_put(dev_priv, wakeref); - i915_print_sseu_info(m, false, &sseu); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3186859ff378..f5e2456c4f73 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -813,13 +813,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) i915_gem_chipset_flush(dev_priv); - wakeref = intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv, wakeref); + spin_unlock_irq(&dev_priv->uncore.lock); + } } static void diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e2c61633e95d..dbea14bf67cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2535,9 +2535,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; - wakeref = intel_runtime_pm_get(i915); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -2556,9 +2555,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static int aliasing_gtt_bind_vma(struct i915_vma *vma, @@ -2592,9 +2590,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_GLOBAL_BIND) { intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) { + vma->vm->insert_entries(vma->vm, vma, + cache_level, pte_flags); + } } return 0; @@ -2605,11 +2604,11 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; if (vma->flags & I915_VMA_GLOBAL_BIND) { + struct i915_address_space *vm = vma->vm; intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + vm->clear_range(vm, vma->node.start, vma->size); } if (vma->flags & I915_VMA_LOCAL_BIND) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index f01489b05b5e..8ceecb026910 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -296,14 +296,14 @@ i915_gem_shrink(struct drm_i915_private *i915, unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) { intel_wakeref_t wakeref; - unsigned long freed; + unsigned long freed = 0; - wakeref = intel_runtime_pm_get(i915); - freed = i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) { + freed = i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + } return freed; } @@ -376,14 +376,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_ACTIVE | - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) { + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_ACTIVE | + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND); + } } shrinker_unlock(i915, unlock); @@ -400,11 +400,11 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) unsigned long unevictable, bound, unbound, freed_pages; intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - freed_pages = i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); - intel_runtime_pm_put(i915, wakeref); + freed_pages = 0; + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not @@ -454,12 +454,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr MAX_SCHEDULE_TIMEOUT)) goto out; - wakeref = intel_runtime_pm_get(i915); - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_VMAPS); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_VMAPS); /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 3d43fc9dd25d..b1cb2d3cae16 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -230,14 +230,11 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) val = dev_priv->gt_pm.rps.cur_freq; if (dev_priv->gt.awake) { - intel_wakeref_t wakeref = - intel_runtime_pm_get_if_in_use(dev_priv); + intel_wakeref_t wakeref; - if (wakeref) { + with_intel_runtime_pm_if_in_use(dev_priv, wakeref) val = intel_get_cagf(dev_priv, I915_READ_NOTRACE(GEN6_RPSTAT1)); - intel_runtime_pm_put(dev_priv, wakeref); - } } add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 2cbbf165d179..41313005af42 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -43,11 +43,10 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { intel_wakeref_t wakeref; - u64 res; + u64 res = 0; - wakeref = intel_runtime_pm_get(dev_priv); - res = intel_rc6_residency_us(dev_priv, reg); - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + res = intel_rc6_residency_us(dev_priv, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a1e4e1033289..71377ec49a10 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2187,6 +2187,14 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915); intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915); +#define with_intel_runtime_pm(i915, wf) \ + for ((wf) = intel_runtime_pm_get(i915); (wf); \ + intel_runtime_pm_put((i915), (wf)), (wf) = 0) + +#define with_intel_runtime_pm_if_in_use(i915, wf) \ + for ((wf) = intel_runtime_pm_get_if_in_use(i915); (wf); \ + intel_runtime_pm_put((i915), (wf)), (wf) = 0) + void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref); diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 20c0b36d748e..b53582c0c6c1 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -444,9 +444,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log) * Generally device is expected to be active only at this * time, so get/put should be really quick. */ - wakeref = intel_runtime_pm_get(dev_priv); - guc_action_flush_log_complete(guc); - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + guc_action_flush_log_complete(guc); } int intel_guc_log_create(struct intel_guc_log *log) @@ -507,7 +506,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); intel_wakeref_t wakeref; - int ret; + int ret = 0; BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); GEM_BUG_ON(!log->vma); @@ -521,16 +520,14 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) mutex_lock(&dev_priv->drm.struct_mutex); - if (log->level == level) { - ret = 0; + if (log->level == level) goto out_unlock; - } - wakeref = intel_runtime_pm_get(dev_priv); - ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(level), - GUC_LOG_LEVEL_IS_ENABLED(level), - GUC_LOG_LEVEL_TO_VERBOSITY(level)); - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + ret = guc_action_control_log(guc, + GUC_LOG_LEVEL_IS_VERBOSE(level), + GUC_LOG_LEVEL_IS_ENABLED(level), + GUC_LOG_LEVEL_TO_VERBOSITY(level)); if (ret) { DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); goto out_unlock; @@ -611,9 +608,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - wakeref = intel_runtime_pm_get(i915); - guc_action_flush_log(guc); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ guc_log_capture_logs(log); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 3e8c18b6a42d..9bd1c9002c2a 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -116,14 +116,13 @@ int intel_huc_check_status(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); intel_wakeref_t wakeref; - bool status; + bool status = false; if (!HAS_HUC(dev_priv)) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) + status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; return status; } diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 93a2e4b5c54c..5a39a6347a7a 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1204,17 +1204,19 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd) struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); intel_wakeref_t wakeref; - u32 hw_level; - int ret; + int ret = 0; - wakeref = intel_runtime_pm_get(dev_priv); - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + with_intel_runtime_pm(dev_priv, wakeref) { + u32 hw_level; - hw_level = intel_panel_get_backlight(connector); - ret = scale_hw_to_user(connector, hw_level, bd->props.max_brightness); + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - drm_modeset_unlock(&dev->mode_config.connection_mutex); - intel_runtime_pm_put(dev_priv, wakeref); + hw_level = intel_panel_get_backlight(connector); + ret = scale_hw_to_user(connector, + hw_level, bd->props.max_brightness); + + drm_modeset_unlock(&dev->mode_config.connection_mutex); + } return ret; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d494d92da02c..681ea532585e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1696,21 +1696,21 @@ int i915_reg_read_ioctl(struct drm_device *dev, flags = reg->offset & (entry->size - 1); - wakeref = intel_runtime_pm_get(dev_priv); - if (entry->size == 8 && flags == I915_REG_READ_8B_WA) - reg->val = I915_READ64_2x32(entry->offset_ldw, - entry->offset_udw); - else if (entry->size == 8 && flags == 0) - reg->val = I915_READ64(entry->offset_ldw); - else if (entry->size == 4 && flags == 0) - reg->val = I915_READ(entry->offset_ldw); - else if (entry->size == 2 && flags == 0) - reg->val = I915_READ16(entry->offset_ldw); - else if (entry->size == 1 && flags == 0) - reg->val = I915_READ8(entry->offset_ldw); - else - ret = -EINVAL; - intel_runtime_pm_put(dev_priv, wakeref); + with_intel_runtime_pm(dev_priv, wakeref) { + if (entry->size == 8 && flags == I915_REG_READ_8B_WA) + reg->val = I915_READ64_2x32(entry->offset_ldw, + entry->offset_udw); + else if (entry->size == 8 && flags == 0) + reg->val = I915_READ64(entry->offset_ldw); + else if (entry->size == 4 && flags == 0) + reg->val = I915_READ(entry->offset_ldw); + else if (entry->size == 2 && flags == 0) + reg->val = I915_READ16(entry->offset_ldw); + else if (entry->size == 1 && flags == 0) + reg->val = I915_READ8(entry->offset_ldw); + else + ret = -EINVAL; + } return ret; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 01a46c46fe25..e77b7ed449ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -98,26 +98,22 @@ static void pm_suspend(struct drm_i915_private *i915) { intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - - i915_gem_suspend_gtt_mappings(i915); - i915_gem_suspend_late(i915); - - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) { + i915_gem_suspend_gtt_mappings(i915); + i915_gem_suspend_late(i915); + } } static void pm_hibernate(struct drm_i915_private *i915) { intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); + with_intel_runtime_pm(i915, wakeref) { + i915_gem_suspend_gtt_mappings(i915); - i915_gem_suspend_gtt_mappings(i915); - - i915_gem_freeze(i915); - i915_gem_freeze_late(i915); - - intel_runtime_pm_put(i915, wakeref); + i915_gem_freeze(i915); + i915_gem_freeze_late(i915); + } } static void pm_resume(struct drm_i915_private *i915) @@ -128,13 +124,11 @@ static void pm_resume(struct drm_i915_private *i915) * Both suspend and hibernate follow the same wakeup path and assume * that runtime-pm just works. */ - wakeref = intel_runtime_pm_get(i915); - - intel_engines_sanitize(i915, false); - i915_gem_sanitize(i915); - i915_gem_resume(i915); - - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) { + intel_engines_sanitize(i915, false); + i915_gem_sanitize(i915); + i915_gem_resume(i915); + } } static int igt_gem_suspend(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7a9b1f20b019..4cba50679607 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -610,9 +610,9 @@ static int igt_ctx_exec(void *arg) } } - wakeref = intel_runtime_pm_get(i915); - err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915, wakeref); + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -718,9 +718,9 @@ static int igt_ctx_readonly(void *arg) i915_gem_object_set_readonly(obj); } - wakeref = intel_runtime_pm_get(i915); - err = gpu_fill(obj, ctx, engine, dw); - intel_runtime_pm_put(i915, wakeref); + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index bbcbf11c72b3..067e5dfa0a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -484,18 +484,16 @@ int i915_gem_evict_mock_selftests(void) }; struct drm_i915_private *i915; intel_wakeref_t wakeref; - int err; + int err = 0; i915 = mock_gem_device(); if (!i915) return -ENOMEM; mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); + with_intel_runtime_pm(i915, wakeref) + err = i915_subtests(tests, i915); - err = i915_subtests(tests, i915); - - intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 3575e1387c3f..395ae878e0f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -509,9 +509,8 @@ static void disable_retire_worker(struct drm_i915_private *i915) if (!i915->gt.active_requests++) { intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(i915); - i915_gem_unpark(i915); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + i915_gem_unpark(i915); } mutex_unlock(&i915->drm.struct_mutex); @@ -593,10 +592,10 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } + err = 0; mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - err = make_obj_busy(obj); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + err = make_obj_busy(obj); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 9f705ff9423f..2e14d6d3bad7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -256,17 +256,15 @@ int i915_request_mock_selftests(void) }; struct drm_i915_private *i915; intel_wakeref_t wakeref; - int err; + int err = 0; i915 = mock_gem_device(); if (!i915) return -ENOMEM; - wakeref = intel_runtime_pm_get(i915); + with_intel_runtime_pm(i915, wakeref) + err = i915_subtests(tests, i915); - err = i915_subtests(tests, i915); - - intel_runtime_pm_put(i915, wakeref); drm_dev_put(&i915->drm); return err; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 75324b6249e3..9009d7b8b136 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -93,9 +93,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - wakeref = intel_runtime_pm_get(engine->i915); - rq = i915_request_alloc(engine, ctx); - intel_runtime_pm_put(engine->i915, wakeref); + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -236,14 +236,15 @@ switch_to_scratch_context(struct intel_engine_cs *engine, if (IS_ERR(ctx)) return PTR_ERR(ctx); - wakeref = intel_runtime_pm_get(engine->i915); - - if (spin) - rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); - else - rq = i915_request_alloc(engine, ctx); - - intel_runtime_pm_put(engine->i915, wakeref); + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(engine->i915, wakeref) { + if (spin) + rq = igt_spinner_create_request(spin, + ctx, engine, + MI_NOOP); + else + rq = i915_request_alloc(engine, ctx); + } kernel_context_close(ctx); @@ -301,9 +302,8 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out; - wakeref = intel_runtime_pm_get(i915); - err = reset(engine); - intel_runtime_pm_put(i915, wakeref); + with_intel_runtime_pm(i915, wakeref) + err = reset(engine); if (want_spin) { igt_spinner_end(&spin); From 0e6e0be4c952372cc4c3f30bb8ddf9451f314503 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:24 +0000 Subject: [PATCH 039/539] drm/i915: Markup paired operations on display power domains The majority of runtime-pm operations are bounded and scoped within a function; these are easy to verify that the wakeref are handled correctly. We can employ the compiler to help us, and reduce the number of wakerefs tracked when debugging, by passing around cookies provided by the various rpm_get functions to their rpm_put counterpart. This makes the pairing explicit, and given the required wakeref cookie the compiler can verify that we pass an initialised value to the rpm_put (quite handy for double checking error paths). Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++------ drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/icl_dsi.c | 34 ++++++++---- drivers/gpu/drm/i915/intel_audio.c | 3 +- drivers/gpu/drm/i915/intel_cdclk.c | 10 ++-- drivers/gpu/drm/i915/intel_crt.c | 25 +++++---- drivers/gpu/drm/i915/intel_csr.c | 25 +++++++-- drivers/gpu/drm/i915/intel_ddi.c | 36 ++++++++----- drivers/gpu/drm/i915/intel_display.c | 68 ++++++++++++++--------- drivers/gpu/drm/i915/intel_dp.c | 38 +++++++------ drivers/gpu/drm/i915/intel_dpll_mgr.c | 66 +++++++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 17 ++++-- drivers/gpu/drm/i915/intel_dsi.h | 1 + drivers/gpu/drm/i915/intel_hdmi.c | 18 ++++--- drivers/gpu/drm/i915/intel_i2c.c | 20 +++---- drivers/gpu/drm/i915/intel_lvds.c | 8 +-- drivers/gpu/drm/i915/intel_pipe_crc.c | 6 ++- drivers/gpu/drm/i915/intel_pm.c | 6 ++- drivers/gpu/drm/i915/intel_runtime_pm.c | 71 ++++++++++++++++--------- drivers/gpu/drm/i915/intel_sprite.c | 24 ++++++--- drivers/gpu/drm/i915/intel_vdsc.c | 4 +- drivers/gpu/drm/i915/vlv_dsi.c | 14 +++-- 23 files changed, 346 insertions(+), 189 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1c7913b40bb7..e846608ee6aa 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -626,10 +626,12 @@ static void gen8_display_interrupt_info(struct seq_file *m) for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!wakeref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -644,7 +646,7 @@ static void gen8_display_interrupt_info(struct seq_file *m) pipe_name(pipe), I915_READ(GEN8_DE_PIPE_IER(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", @@ -680,6 +682,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data) wakeref = intel_runtime_pm_get(dev_priv); if (IS_CHERRYVIEW(dev_priv)) { + intel_wakeref_t pref; + seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); @@ -695,8 +699,9 @@ static int i915_interrupt_info(struct seq_file *m, void *data) enum intel_display_power_domain power_domain; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + pref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!pref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -706,17 +711,17 @@ static int i915_interrupt_info(struct seq_file *m, void *data) pipe_name(pipe), I915_READ(PIPESTAT(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, pref); } - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref); for (i = 0; i < 4; i++) { seq_printf(m, "GT Interrupt IMR %d:\t%08x\n", @@ -779,10 +784,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IMR)); for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; + intel_wakeref_t pref; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, - power_domain)) { + pref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + if (!pref) { seq_printf(m, "Pipe %c power disabled\n", pipe_name(pipe)); continue; @@ -791,7 +798,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, pref); } seq_printf(m, "Master IER:\t%08x\n", @@ -1709,8 +1716,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) intel_wakeref_t wakeref; bool sr_enabled = false; - wakeref = intel_runtime_pm_get(dev_priv); - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); if (INTEL_GEN(dev_priv) >= 9) /* no global SR status; inspect per-plane WM */; @@ -1726,8 +1732,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - intel_runtime_pm_put(dev_priv, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b6d0cd890a19..0aedbb88eb5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -344,6 +344,7 @@ struct intel_csr { uint32_t mmiodata[8]; uint32_t dc_state; uint32_t allowed_dc_mask; + intel_wakeref_t wakeref; }; enum i915_cache_level { @@ -1982,6 +1983,7 @@ struct drm_i915_private { * is a slight delay before we do so. */ intel_wakeref_t awake; + intel_wakeref_t power; /** * The number of times we have woken up. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f5e2456c4f73..abbca28f4cff 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -176,7 +176,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, i915->gt.power); intel_runtime_pm_put(i915, wakeref); @@ -221,7 +221,7 @@ void i915_gem_unpark(struct drm_i915_private *i915) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ i915->gt.epoch = 1; diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 4dd793b78996..f3a5f03646ce 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -337,9 +337,11 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) } for_each_dsi_port(port, intel_dsi->ports) { - intel_display_power_get(dev_priv, port == PORT_A ? - POWER_DOMAIN_PORT_DDI_A_IO : - POWER_DOMAIN_PORT_DDI_B_IO); + intel_dsi->io_wakeref[port] = + intel_display_power_get(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO); } } @@ -1125,10 +1127,18 @@ static void gen11_dsi_disable_io_power(struct intel_encoder *encoder) enum port port; u32 tmp; - intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_A_IO); + for_each_dsi_port(port, intel_dsi->ports) { + intel_wakeref_t wakeref; - if (intel_dsi->dual_link) - intel_display_power_put(dev_priv, POWER_DOMAIN_PORT_DDI_B_IO); + wakeref = fetch_and_zero(&intel_dsi->io_wakeref[port]); + if (wakeref) { + intel_display_power_put(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO, + wakeref); + } + } /* set mode to DDI */ for_each_dsi_port(port, intel_dsi->ports) { @@ -1229,13 +1239,15 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u32 tmp; - enum port port; enum transcoder dsi_trans; + intel_wakeref_t wakeref; + enum port port; bool ret = false; + u32 tmp; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; for_each_dsi_port(port, intel_dsi->ports) { @@ -1260,7 +1272,7 @@ static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, ret = tmp & PIPECONF_ENABLE; } out: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 202a58cf2d9f..de26cd0a5497 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -748,7 +748,8 @@ static void i915_audio_component_get_power(struct device *kdev) static void i915_audio_component_put_power(struct device *kdev) { - intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO); + intel_display_power_put_unchecked(kdev_to_i915(kdev), + POWER_DOMAIN_AUDIO); } static void i915_audio_component_codec_wake_override(struct device *kdev, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 2021e484a287..73cb7250118e 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -520,6 +520,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; + intel_wakeref_t wakeref; switch (cdclk) { case 400000: @@ -539,7 +540,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, * a system suspend. So grab the PIPE-A domain, which covers * the HW blocks needed for the following programming. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); @@ -593,7 +594,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); } static void chv_set_cdclk(struct drm_i915_private *dev_priv, @@ -601,6 +602,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; u32 val, cmd = cdclk_state->voltage_level; + intel_wakeref_t wakeref; switch (cdclk) { case 333333: @@ -619,7 +621,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, * a system suspend. So grab the PIPE-A domain, which covers * the HW blocks needed for the following programming. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); @@ -637,7 +639,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, vlv_program_pfi_credits(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A, wakeref); } static int bdw_calc_cdclk(int min_cdclk) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 951e9bae6921..33bd2addcbdd 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -83,15 +83,17 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_crt_port_enabled(dev_priv, crt->adpa_reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -776,6 +778,7 @@ intel_crt_detect(struct drm_connector *connector, struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; + intel_wakeref_t wakeref; int status, ret; struct intel_load_detect_pipe tmp; @@ -784,7 +787,8 @@ intel_crt_detect(struct drm_connector *connector, force); if (i915_modparams.load_detect_test) { - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); goto load_detect; } @@ -792,7 +796,8 @@ intel_crt_detect(struct drm_connector *connector, if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -847,7 +852,7 @@ load_detect: } out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return status; } @@ -857,10 +862,12 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - int ret; + intel_wakeref_t wakeref; struct i2c_adapter *i2c; + int ret; - intel_display_power_get(dev_priv, intel_encoder->power_domain); + wakeref = intel_display_power_get(dev_priv, + intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -872,7 +879,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index a516697bf57d..ea5fb64d33dd 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -409,6 +409,21 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, return memcpy(dmc_payload, &fw->data[readcount], nbytes); } +static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv) +{ + WARN_ON(dev_priv->csr.wakeref); + dev_priv->csr.wakeref = + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); +} + +static void intel_csr_runtime_pm_put(struct drm_i915_private *dev_priv) +{ + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&dev_priv->csr.wakeref); + + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); +} + static void csr_load_work_fn(struct work_struct *work) { struct drm_i915_private *dev_priv; @@ -424,8 +439,7 @@ static void csr_load_work_fn(struct work_struct *work) if (dev_priv->csr.dmc_payload) { intel_csr_load_program(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_put(dev_priv); DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n", dev_priv->csr.fw_path, @@ -467,7 +481,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) * suspend as runtime suspend *requires* a working CSR for whatever * reason. */ - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_get(dev_priv); if (INTEL_GEN(dev_priv) >= 12) { /* Allow to load fw via parameter using the last known size */ @@ -538,7 +552,7 @@ void intel_csr_ucode_suspend(struct drm_i915_private *dev_priv) /* Drop the reference held in case DMC isn't loaded. */ if (!dev_priv->csr.dmc_payload) - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_put(dev_priv); } /** @@ -558,7 +572,7 @@ void intel_csr_ucode_resume(struct drm_i915_private *dev_priv) * loaded. */ if (!dev_priv->csr.dmc_payload) - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + intel_csr_runtime_pm_get(dev_priv); } /** @@ -574,6 +588,7 @@ void intel_csr_ucode_fini(struct drm_i915_private *dev_priv) return; intel_csr_ucode_suspend(dev_priv); + WARN_ON(dev_priv->csr.wakeref); kfree(dev_priv->csr.dmc_payload); } diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2d6ed990a232..7f3cd055de50 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1860,12 +1860,14 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, { struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + intel_wakeref_t wakeref; enum pipe pipe = 0; int ret = 0; uint32_t tmp; - if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv, - intel_encoder->power_domain))) + wakeref = intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain); + if (WARN_ON(!wakeref)) return -ENXIO; if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) { @@ -1880,7 +1882,7 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, tmp &= ~TRANS_DDI_HDCP_SIGNALLING; I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp); out: - intel_display_power_put(dev_priv, intel_encoder->power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } @@ -1891,13 +1893,15 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) struct intel_encoder *encoder = intel_connector->encoder; int type = intel_connector->base.connector_type; enum port port = encoder->port; - enum pipe pipe = 0; enum transcoder cpu_transcoder; + intel_wakeref_t wakeref; + enum pipe pipe = 0; uint32_t tmp; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; if (!encoder->get_hw_state(encoder, &pipe)) { @@ -1939,7 +1943,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -1950,6 +1954,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = encoder->port; + intel_wakeref_t wakeref; enum pipe p; u32 tmp; u8 mst_pipe_mask; @@ -1957,8 +1962,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, *pipe_mask = 0; *is_dp_mst = false; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return; tmp = I915_READ(DDI_BUF_CTL(port)); @@ -2029,7 +2035,7 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); } bool intel_ddi_get_hw_state(struct intel_encoder *encoder, @@ -3286,7 +3292,8 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, intel_edp_panel_vdd_on(intel_dp); intel_edp_panel_off(intel_dp); - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_display_power_put_unchecked(dev_priv, + dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); } @@ -3306,7 +3313,8 @@ static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, intel_disable_ddi_buf(encoder, old_crtc_state); - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + intel_display_power_put_unchecked(dev_priv, + dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); @@ -3626,8 +3634,8 @@ intel_ddi_post_pll_disable(struct intel_encoder *encoder, if (intel_crtc_has_dp_encoder(crtc_state) || intel_port_is_tc(dev_priv, encoder->port)) - intel_display_power_put(dev_priv, - intel_ddi_main_link_aux_domain(dig_port)); + intel_display_power_put_unchecked(dev_priv, + intel_ddi_main_link_aux_domain(dig_port)); } void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b0b8f9ffd873..36c56d1637b8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1197,17 +1197,19 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; /* we keep both pipes enabled on 830 */ if (IS_I830(dev_priv)) state = true; power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); - if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (wakeref) { u32 val = I915_READ(PIPECONF(cpu_transcoder)); cur_state = !!(val & PIPECONF_ENABLE); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } else { cur_state = false; } @@ -3412,6 +3414,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + intel_wakeref_t wakeref; bool ret; u32 val; @@ -3421,7 +3424,8 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, * display power wells. */ power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; val = I915_READ(DSPCNTR(i9xx_plane)); @@ -3434,7 +3438,7 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, *pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> DISPPLANE_SEL_PIPE_SHIFT; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -6107,7 +6111,7 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain; for_each_power_domain(domain, domains) - intel_display_power_put(dev_priv, domain); + intel_display_power_put_unchecked(dev_priv, domain); } static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, @@ -6354,7 +6358,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, domains = intel_crtc->enabled_power_domains; for_each_power_domain(domain, domains) - intel_display_power_put(dev_priv, domain); + intel_display_power_put_unchecked(dev_priv, domain); intel_crtc->enabled_power_domains = 0; dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); @@ -7966,11 +7970,13 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; uint32_t tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -8071,7 +8077,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -9038,11 +9044,13 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; uint32_t tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -9125,7 +9133,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -9734,7 +9742,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, out: for_each_power_domain(power_domain, power_domain_mask) - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put_unchecked(dev_priv, power_domain); return active; } @@ -9984,17 +9992,19 @@ static bool i845_cursor_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(PIPE_A); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; *pipe = PIPE_A; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -10217,6 +10227,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; u32 val; @@ -10226,7 +10237,8 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, * display power wells. */ power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; val = I915_READ(CURCNTR(plane->pipe)); @@ -10239,7 +10251,7 @@ static bool i9xx_cursor_get_hw_state(struct intel_plane *plane, *pipe = (val & MCURSOR_PIPE_SELECT_MASK) >> MCURSOR_PIPE_SELECT_SHIFT; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -12950,6 +12962,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc *intel_crtc; u64 put_domains[I915_MAX_PIPES] = {}; + intel_wakeref_t wakeref = 0; int i; intel_atomic_commit_fence_wait(intel_state); @@ -12957,7 +12970,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_wait_for_dependencies(state); if (intel_state->modeset) - intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { old_intel_crtc_state = to_intel_crtc_state(old_crtc_state); @@ -13094,7 +13107,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * the culprit. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET); + intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref); } /* @@ -15496,19 +15509,25 @@ void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) void i915_redisable_vga(struct drm_i915_private *dev_priv) { - /* This function can be called both from intel_modeset_setup_hw_state or + intel_wakeref_t wakeref; + + /* + * This function can be called both from intel_modeset_setup_hw_state or * at a very early point in our resume sequence, where the power well * structures are not yet restored. Since this function is at a very * paranoid "someone might have enabled VGA while we were not looking" * level, just check if the power well is enabled instead of trying to * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. */ - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA)) + * the rest of the driver uses. + */ + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_VGA); + if (!wakeref) return; i915_redisable_vga_power_on(dev_priv); - intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); + intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref); } /* FIXME read out full plane state for all planes */ @@ -15808,12 +15827,13 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; struct intel_encoder *encoder; + struct intel_crtc *crtc; + intel_wakeref_t wakeref; int i; - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); intel_early_display_was(dev_priv); intel_modeset_readout_hw_state(dev); @@ -15883,7 +15903,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, modeset_put_power_domains(dev_priv, put_domains); } - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, wakeref); intel_fbc_init_pipe_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d3cd40e656fe..fc85fd77a661 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -621,8 +621,8 @@ static void pps_unlock(struct intel_dp *intel_dp) mutex_unlock(&dev_priv->pps_mutex); - intel_display_power_put(dev_priv, - intel_aux_power_domain(dp_to_dig_port(intel_dp))); + intel_display_power_put_unchecked(dev_priv, + intel_aux_power_domain(dp_to_dig_port(intel_dp))); } static void @@ -2511,8 +2511,8 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - intel_display_power_put(dev_priv, - intel_aux_power_domain(intel_dig_port)); + intel_display_power_put_unchecked(dev_priv, + intel_aux_power_domain(intel_dig_port)); } static void edp_panel_vdd_work(struct work_struct *__work) @@ -2657,7 +2657,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ - intel_display_power_put(dev_priv, intel_aux_power_domain(dig_port)); + intel_display_power_put_unchecked(dev_priv, intel_aux_power_domain(dig_port)); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -2983,16 +2983,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_dp_port_enabled(dev_priv, intel_dp->output_reg, encoder->port, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -5365,12 +5367,13 @@ intel_dp_detect(struct drm_connector *connector, enum drm_connector_status status; enum intel_display_power_domain aux_domain = intel_aux_power_domain(dig_port); + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - intel_display_power_get(dev_priv, aux_domain); + wakeref = intel_display_power_get(dev_priv, aux_domain); /* Can't disconnect eDP */ if (intel_dp_is_edp(intel_dp)) @@ -5436,7 +5439,7 @@ intel_dp_detect(struct drm_connector *connector, ret = intel_dp_retrain_link(encoder, ctx); if (ret) { - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); return ret; } } @@ -5460,7 +5463,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); return status; } @@ -5473,6 +5476,7 @@ intel_dp_force(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); enum intel_display_power_domain aux_domain = intel_aux_power_domain(dig_port); + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -5481,11 +5485,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - intel_display_power_get(dev_priv, aux_domain); + wakeref = intel_display_power_get(dev_priv, aux_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, aux_domain); + intel_display_power_put(dev_priv, aux_domain, wakeref); } static int intel_dp_get_modes(struct drm_connector *connector) @@ -5931,6 +5935,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); enum irqreturn ret = IRQ_NONE; + intel_wakeref_t wakeref; if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { /* @@ -5953,8 +5958,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) return IRQ_NONE; } - intel_display_power_get(dev_priv, - intel_aux_power_domain(intel_dig_port)); + wakeref = intel_display_power_get(dev_priv, + intel_aux_power_domain(intel_dig_port)); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -5984,7 +5989,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) put_power: intel_display_power_put(dev_priv, - intel_aux_power_domain(intel_dig_port)); + intel_aux_power_domain(intel_dig_port), + wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index d513ca875c67..04870e960537 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -345,9 +345,12 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; uint32_t val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(PCH_DPLL(id)); @@ -355,7 +358,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->fp0 = I915_READ(PCH_FP0(id)); hw_state->fp1 = I915_READ(PCH_FP1(id)); - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & DPLL_VCO_ENABLE; } @@ -509,15 +512,18 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; uint32_t val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(WRPLL_CTL(id)); hw_state->wrpll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & WRPLL_PLL_ENABLE; } @@ -526,15 +532,18 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + intel_wakeref_t wakeref; uint32_t val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(SPLL_CTL); hw_state->spll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return val & SPLL_PLL_ENABLE; } @@ -989,9 +998,12 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1011,7 +1023,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -1020,12 +1032,15 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; + uint32_t val; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1041,7 +1056,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -1579,14 +1594,17 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ - uint32_t val; - bool ret; + intel_wakeref_t wakeref; enum dpio_phy phy; enum dpio_channel ch; + uint32_t val; + bool ret; bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -1643,7 +1661,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -2091,10 +2109,13 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; + intel_wakeref_t wakeref; uint32_t val; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; ret = false; @@ -2113,7 +2134,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } @@ -2950,11 +2971,14 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; - enum port port; + intel_wakeref_t wakeref; bool ret = false; + enum port port; + uint32_t val; - if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_PLLS); + if (!wakeref) return false; val = I915_READ(icl_pll_id_to_enable_reg(id)); @@ -3007,7 +3031,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 71377ec49a10..5e5ceec7c004 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2118,12 +2118,21 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, +intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +intel_wakeref_t +intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain); +void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); + enum intel_display_power_domain domain, + intel_wakeref_t wakeref); +#else +#define intel_display_power_put(i915, domain, wakeref) \ + intel_display_power_put_unchecked(i915, domain) +#endif void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index fc7a09049f81..df3d390e25fe 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -39,6 +39,7 @@ struct intel_dsi { struct intel_encoder base; struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; + intel_wakeref_t io_wakeref[I915_MAX_PORTS]; /* GPIO Desc for CRC based Panel control */ struct gpio_desc *gpio_panel; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 14a0c28fe7c1..14727ac06f67 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1190,15 +1190,17 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_sdvo_port_enabled(dev_priv, intel_hdmi->hdmi_reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } @@ -1895,11 +1897,12 @@ intel_hdmi_set_edid(struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + intel_wakeref_t wakeref; struct edid *edid; bool connected = false; struct i2c_adapter *i2c; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); @@ -1914,7 +1917,7 @@ intel_hdmi_set_edid(struct drm_connector *connector) intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); to_intel_connector(connector)->detect_edid = edid; if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { @@ -1939,11 +1942,12 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base; + intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (IS_ICELAKE(dev_priv) && !intel_digital_port_connected(encoder)) @@ -1955,7 +1959,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) status = connector_status_connected; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index c6159aff9dc8..4f6dc8c94634 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -697,12 +697,13 @@ out: static int gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) { - struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, - adapter); + struct intel_gmbus *bus = + container_of(adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; + intel_wakeref_t wakeref; int ret; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (bus->force_bit) { ret = i2c_bit_algo.master_xfer(adapter, msgs, num); @@ -714,17 +715,16 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) bus->force_bit |= GMBUS_FORCE_BIT_RETRY; } - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); return ret; } int intel_gmbus_output_aksv(struct i2c_adapter *adapter) { - struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, - adapter); + struct intel_gmbus *bus = + container_of(adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; - int ret; u8 cmd = DRM_HDCP_DDC_AKSV; u8 buf[DRM_HDCP_KSV_LEN] = { 0 }; struct i2c_msg msgs[] = { @@ -741,8 +741,10 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter) .buf = buf, } }; + intel_wakeref_t wakeref; + int ret; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); mutex_lock(&dev_priv->gmbus_mutex); /* @@ -753,7 +755,7 @@ int intel_gmbus_output_aksv(struct i2c_adapter *adapter) ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT); mutex_unlock(&dev_priv->gmbus_mutex); - intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 6adcc8d037bf..b01aacb5d73d 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -94,15 +94,17 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); + intel_wakeref_t wakeref; bool ret; - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; ret = intel_lvds_port_enabled(dev_priv, lvds_encoder->reg, pipe); - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index bdabcfab8090..56d614b02302 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -589,6 +589,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; + intel_wakeref_t wakeref; u32 val = 0; /* shut up gcc */ int ret = 0; @@ -598,7 +599,8 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) } power_domain = POWER_DOMAIN_PIPE(crtc->index); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) { + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) { DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n"); return -EIO; } @@ -624,7 +626,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) pipe_crc->skipped = 0; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 83b01cde8113..ab7257720c7e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3989,10 +3989,12 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; enum pipe pipe = crtc->pipe; + intel_wakeref_t wakeref; enum plane_id plane_id; power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return; for_each_plane_id_on_crtc(crtc, plane_id) @@ -4001,7 +4003,7 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, &ddb_y[plane_id], &ddb_uv[plane_id]); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); } void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c29577d7a35a..6aeceab37000 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1855,18 +1855,19 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, * Any power domain reference obtained by this function must have a symmetric * call to intel_display_power_put() to release the reference again. */ -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains = &dev_priv->power_domains; - - intel_runtime_pm_get(dev_priv); + intel_wakeref_t wakeref = intel_runtime_pm_get(dev_priv); mutex_lock(&power_domains->lock); __intel_display_power_get_domain(dev_priv, domain); mutex_unlock(&power_domains->lock); + + return wakeref; } /** @@ -1881,13 +1882,16 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, * Any power domain reference obtained by this function must have a symmetric * call to intel_display_power_put() to release the reference again. */ -bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +intel_wakeref_t +intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains = &dev_priv->power_domains; + intel_wakeref_t wakeref; bool is_enabled; - if (!intel_runtime_pm_get_if_in_use(dev_priv)) + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) return false; mutex_lock(&power_domains->lock); @@ -1901,23 +1905,16 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, mutex_unlock(&power_domains->lock); - if (!is_enabled) - intel_runtime_pm_put_unchecked(dev_priv); + if (!is_enabled) { + intel_runtime_pm_put(dev_priv, wakeref); + wakeref = 0; + } - return is_enabled; + return wakeref; } -/** - * intel_display_power_put - release a power domain reference - * @dev_priv: i915 device instance - * @domain: power domain to reference - * - * This function drops the power domain reference obtained by - * intel_display_power_get() and might power down the corresponding hardware - * block right away if this is the last reference. - */ -void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void __intel_display_power_put(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_domains *power_domains; struct i915_power_well *power_well; @@ -1935,10 +1932,34 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); +} +/** + * intel_display_power_put - release a power domain reference + * @dev_priv: i915 device instance + * @domain: power domain to reference + * + * This function drops the power domain reference obtained by + * intel_display_power_get() and might power down the corresponding hardware + * block right away if this is the last reference. + */ +void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) +{ + __intel_display_power_put(dev_priv, domain); intel_runtime_pm_put_unchecked(dev_priv); } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) +void intel_display_power_put(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain, + intel_wakeref_t wakeref) +{ + __intel_display_power_put(dev_priv, domain); + intel_runtime_pm_put(dev_priv, wakeref); +} +#endif + #define I830_PIPES_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PIPE_A) | \ BIT_ULL(POWER_DOMAIN_PIPE_B) | \ @@ -4048,7 +4069,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) /* Remove the refcount we took to keep power well support disabled. */ if (!i915_modparams.disable_power_well) - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); intel_power_domains_verify_state(dev_priv); } @@ -4067,7 +4088,7 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) */ void intel_power_domains_enable(struct drm_i915_private *dev_priv) { - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); intel_power_domains_verify_state(dev_priv); } @@ -4102,7 +4123,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); /* * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9 @@ -4123,7 +4144,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * power wells if power domains must be deinitialized for suspend. */ if (!i915_modparams.disable_power_well) { - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); intel_power_domains_verify_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 8f3982c03925..87a06fcca284 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -618,17 +618,19 @@ skl_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(PLANE_CTL(plane->pipe, plane_id)) & PLANE_CTL_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -882,17 +884,19 @@ vlv_plane_get_hw_state(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -1051,17 +1055,19 @@ ivb_plane_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(SPRCTL(plane->pipe)) & SPRITE_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } @@ -1217,17 +1223,19 @@ g4x_plane_get_hw_state(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; + intel_wakeref_t wakeref; bool ret; power_domain = POWER_DOMAIN_PIPE(plane->pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return false; ret = I915_READ(DVSCNTR(plane->pipe)) & DVS_ENABLE; *pipe = plane->pipe; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } diff --git a/drivers/gpu/drm/i915/intel_vdsc.c b/drivers/gpu/drm/i915/intel_vdsc.c index 48537827616f..23abf03736e7 100644 --- a/drivers/gpu/drm/i915/intel_vdsc.c +++ b/drivers/gpu/drm/i915/intel_vdsc.c @@ -1082,6 +1082,6 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) I915_WRITE(dss_ctl2_reg, dss_ctl2_val); /* Disable Power wells for VDSC/joining */ - intel_display_power_put(dev_priv, - intel_dsc_power_domain(old_crtc_state)); + intel_display_power_put_unchecked(dev_priv, + intel_dsc_power_domain(old_crtc_state)); } diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index bb1287020f80..d116fead8514 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -959,13 +959,15 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + intel_wakeref_t wakeref; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - if (!intel_display_power_get_if_enabled(dev_priv, - encoder->power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain); + if (!wakeref) return false; /* @@ -1021,7 +1023,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, encoder->power_domain); + intel_display_power_put(dev_priv, encoder->power_domain, wakeref); return active; } @@ -1574,6 +1576,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) enum drm_panel_orientation orientation; struct intel_plane *plane; struct intel_crtc *crtc; + intel_wakeref_t wakeref; enum pipe pipe; u32 val; @@ -1584,7 +1587,8 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) plane = to_intel_plane(crtc->base.primary); power_domain = POWER_DOMAIN_PIPE(pipe); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); + if (!wakeref) return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; val = I915_READ(DSPCNTR(plane->i9xx_plane)); @@ -1596,7 +1600,7 @@ vlv_dsi_get_hw_panel_orientation(struct intel_connector *connector) else orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL; - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, power_domain, wakeref); return orientation; } From 25c896bdb8dc8b90e8f4d477185780596fe42bbe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:25 +0000 Subject: [PATCH 040/539] drm/i915: Track the wakeref used to initialise display power domains On module load and unload, we grab the POWER_DOMAIN_INIT powerwells and transfer them to the runtime-pm code. We can use our wakeref tracking to verify that the wakeref is indeed passed from init to enable, and disable to fini; and across suspend. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_runtime_pm.c | 151 +++++++++++++----------- 3 files changed, 88 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e846608ee6aa..926acd5b6e5d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2699,6 +2699,9 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); + seq_printf(m, "Runtime power status: %s\n", + enableddisabled(!dev_priv->power_domains.wakeref)); + seq_printf(m, "GPU idle: %s (epoch %u)\n", yesno(!dev_priv->gt.awake), dev_priv->gt.epoch); seq_printf(m, "IRQs disabled: %s\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0aedbb88eb5b..97200411dfad 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -822,6 +822,8 @@ struct i915_power_domains { bool display_core_suspended; int power_well_count; + intel_wakeref_t wakeref; + struct mutex lock; int domain_use_count[POWER_DOMAIN_NUM]; struct i915_power_well *power_wells; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6aeceab37000..79f00610860b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -3997,7 +3997,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); /** * intel_power_domains_init_hw - initialize hardware power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all @@ -4011,30 +4011,31 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); * intel_power_domains_enable()) and must be paired with * intel_power_domains_fini_hw(). */ -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) +void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; power_domains->initializing = true; - if (IS_ICELAKE(dev_priv)) { - icl_display_core_init(dev_priv, resume); - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_display_core_init(dev_priv, resume); - } else if (IS_GEN9_BC(dev_priv)) { - skl_display_core_init(dev_priv, resume); - } else if (IS_GEN9_LP(dev_priv)) { - bxt_display_core_init(dev_priv, resume); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_ICELAKE(i915)) { + icl_display_core_init(i915, resume); + } else if (IS_CANNONLAKE(i915)) { + cnl_display_core_init(i915, resume); + } else if (IS_GEN9_BC(i915)) { + skl_display_core_init(i915, resume); + } else if (IS_GEN9_LP(i915)) { + bxt_display_core_init(i915, resume); + } else if (IS_CHERRYVIEW(i915)) { mutex_lock(&power_domains->lock); - chv_phy_control_init(dev_priv); + chv_phy_control_init(i915); mutex_unlock(&power_domains->lock); - } else if (IS_VALLEYVIEW(dev_priv)) { + } else if (IS_VALLEYVIEW(i915)) { mutex_lock(&power_domains->lock); - vlv_cmnlane_wa(dev_priv); + vlv_cmnlane_wa(i915); mutex_unlock(&power_domains->lock); - } else if (IS_IVYBRIDGE(dev_priv) || INTEL_GEN(dev_priv) >= 7) - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + } else if (IS_IVYBRIDGE(i915) || INTEL_GEN(i915) >= 7) { + intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915)); + } /* * Keep all power wells enabled for any dependent HW access during @@ -4042,18 +4043,20 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) * resources powered until display HW readout is complete. We drop * this reference in intel_power_domains_enable(). */ - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); + /* Disable power support if the user asked so. */ if (!i915_modparams.disable_power_well) - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); - intel_power_domains_sync_hw(dev_priv); + intel_display_power_get(i915, POWER_DOMAIN_INIT); + intel_power_domains_sync_hw(i915); power_domains->initializing = false; } /** * intel_power_domains_fini_hw - deinitialize hw power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * * De-initializes the display power domain HW state. It also ensures that the * device stays powered up so that the driver can be reloaded. @@ -4062,21 +4065,24 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) * intel_power_domains_disable()) and must be paired with * intel_power_domains_init_hw(). */ -void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) +void intel_power_domains_fini_hw(struct drm_i915_private *i915) { - /* Keep the power well enabled, but cancel its rpm wakeref. */ - intel_runtime_pm_put_unchecked(dev_priv); + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&i915->power_domains.wakeref); /* Remove the refcount we took to keep power well support disabled. */ if (!i915_modparams.disable_power_well) - intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); - intel_power_domains_verify_state(dev_priv); + intel_power_domains_verify_state(i915); + + /* Keep the power well enabled, but cancel its rpm wakeref. */ + intel_runtime_pm_put(i915, wakeref); } /** * intel_power_domains_enable - enable toggling of display power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Enable the ondemand enabling/disabling of the display power wells. Note that * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled @@ -4086,30 +4092,36 @@ void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv) * of display HW readout (which will acquire the power references reflecting * the current HW state). */ -void intel_power_domains_enable(struct drm_i915_private *dev_priv) +void intel_power_domains_enable(struct drm_i915_private *i915) { - intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&i915->power_domains.wakeref); - intel_power_domains_verify_state(dev_priv); + intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); + intel_power_domains_verify_state(i915); } /** * intel_power_domains_disable - disable toggling of display power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Disable the ondemand enabling/disabling of the display power wells. See * intel_power_domains_enable() for which power wells this call controls. */ -void intel_power_domains_disable(struct drm_i915_private *dev_priv) +void intel_power_domains_disable(struct drm_i915_private *i915) { - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + struct i915_power_domains *power_domains = &i915->power_domains; - intel_power_domains_verify_state(dev_priv); + WARN_ON(power_domains->wakeref); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); + + intel_power_domains_verify_state(i915); } /** * intel_power_domains_suspend - suspend power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * @suspend_mode: specifies the target suspend state (idle, mem, hibernation) * * This function prepares the hardware power domain state before entering @@ -4118,12 +4130,14 @@ void intel_power_domains_disable(struct drm_i915_private *dev_priv) * It must be called with power domains already disabled (after a call to * intel_power_domains_disable()) and paired with intel_power_domains_resume(). */ -void intel_power_domains_suspend(struct drm_i915_private *dev_priv, +void intel_power_domains_suspend(struct drm_i915_private *i915, enum i915_drm_suspend_mode suspend_mode) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; + intel_wakeref_t wakeref __maybe_unused = + fetch_and_zero(&power_domains->wakeref); - intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); + intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref); /* * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9 @@ -4132,10 +4146,10 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * resources as required and also enable deeper system power states * that would be blocked if the firmware was inactive. */ - if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC9) && + if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) && suspend_mode == I915_DRM_SUSPEND_IDLE && - dev_priv->csr.dmc_payload != NULL) { - intel_power_domains_verify_state(dev_priv); + i915->csr.dmc_payload) { + intel_power_domains_verify_state(i915); return; } @@ -4144,25 +4158,25 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * power wells if power domains must be deinitialized for suspend. */ if (!i915_modparams.disable_power_well) { - intel_display_power_put_unchecked(dev_priv, POWER_DOMAIN_INIT); - intel_power_domains_verify_state(dev_priv); + intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT); + intel_power_domains_verify_state(i915); } - if (IS_ICELAKE(dev_priv)) - icl_display_core_uninit(dev_priv); - else if (IS_CANNONLAKE(dev_priv)) - cnl_display_core_uninit(dev_priv); - else if (IS_GEN9_BC(dev_priv)) - skl_display_core_uninit(dev_priv); - else if (IS_GEN9_LP(dev_priv)) - bxt_display_core_uninit(dev_priv); + if (IS_ICELAKE(i915)) + icl_display_core_uninit(i915); + else if (IS_CANNONLAKE(i915)) + cnl_display_core_uninit(i915); + else if (IS_GEN9_BC(i915)) + skl_display_core_uninit(i915); + else if (IS_GEN9_LP(i915)) + bxt_display_core_uninit(i915); power_domains->display_core_suspended = true; } /** * intel_power_domains_resume - resume power domain state - * @dev_priv: i915 device instance + * @i915: i915 device instance * * This function resume the hardware power domain state during system resume. * @@ -4170,28 +4184,30 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv, * intel_power_domains_enable()) and must be paired with * intel_power_domains_suspend(). */ -void intel_power_domains_resume(struct drm_i915_private *dev_priv) +void intel_power_domains_resume(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; if (power_domains->display_core_suspended) { - intel_power_domains_init_hw(dev_priv, true); + intel_power_domains_init_hw(i915, true); power_domains->display_core_suspended = false; } else { - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + WARN_ON(power_domains->wakeref); + power_domains->wakeref = + intel_display_power_get(i915, POWER_DOMAIN_INIT); } - intel_power_domains_verify_state(dev_priv); + intel_power_domains_verify_state(i915); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +static void intel_power_domains_dump_info(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; struct i915_power_well *power_well; - for_each_power_well(dev_priv, power_well) { + for_each_power_well(i915, power_well) { enum intel_display_power_domain domain; DRM_DEBUG_DRIVER("%-25s %d\n", @@ -4206,7 +4222,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) /** * intel_power_domains_verify_state - verify the HW/SW state for all power wells - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Verify if the reference count of each power well matches its HW enabled * state and the total refcount of the domains it belongs to. This must be @@ -4214,22 +4230,21 @@ static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) * acquiring reference counts for any power wells in use and disabling the * ones left on by BIOS but not required by any active output. */ -static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +static void intel_power_domains_verify_state(struct drm_i915_private *i915) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &i915->power_domains; struct i915_power_well *power_well; bool dump_domain_info; mutex_lock(&power_domains->lock); dump_domain_info = false; - for_each_power_well(dev_priv, power_well) { + for_each_power_well(i915, power_well) { enum intel_display_power_domain domain; int domains_count; bool enabled; - enabled = power_well->desc->ops->is_enabled(dev_priv, - power_well); + enabled = power_well->desc->ops->is_enabled(i915, power_well); if ((power_well->count || power_well->desc->always_on) != enabled) DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", @@ -4253,7 +4268,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) static bool dumped; if (!dumped) { - intel_power_domains_dump_info(dev_priv); + intel_power_domains_dump_info(i915); dumped = true; } } @@ -4263,7 +4278,7 @@ static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) #else -static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +static void intel_power_domains_verify_state(struct drm_i915_private *i915) { } From 69d938200598dda76c70d7b3f39d19e89b15ea71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:26 +0000 Subject: [PATCH 041/539] drm/i915/dp: Markup pps lock power well Track where and when we acquire and release the power well for pps access along the dp aux link, with a view to detecting if we leak any wakerefs. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_dp.c | 227 +++++++++++++++++--------------- 1 file changed, 119 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fc85fd77a661..0a3ac98a779e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -601,30 +601,39 @@ intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, static void intel_dp_pps_init(struct intel_dp *intel_dp); -static void pps_lock(struct intel_dp *intel_dp) +static intel_wakeref_t +pps_lock(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + intel_wakeref_t wakeref; /* * See intel_power_sequencer_reset() why we need * a power domain reference here. */ - intel_display_power_get(dev_priv, - intel_aux_power_domain(dp_to_dig_port(intel_dp))); + wakeref = intel_display_power_get(dev_priv, + intel_aux_power_domain(dp_to_dig_port(intel_dp))); mutex_lock(&dev_priv->pps_mutex); + + return wakeref; } -static void pps_unlock(struct intel_dp *intel_dp) +static intel_wakeref_t +pps_unlock(struct intel_dp *intel_dp, intel_wakeref_t wakeref) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); mutex_unlock(&dev_priv->pps_mutex); - - intel_display_power_put_unchecked(dev_priv, - intel_aux_power_domain(dp_to_dig_port(intel_dp))); + intel_display_power_put(dev_priv, + intel_aux_power_domain(dp_to_dig_port(intel_dp)), + wakeref); + return 0; } +#define with_pps_lock(dp, wf) \ + for ((wf) = pps_lock(dp); (wf); (wf) = pps_unlock((dp), (wf))) + static void vlv_power_sequencer_kick(struct intel_dp *intel_dp) { @@ -973,30 +982,30 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp), edp_notifier); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART) return 0; - pps_lock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); + i915_reg_t pp_ctrl_reg, pp_div_reg; + u32 pp_div; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); - i915_reg_t pp_ctrl_reg, pp_div_reg; - u32 pp_div; + pp_ctrl_reg = PP_CONTROL(pipe); + pp_div_reg = PP_DIVISOR(pipe); + pp_div = I915_READ(pp_div_reg); + pp_div &= PP_REFERENCE_DIVIDER_MASK; - pp_ctrl_reg = PP_CONTROL(pipe); - pp_div_reg = PP_DIVISOR(pipe); - pp_div = I915_READ(pp_div_reg); - pp_div &= PP_REFERENCE_DIVIDER_MASK; - - /* 0x1F write to PP_DIV_REG sets max cycle delay */ - I915_WRITE(pp_div_reg, pp_div | 0x1F); - I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS | PANEL_POWER_OFF); - msleep(intel_dp->panel_power_cycle_delay); + /* 0x1F write to PP_DIV_REG sets max cycle delay */ + I915_WRITE(pp_div_reg, pp_div | 0x1F); + I915_WRITE(pp_ctrl_reg, + PANEL_UNLOCK_REGS | PANEL_POWER_OFF); + msleep(intel_dp->panel_power_cycle_delay); + } } - pps_unlock(intel_dp); - return 0; } @@ -1184,16 +1193,17 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, to_i915(intel_dig_port->base.base.dev); i915_reg_t ch_ctl, ch_data[5]; uint32_t aux_clock_divider; + intel_wakeref_t wakeref; int i, ret, recv_bytes; - uint32_t status; int try, clock = 0; + uint32_t status; bool vdd; ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); for (i = 0; i < ARRAY_SIZE(ch_data); i++) ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); - pps_lock(intel_dp); + wakeref = pps_lock(intel_dp); /* * We will be called with VDD already enabled for dpcd/edid/oui reads. @@ -1337,7 +1347,7 @@ out: if (vdd) edp_panel_vdd_off(intel_dp, false); - pps_unlock(intel_dp); + pps_unlock(intel_dp, wakeref); return ret; } @@ -2464,15 +2474,15 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) */ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; bool vdd; if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - vdd = edp_panel_vdd_on(intel_dp); - pps_unlock(intel_dp); - + vdd = false; + with_pps_lock(intel_dp, wakeref) + vdd = edp_panel_vdd_on(intel_dp); I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", port_name(dp_to_dig_port(intel_dp)->base.port)); } @@ -2517,13 +2527,15 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) static void edp_panel_vdd_work(struct work_struct *__work) { - struct intel_dp *intel_dp = container_of(to_delayed_work(__work), - struct intel_dp, panel_vdd_work); + struct intel_dp *intel_dp = + container_of(to_delayed_work(__work), + struct intel_dp, panel_vdd_work); + intel_wakeref_t wakeref; - pps_lock(intel_dp); - if (!intel_dp->want_panel_vdd) - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (!intel_dp->want_panel_vdd) + edp_panel_vdd_off_sync(intel_dp); + } } static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) @@ -2613,12 +2625,13 @@ static void edp_panel_on(struct intel_dp *intel_dp) void intel_edp_panel_on(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; + if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - edp_panel_on(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_on(intel_dp); } @@ -2662,20 +2675,20 @@ static void edp_panel_off(struct intel_dp *intel_dp) void intel_edp_panel_off(struct intel_dp *intel_dp) { + intel_wakeref_t wakeref; + if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); - edp_panel_off(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_off(intel_dp); } /* Enable backlight in the panel power control. */ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 pp; - i915_reg_t pp_ctrl_reg; + intel_wakeref_t wakeref; /* * If we enable the backlight right away following a panel power @@ -2685,17 +2698,16 @@ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) */ wait_backlight_on(intel_dp); - pps_lock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + u32 pp; - pp = ironlake_get_pp_control(intel_dp); - pp |= EDP_BLC_ENABLE; + pp = ironlake_get_pp_control(intel_dp); + pp |= EDP_BLC_ENABLE; - pp_ctrl_reg = _pp_ctrl_reg(intel_dp); - - I915_WRITE(pp_ctrl_reg, pp); - POSTING_READ(pp_ctrl_reg); - - pps_unlock(intel_dp); + I915_WRITE(pp_ctrl_reg, pp); + POSTING_READ(pp_ctrl_reg); + } } /* Enable backlight PWM and backlight PP control. */ @@ -2717,23 +2729,21 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, static void _intel_edp_backlight_off(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 pp; - i915_reg_t pp_ctrl_reg; + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) return; - pps_lock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + i915_reg_t pp_ctrl_reg = _pp_ctrl_reg(intel_dp); + u32 pp; - pp = ironlake_get_pp_control(intel_dp); - pp &= ~EDP_BLC_ENABLE; + pp = ironlake_get_pp_control(intel_dp); + pp &= ~EDP_BLC_ENABLE; - pp_ctrl_reg = _pp_ctrl_reg(intel_dp); - - I915_WRITE(pp_ctrl_reg, pp); - POSTING_READ(pp_ctrl_reg); - - pps_unlock(intel_dp); + I915_WRITE(pp_ctrl_reg, pp); + POSTING_READ(pp_ctrl_reg); + } intel_dp->last_backlight_off = jiffies; edp_wait_backlight_off(intel_dp); @@ -2761,12 +2771,12 @@ static void intel_edp_backlight_power(struct intel_connector *connector, bool enable) { struct intel_dp *intel_dp = intel_attached_dp(&connector->base); + intel_wakeref_t wakeref; bool is_enabled; - pps_lock(intel_dp); - is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; - pps_unlock(intel_dp); - + is_enabled = false; + with_pps_lock(intel_dp, wakeref) + is_enabled = ironlake_get_pp_control(intel_dp) & EDP_BLC_ENABLE; if (is_enabled == enable) return; @@ -3276,22 +3286,21 @@ static void intel_enable_dp(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); uint32_t dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; + intel_wakeref_t wakeref; if (WARN_ON(dp_reg & DP_PORT_EN)) return; - pps_lock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + vlv_init_panel_power_sequencer(encoder, pipe_config); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_init_panel_power_sequencer(encoder, pipe_config); + intel_dp_enable_port(intel_dp, pipe_config); - intel_dp_enable_port(intel_dp, pipe_config); - - edp_panel_vdd_on(intel_dp); - edp_panel_on(intel_dp); - edp_panel_vdd_off(intel_dp, true); - - pps_unlock(intel_dp); + edp_panel_vdd_on(intel_dp); + edp_panel_on(intel_dp); + edp_panel_vdd_off(intel_dp, true); + } if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { unsigned int lane_mask = 0x0; @@ -3989,9 +3998,10 @@ intel_dp_link_down(struct intel_encoder *encoder, intel_dp->DP = DP; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - pps_lock(intel_dp); - intel_dp->active_pipe = INVALID_PIPE; - pps_unlock(intel_dp); + intel_wakeref_t wakeref; + + with_pps_lock(intel_dp, wakeref) + intel_dp->active_pipe = INVALID_PIPE; } } @@ -5561,14 +5571,15 @@ void intel_dp_encoder_flush_work(struct drm_encoder *encoder) intel_dp_mst_encoder_cleanup(intel_dig_port); if (intel_dp_is_edp(intel_dp)) { + intel_wakeref_t wakeref; + cancel_delayed_work_sync(&intel_dp->panel_vdd_work); /* * vdd might still be enabled do to the delayed vdd off. * Make sure vdd is actually turned off here. */ - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); if (intel_dp->edp_notifier.notifier_call) { unregister_reboot_notifier(&intel_dp->edp_notifier); @@ -5590,6 +5601,7 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder) void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); + intel_wakeref_t wakeref; if (!intel_dp_is_edp(intel_dp)) return; @@ -5599,9 +5611,8 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) * Make sure vdd is actually turned off here. */ cancel_delayed_work_sync(&intel_dp->panel_vdd_work); - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); } static @@ -5882,6 +5893,7 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); + intel_wakeref_t wakeref; if (!HAS_DDI(dev_priv)) intel_dp->DP = I915_READ(intel_dp->output_reg); @@ -5891,18 +5903,19 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) intel_dp->reset_link_params = true; - pps_lock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + intel_dp->active_pipe = vlv_active_pipe(intel_dp); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - intel_dp->active_pipe = vlv_active_pipe(intel_dp); - - if (intel_dp_is_edp(intel_dp)) { - /* Reinit the power sequencer, in case BIOS did something with it. */ - intel_dp_pps_init(intel_dp); - intel_edp_panel_vdd_sanitize(intel_dp); + if (intel_dp_is_edp(intel_dp)) { + /* + * Reinit the power sequencer, in case BIOS did + * something nasty with it. + */ + intel_dp_pps_init(intel_dp); + intel_edp_panel_vdd_sanitize(intel_dp); + } } - - pps_unlock(intel_dp); } static const struct drm_connector_funcs intel_dp_connector_funcs = { @@ -6698,8 +6711,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct drm_display_mode *downclock_mode = NULL; bool has_dpcd; struct drm_display_mode *scan; - struct edid *edid; enum pipe pipe = INVALID_PIPE; + intel_wakeref_t wakeref; + struct edid *edid; if (!intel_dp_is_edp(intel_dp)) return true; @@ -6719,13 +6733,11 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } - pps_lock(intel_dp); - - intel_dp_init_panel_power_timestamps(intel_dp); - intel_dp_pps_init(intel_dp); - intel_edp_panel_vdd_sanitize(intel_dp); - - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) { + intel_dp_init_panel_power_timestamps(intel_dp); + intel_dp_pps_init(intel_dp); + intel_edp_panel_vdd_sanitize(intel_dp); + } /* Cache DPCD and EDID for edp. */ has_dpcd = intel_edp_init_dpcd(intel_dp); @@ -6810,9 +6822,8 @@ out_vdd_off: * vdd might still be enabled do to the delayed vdd off. * Make sure vdd is actually turned off here. */ - pps_lock(intel_dp); - edp_panel_vdd_off_sync(intel_dp); - pps_unlock(intel_dp); + with_pps_lock(intel_dp, wakeref) + edp_panel_vdd_off_sync(intel_dp); return false; } From 04161d64da09f73d919c6bb935a003c0fd4bc8a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:27 +0000 Subject: [PATCH 042/539] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice As we only release each power well once, we assume that each transcoder maps to a different domain. Complain if this is not so. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 36c56d1637b8..7c974cf064fd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9569,6 +9569,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; + + WARN_ON(*power_domain_mask & BIT_ULL(power_domain)); *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -9596,6 +9598,8 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; + + WARN_ON(*power_domain_mask & BIT_ULL(power_domain)); *power_domain_mask |= BIT_ULL(power_domain); /* @@ -9712,7 +9716,9 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { + WARN_ON(power_domain_mask & BIT_ULL(power_domain)); power_domain_mask |= BIT_ULL(power_domain); + if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else From 8d761e773e29f4be5c6aae50f57262afb2b83db4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:28 +0000 Subject: [PATCH 043/539] drm/i915: Combined gt.awake/gt.power wakerefs As the GT_IRQ power domain implies a wakeref, we can use it inplace of our existing redundant rpm grab. v2: Drop papering over forgetting to take the runtime wakeref in selftests Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 11 ++++------- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 97200411dfad..fa99824f63b3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1985,7 +1985,6 @@ struct drm_i915_private { * is a slight delay before we do so. */ intel_wakeref_t awake; - intel_wakeref_t power; /** * The number of times we have woken up. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index abbca28f4cff..61037e7292ee 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -176,9 +176,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) if (INTEL_GEN(i915) >= 6) gen6_rps_idle(i915); - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, i915->gt.power); - - intel_runtime_pm_put(i915, wakeref); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); return i915->gt.epoch; } @@ -203,13 +201,11 @@ void i915_gem_unpark(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!i915->gt.active_requests); + assert_rpm_wakelock_held(i915); if (i915->gt.awake) return; - i915->gt.awake = intel_runtime_pm_get_noresume(i915); - GEM_BUG_ON(!i915->gt.awake); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -221,7 +217,8 @@ void i915_gem_unpark(struct drm_i915_private *i915) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - i915->gt.power = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ i915->gt.epoch = 1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 72ab89151ab9..dcb11c5f8230 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1045,7 +1045,7 @@ static void execlists_submission_tasklet(unsigned long data) GEM_TRACE("%s awake?=%d, active=%x\n", engine->name, - engine->i915->gt.awake, + !!engine->i915->gt.awake, engine->execlists.active); spin_lock_irqsave(&engine->timeline.lock, flags); From 4a8ab5ea0cde753b03bfefe4c98a8c4c61f46550 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 14:21:29 +0000 Subject: [PATCH 044/539] drm/i915: Mark up Ironlake ips with rpm wakerefs Currently Ironlake operates under the assumption that rpm awake (and its error checking is disabled). As such, we have missed a few places where we access registers without taking the rpm wakeref and thus trigger warnings. intel_ips being one culprit. As this involved adding a potentially sleeping rpm_get, we have to rearrange the spinlocks slightly and so switch to acquiring a device-ref under the spinlock rather than hold the spinlock for the whole operation. To be consistent, we make the change in pattern common to the intel_ips interface even though this adds a few more atomic operations than necessary in a few cases. v2: Sagar noted the mb around setting mch_dev were overkill as we only need ordering there, and that i915_emon_status was still using struct_mutex for no reason, but lacked rpm. Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190114142129.24398-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 32 ++--- drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/intel_pm.c | 174 ++++++++++++++-------------- 3 files changed, 103 insertions(+), 106 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 926acd5b6e5d..37c9aff234e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1741,32 +1741,24 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_emon_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - unsigned long temp, chipset, gfx; + struct drm_i915_private *i915 = node_to_i915(m->private); intel_wakeref_t wakeref; - int ret; - if (!IS_GEN(dev_priv, 5)) + if (!IS_GEN(i915, 5)) return -ENODEV; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + with_intel_runtime_pm(i915, wakeref) { + unsigned long temp, chipset, gfx; - wakeref = intel_runtime_pm_get(dev_priv); + temp = i915_mch_val(i915); + chipset = i915_chipset_val(i915); + gfx = i915_gfx_val(i915); - temp = i915_mch_val(dev_priv); - chipset = i915_chipset_val(dev_priv); - gfx = i915_gfx_val(dev_priv); - mutex_unlock(&dev->struct_mutex); - - intel_runtime_pm_put(dev_priv, wakeref); - - seq_printf(m, "GMCH temp: %ld\n", temp); - seq_printf(m, "Chipset power: %ld\n", chipset); - seq_printf(m, "GFX power: %ld\n", gfx); - seq_printf(m, "Total power: %ld\n", chipset + gfx); + seq_printf(m, "GMCH temp: %ld\n", temp); + seq_printf(m, "Chipset power: %ld\n", chipset); + seq_printf(m, "GFX power: %ld\n", gfx); + seq_printf(m, "Total power: %ld\n", chipset + gfx); + } return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5731f992cf44..dafbbfadd1ad 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1780,6 +1780,9 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_unregister(dev_priv); + /* Flush any external code that still may be under the RCU lock */ + synchronize_rcu(); + if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ab7257720c7e..7613ae72df3d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6203,10 +6203,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) */ DEFINE_SPINLOCK(mchdev_lock); -/* Global for IPS driver to get at the current i915 device. Protected by - * mchdev_lock. */ -static struct drm_i915_private *i915_mch_dev; - bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; @@ -7849,16 +7845,17 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { - unsigned long val; + intel_wakeref_t wakeref; + unsigned long val = 0; if (!IS_GEN(dev_priv, 5)) return 0; - spin_lock_irq(&mchdev_lock); - - val = __i915_chipset_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_chipset_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } return val; } @@ -7935,14 +7932,16 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) void i915_update_gfx_val(struct drm_i915_private *dev_priv) { + intel_wakeref_t wakeref; + if (!IS_GEN(dev_priv, 5)) return; - spin_lock_irq(&mchdev_lock); - - __i915_update_gfx_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + __i915_update_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } } static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) @@ -7984,20 +7983,36 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - unsigned long val; + intel_wakeref_t wakeref; + unsigned long val = 0; if (!IS_GEN(dev_priv, 5)) return 0; - spin_lock_irq(&mchdev_lock); - - val = __i915_gfx_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + with_intel_runtime_pm(dev_priv, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } return val; } +static struct drm_i915_private *i915_mch_dev; + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = i915_mch_dev; + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + /** * i915_read_mch_val - return value for IPS use * @@ -8006,23 +8021,24 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) */ unsigned long i915_read_mch_val(void) { - struct drm_i915_private *dev_priv; - unsigned long chipset_val, graphics_val, ret = 0; + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; + i915 = mchdev_get(); + if (!i915) + return 0; - chipset_val = __i915_chipset_val(dev_priv); - graphics_val = __i915_gfx_val(dev_priv); + with_intel_runtime_pm(i915, wakeref) { + spin_lock_irq(&mchdev_lock); + chipset_val = __i915_chipset_val(i915); + graphics_val = __i915_gfx_val(i915); + spin_unlock_irq(&mchdev_lock); + } - ret = chipset_val + graphics_val; - -out_unlock: - spin_unlock_irq(&mchdev_lock); - - return ret; + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; } EXPORT_SYMBOL_GPL(i915_read_mch_val); @@ -8033,23 +8049,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val); */ bool i915_gpu_raise(void) { - struct drm_i915_private *dev_priv; - bool ret = true; + struct drm_i915_private *i915; + + i915 = mchdev_get(); + if (!i915) + return false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - if (dev_priv->ips.max_delay > dev_priv->ips.fmax) - dev_priv->ips.max_delay--; - -out_unlock: + if (i915->ips.max_delay > i915->ips.fmax) + i915->ips.max_delay--; spin_unlock_irq(&mchdev_lock); - return ret; + drm_dev_put(&i915->drm); + return true; } EXPORT_SYMBOL_GPL(i915_gpu_raise); @@ -8061,23 +8073,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise); */ bool i915_gpu_lower(void) { - struct drm_i915_private *dev_priv; - bool ret = true; + struct drm_i915_private *i915; + + i915 = mchdev_get(); + if (!i915) + return false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) - dev_priv->ips.max_delay++; - -out_unlock: + if (i915->ips.max_delay < i915->ips.min_delay) + i915->ips.max_delay++; spin_unlock_irq(&mchdev_lock); - return ret; + drm_dev_put(&i915->drm); + return true; } EXPORT_SYMBOL_GPL(i915_gpu_lower); @@ -8088,13 +8096,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - bool ret = false; + struct drm_i915_private *i915; + bool ret; - spin_lock_irq(&mchdev_lock); - if (i915_mch_dev) - ret = i915_mch_dev->gt.awake; - spin_unlock_irq(&mchdev_lock); + i915 = mchdev_get(); + if (!i915) + return false; + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); return ret; } EXPORT_SYMBOL_GPL(i915_gpu_busy); @@ -8107,24 +8118,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy); */ bool i915_gpu_turbo_disable(void) { - struct drm_i915_private *dev_priv; - bool ret = true; + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - dev_priv->ips.max_delay = dev_priv->ips.fstart; - - if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) - ret = false; - -out_unlock: + i915->ips.max_delay = i915->ips.fstart; + ret = ironlake_set_drps(i915, i915->ips.fstart); spin_unlock_irq(&mchdev_lock); + drm_dev_put(&i915->drm); return ret; } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); @@ -8153,18 +8159,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv) { /* We only register the i915 ips part with intel-ips once everything is * set up, to avoid intel-ips sneaking in and reading bogus values. */ - spin_lock_irq(&mchdev_lock); - i915_mch_dev = dev_priv; - spin_unlock_irq(&mchdev_lock); + rcu_assign_pointer(i915_mch_dev, dev_priv); ips_ping_for_i915_load(); } void intel_gpu_ips_teardown(void) { - spin_lock_irq(&mchdev_lock); - i915_mch_dev = NULL; - spin_unlock_irq(&mchdev_lock); + rcu_assign_pointer(i915_mch_dev, NULL); } static void intel_init_emon(struct drm_i915_private *dev_priv) From d8af327087f753d7cc26d813e8e5a83461d8e5c6 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Thu, 20 Dec 2018 13:26:08 +0200 Subject: [PATCH 045/539] drm/i915: Fix ILK-IVB primary plane enable delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Primary and sprite plane enable on ILK-IVB may take two frames to complete Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103925 Signed-off-by: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1545305168-6047-1-git-send-email-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/i915/intel_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7c974cf064fd..accb3081dee1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10851,8 +10851,11 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat * Despite the w/a only being listed for IVB we assume that * the ILK/SNB note has similar ramifications, hence we apply * the w/a on all three platforms. + * + * With experimental results seems this is needed also for primary + * plane, not only sprite plane. */ - if (plane->id == PLANE_SPRITE0 && + if (plane->id != PLANE_CURSOR && (IS_GEN_RANGE(dev_priv, 5, 6) || IS_IVYBRIDGE(dev_priv)) && (turn_on || (!needs_scaling(old_plane_state) && From 094e640dcfab39fd795b6cd90fa3cafae4cf2db7 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Wed, 26 Dec 2018 18:09:33 +0800 Subject: [PATCH 046/539] drm/amdgpu/virtual_dce: No need to pin the fb's bo For virtual display, no need to pin the fb's bo. Signed-off-by: Emily Deng Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 20 ++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 17 +++-------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 15ce7e681d67..b083b219b1a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -188,10 +188,12 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); - if (unlikely(r != 0)) { - DRM_ERROR("failed to pin new abo buffer before flip\n"); - goto unreserve; + if (!adev->enable_virtual_display) { + r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); + if (unlikely(r != 0)) { + DRM_ERROR("failed to pin new abo buffer before flip\n"); + goto unreserve; + } } r = amdgpu_ttm_alloc_gart(&new_abo->tbo); @@ -211,7 +213,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags); amdgpu_bo_unreserve(new_abo); - work->base = amdgpu_bo_gpu_offset(new_abo); + if (!adev->enable_virtual_display) + work->base = amdgpu_bo_gpu_offset(new_abo); work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) + amdgpu_get_vblank_counter_kms(dev, work->crtc_id); @@ -242,9 +245,10 @@ pflip_cleanup: goto cleanup; } unpin: - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) { - DRM_ERROR("failed to unpin new abo in error path\n"); - } + if (!adev->enable_virtual_display) + if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) + DRM_ERROR("failed to unpin new abo in error path\n"); + unreserve: amdgpu_bo_unreserve(new_abo); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index fdace004544d..e4cc1d48eaab 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -167,19 +167,6 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - if (crtc->primary->fb) { - int r; - struct amdgpu_bo *abo; - - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r)) - DRM_ERROR("failed to reserve abo before unpin\n"); - else { - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - } amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; amdgpu_crtc->encoder = NULL; @@ -692,7 +679,9 @@ static int dce_virtual_pageflip(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->ddev->event_lock, flags); drm_crtc_vblank_put(&amdgpu_crtc->base); - schedule_work(&works->unpin_work); + amdgpu_bo_unref(&works->old_abo); + kfree(works->shared); + kfree(works); return 0; } From 91334223b217c43f5366ae585c31f4b6bf4a83de Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Wed, 26 Dec 2018 18:08:54 +0800 Subject: [PATCH 047/539] drm/amdgpu/virtual_dce: No need to pin the cursor bo For virtual display feature, no need to pin cursor bo. Signed-off-by: Emily Deng Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8a078f4ae73d..98df8e4704eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2798,7 +2798,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) struct drm_framebuffer *fb = crtc->primary->fb; struct amdgpu_bo *robj; - if (amdgpu_crtc->cursor_bo) { + if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); r = amdgpu_bo_reserve(aobj, true); if (r == 0) { @@ -2906,7 +2906,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->cursor_bo) { + if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); r = amdgpu_bo_reserve(aobj, true); if (r == 0) { From c27c9778a19e050628689d03604941c7039685a3 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 27 Dec 2018 14:23:30 +0800 Subject: [PATCH 048/539] drm/amd/powerplay: support BOOTUP_DEFAULT power profile mode This can avoid unexpected profile mode change after running compute workload. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 13 ++++++----- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 22 ++++++++++--------- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 8 ++++--- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 +++++----- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 ++++++--- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 2 +- 6 files changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 1479ea1dc3e7..789c4f288485 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -127,12 +127,13 @@ enum amd_pp_task { }; enum PP_SMC_POWER_PROFILE { - PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x0, - PP_SMC_POWER_PROFILE_POWERSAVING = 0x1, - PP_SMC_POWER_PROFILE_VIDEO = 0x2, - PP_SMC_POWER_PROFILE_VR = 0x3, - PP_SMC_POWER_PROFILE_COMPUTE = 0x4, - PP_SMC_POWER_PROFILE_CUSTOM = 0x5, + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, + PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, + PP_SMC_POWER_PROFILE_POWERSAVING = 0x2, + PP_SMC_POWER_PROFILE_VIDEO = 0x3, + PP_SMC_POWER_PROFILE_VR = 0x4, + PP_SMC_POWER_PROFILE_COMPUTE = 0x5, + PP_SMC_POWER_PROFILE_CUSTOM = 0x6, }; enum { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 0173d0480024..310b102a9292 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -64,17 +64,19 @@ static int ci_set_asic_special_caps(struct pp_hwmgr *hwmgr); static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) { - hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 2; - hwmgr->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 0; - hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 1; - hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VR] = 3; - hwmgr->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 4; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; + hwmgr->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - hwmgr->workload_setting[0] = PP_SMC_POWER_PROFILE_POWERSAVING; - hwmgr->workload_setting[1] = PP_SMC_POWER_PROFILE_VIDEO; - hwmgr->workload_setting[2] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - hwmgr->workload_setting[3] = PP_SMC_POWER_PROFILE_VR; - hwmgr->workload_setting[4] = PP_SMC_POWER_PROFILE_COMPUTE; + hwmgr->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + hwmgr->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + hwmgr->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; + hwmgr->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO; + hwmgr->workload_setting[4] = PP_SMC_POWER_PROFILE_VR; + hwmgr->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE; } int hwmgr_early_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index d91390459326..c8f5c00dd1e7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -77,8 +77,9 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -static const struct profile_mode_setting smu7_profiling[6] = - {{1, 0, 100, 30, 1, 0, 100, 10}, +static const struct profile_mode_setting smu7_profiling[7] = + {{0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 100, 30, 1, 0, 100, 10}, {1, 10, 0, 30, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 10, 16, 31}, {1, 0, 11, 50, 1, 0, 100, 10}, @@ -4889,7 +4890,8 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) uint32_t i, size = 0; uint32_t len; - static const char *profile_name[6] = {"3D_FULL_SCREEN", + static const char *profile_name[7] = {"BOOTUP_DEFAULT", + "3D_FULL_SCREEN", "POWER_SAVING", "VIDEO", "VR", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 79c86247d0ac..91e3bbe6d61d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -804,9 +804,9 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->backend = data; - hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO]; - hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; - hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; + hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; vega10_set_default_registry_data(hwmgr); data->disable_dpm_mask = 0xff; @@ -4668,13 +4668,15 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) { struct vega10_hwmgr *data = hwmgr->backend; uint32_t i, size = 0; - static const uint8_t profile_mode_setting[5][4] = {{70, 60, 1, 3,}, + static const uint8_t profile_mode_setting[6][4] = {{70, 60, 0, 0,}, + {70, 60, 1, 3,}, {90, 60, 0, 0,}, {70, 60, 0, 0,}, {70, 90, 0, 0,}, {30, 60, 0, 6,}, }; - static const char *profile_name[6] = {"3D_FULL_SCREEN", + static const char *profile_name[7] = {"BOOTUP_DEFAULT", + "3D_FULL_SCREEN", "POWER_SAVING", "VIDEO", "VR", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 26154f9b2178..264ce8fe545f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -390,9 +390,9 @@ static int vega20_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->backend = data; - hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO]; - hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; - hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO; + hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; vega20_set_default_registry_data(hwmgr); @@ -3261,6 +3261,9 @@ static int conv_power_profile_to_pplib_workload(int power_profile) int pplib_workload = 0; switch (power_profile) { + case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT: + pplib_workload = WORKLOAD_DEFAULT_BIT; + break; case PP_SMC_POWER_PROFILE_FULLSCREEN3D: pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; break; @@ -3290,6 +3293,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) uint32_t i, size = 0; uint16_t workload_type = 0; static const char *profile_name[] = { + "BOOTUP_DEFAULT", "3D_FULL_SCREEN", "POWER_SAVING", "VIDEO", diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 0d298a0409f5..8cb831b6a016 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -705,7 +705,7 @@ enum PP_TABLE_VERSION { /** * The main hardware manager structure. */ -#define Workload_Policy_Max 5 +#define Workload_Policy_Max 6 struct pp_hwmgr { void *adev; From d3c117e564bfbfe3c4568aca136be3880ddf4200 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Sat, 29 Dec 2018 17:46:05 +0800 Subject: [PATCH 049/539] drm/amdgpu/sriov:Correct pfvf exchange logic The pfvf exchange need be in exclusive mode. And add pfvf exchange in gpu reset. Signed-off-by: Emily Deng Reviewed-By: Xiangliang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 98df8e4704eb..7ff3a28fc903 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1701,8 +1701,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); + } return 0; } @@ -2632,9 +2634,6 @@ fence_driver_init: goto failed; } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_init_data_exchange(adev); - amdgpu_fbdev_init(adev); r = amdgpu_pm_sysfs_init(adev); @@ -3226,6 +3225,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, r = amdgpu_ib_ring_tests(adev); error: + amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { atomic_inc(&adev->vram_lost_counter); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 8cbb4655896a..b11a1c17a7f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -174,7 +174,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, return r; } /* Retrieve checksum from mailbox2 */ - if (req == IDH_REQ_GPU_INIT_ACCESS) { + if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { adev->virt.fw_reserve.checksum_key = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); From c6064de4b7344e705b1b4ac66fa89ea1da5760d0 Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Mon, 24 Dec 2018 08:55:45 +0800 Subject: [PATCH 050/539] drm/amdgpu/gfx_v8_0: Reorder the gfx, kiq and kcq rings test sequence The kiq ring and the very first compute ring may fail occasionally if they are tested directly following kiq_kcq_enable. Insert the gfx ring test before kiq ring test to delay the kiq and kcq ring tests will fix the issue. Acked-by: Alex Deucher Signed-off-by: Tiecheng Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 50 +++++++++++++++++++-------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 381f593b0cda..164ffc91b645 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4278,9 +4278,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); ring->sched.ready = true; - r = amdgpu_ring_test_helper(ring); - return r; + return 0; } static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) @@ -4369,10 +4368,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - return r; + amdgpu_ring_commit(kiq_ring); + + return 0; } static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) @@ -4709,18 +4707,34 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) if (r) goto done; - /* Test KCQs - reversing the order of rings seems to fix ring test failure - * after GPU reset - */ - for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { - ring = &adev->gfx.compute_ring[i]; - r = amdgpu_ring_test_helper(ring); - } - done: return r; } +static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + /* collect all the ring_tests here, gfx, kiq, compute */ + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + ring = &adev->gfx.kiq.ring; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + amdgpu_ring_test_helper(ring); + } + + return 0; +} + static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) { int r; @@ -4739,6 +4753,11 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) r = gfx_v8_0_kcq_resume(adev); if (r) return r; + + r = gfx_v8_0_cp_test_all_rings(adev); + if (r) + return r; + gfx_v8_0_enable_gui_idle_interrupt(adev, true); return 0; @@ -5056,6 +5075,7 @@ static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 grbm_soft_reset = 0; + struct amdgpu_ring *ring; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) @@ -5086,6 +5106,8 @@ static int gfx_v8_0_post_soft_reset(void *handle) REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) gfx_v8_0_cp_gfx_resume(adev); + gfx_v8_0_cp_test_all_rings(adev); + adev->gfx.rlc.funcs->start(adev); return 0; From c4312c27c826cb2b05dbbda614fa3c735c2c4cca Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 3 Jan 2019 08:33:15 -0500 Subject: [PATCH 051/539] drm/amdgpu: Cleanup 2 compiler warnings These 2 variables are unused now, so remove their references. Fixes: e4ae0fc drm/amdgpu: implement gfx8 post_soft_reset Fixes: 5e01c09 drm/amdgpu/gfx_v8_0: Reorder the gfx, kiq and kcq rings test sequence Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 164ffc91b645..57cb3a51bda7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4233,7 +4233,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - int r; /* Set the write pointer delay */ WREG32(mmCP_RB_WPTR_DELAY, 0); @@ -5075,7 +5074,6 @@ static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 grbm_soft_reset = 0; - struct amdgpu_ring *ring; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) From a7a0d543ca1aed6f23f1998b1cd2d87da953e444 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 2 Jan 2019 12:20:12 +0800 Subject: [PATCH 052/539] drm/amdgpu: make gfx9 enter into rlc safe mode when set MGCG MGCG should RLC enter into safe mode first. Signed-off-by: Likun Gao Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7556716038d3..968b127c6c8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3587,6 +3587,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; + amdgpu_gfx_rlc_enter_safe_mode(adev); + /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ @@ -3651,6 +3653,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } + + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, From 445938879ef75448e5e292730f75905a4c0b62da Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 7 Jan 2019 18:38:30 +0800 Subject: [PATCH 053/539] drm/amd/powerplay: update OD support flag for SKU with no OD capabilities For those ASICs with no overdrive capabilities, the OD support flag will be reset. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 264ce8fe545f..8f6097c6a02b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -980,6 +980,9 @@ static int vega20_od8_set_feature_capabilities( pp_table->FanZeroRpmEnable) od_settings->overdrive8_capabilities |= OD8_FAN_ZERO_RPM_CONTROL; + if (!od_settings->overdrive8_capabilities) + hwmgr->od_enabled = false; + return 0; } From 45b35ee02162d149e4de3a1d8d4847621fb92c20 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 7 Jan 2019 18:56:14 +0800 Subject: [PATCH 054/539] drm/amd/powerplay: create pp_od_clk_voltage device file under OD support Since pp_od_clk_voltage device file is for OD related sysfs operations. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 1f61ed95727c..6896dec97fc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2008,6 +2008,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; int ret; if (adev->pm.sysfs_initialized) @@ -2091,12 +2092,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "pp_power_profile_mode\n"); return ret; } - ret = device_create_file(adev->dev, - &dev_attr_pp_od_clk_voltage); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_od_clk_voltage\n"); - return ret; + if (hwmgr->od_enabled) { + ret = device_create_file(adev->dev, + &dev_attr_pp_od_clk_voltage); + if (ret) { + DRM_ERROR("failed to create device file " + "pp_od_clk_voltage\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_gpu_busy_percent); @@ -2118,6 +2121,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + if (adev->pm.dpm_enabled == 0) return; @@ -2138,8 +2143,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, &dev_attr_pp_power_profile_mode); - device_remove_file(adev->dev, - &dev_attr_pp_od_clk_voltage); + if (hwmgr->od_enabled) + device_remove_file(adev->dev, + &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); } From c81e42f036232cdb30de0b6294ec82f797958bc9 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Jan 2019 10:33:35 +0800 Subject: [PATCH 055/539] drm/amd/powerplay: avoid possible buffer overflow Make sure the clock level enforced is within the allowed ranges. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 8f6097c6a02b..c2061d351d04 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2251,6 +2251,13 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; + if (soft_max_level >= data->dpm_table.gfx_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, + data->dpm_table.gfx_table.count - 1); + return -EINVAL; + } + data->dpm_table.gfx_table.dpm_state.soft_min_level = data->dpm_table.gfx_table.dpm_levels[soft_min_level].value; data->dpm_table.gfx_table.dpm_state.soft_max_level = @@ -2271,6 +2278,13 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; + if (soft_max_level >= data->dpm_table.mem_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, + data->dpm_table.mem_table.count - 1); + return -EINVAL; + } + data->dpm_table.mem_table.dpm_state.soft_min_level = data->dpm_table.mem_table.dpm_levels[soft_min_level].value; data->dpm_table.mem_table.dpm_state.soft_max_level = From d1a3e239a6016f2bb42a91696056e223982e8538 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Jan 2019 12:22:24 +0800 Subject: [PATCH 056/539] drm/amd/powerplay: drop the unnecessary uclk hard min setting Since soft min setting is enough. Hard min setting is redundant. Reported-by: Likun Gao Signed-off-by: Evan Quan Acked-by: Alex Deucher Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index c2061d351d04..82935a3bd950 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1692,13 +1692,6 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_ (PPCLK_UCLK << 16) | (min_freq & 0xffff))), "Failed to set soft min memclk !", return ret); - - min_freq = data->dpm_table.mem_table.dpm_state.hard_min_level; - PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter( - hwmgr, PPSMC_MSG_SetHardMinByFreq, - (PPCLK_UCLK << 16) | (min_freq & 0xffff))), - "Failed to set hard min memclk !", - return ret); } if (data->smu_features[GNLD_DPM_UVD].enabled && From 919a94d8101ebc29868940b580fe9e9811b7dc86 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 8 Jan 2019 15:08:44 +0800 Subject: [PATCH 057/539] drm/amdgpu: fix CPDMA hang in PRT mode for VEGA20 Fix CPDMA hang in PRT mode for both VEGA10 and VEGA20 Signed-off-by: Tao Zhou Tested-by: Yukun.Li Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 968b127c6c8f..fbca0494f871 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -113,7 +113,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -135,10 +138,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = From b163714b294a7262bea91919ccbd88cbc45335d5 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Mon, 17 Dec 2018 17:00:50 +0800 Subject: [PATCH 058/539] drm/amdgpu: set WRITE_BURST_LENGTH to 64B to workaround SDMA1 hang effect asics: VEGA10 and VEGA12 Signed-off-by: Jim Qu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index fd0bfe140ee0..6811a5d05b27 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -78,7 +78,6 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000), - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), @@ -96,6 +95,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = { static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002) }; @@ -103,6 +103,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { static const struct soc15_reg_golden golden_settings_sdma_vg12[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001) }; From 4ed46c6c4ec7a6a2176289a19485a7b04753dc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 7 Jan 2019 14:43:55 +0100 Subject: [PATCH 059/539] drm/amdgpu: disable system memory page tables for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We hit a problem with IOMMU with that. Disable until we have time to debug further. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e73d152659a2..d2ea5ce2cefb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -847,9 +847,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp->size = amdgpu_vm_bo_size(adev, level); bp->byte_align = AMDGPU_GPU_PAGE_SIZE; bp->domain = AMDGPU_GEM_DOMAIN_VRAM; - if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 && - adev->flags & AMD_IS_APU) - bp->domain |= AMDGPU_GEM_DOMAIN_GTT; bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; From 2d3d25b616a09c16c2506f23289532a31638620f Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Thu, 13 Dec 2018 11:57:35 -0500 Subject: [PATCH 060/539] drm/amdgpu: Relocate kgd2kfd function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since amdkfd is merged into amdgpu module and amdgpu can access amdkfd directly, move declaration of kgd2kfd functions from kfd_priv.h to amdgpu_amdkfd.h Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 43 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 20 ++++++++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 22 ---------- .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 -- 7 files changed, 66 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 2dfaf158ef07..358f69041164 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -619,4 +619,47 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) { return NULL; } + +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, + const struct kfd2kgd_calls *f2g) +{ + return NULL; +} + +bool kgd2kfd_device_init(struct kfd_dev *kfd, + const struct kgd2kfd_shared_resources *gpu_resources) +{ + return false; +} + +void kgd2kfd_device_exit(struct kfd_dev *kfd) +{ +} + +void kgd2kfd_exit(void) +{ +} + +void kgd2kfd_suspend(struct kfd_dev *kfd) +{ +} + +int kgd2kfd_resume(struct kfd_dev *kfd) +{ + return 0; +} + +int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + return 0; +} + +int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + return 0; +} + +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 70429f7aa9a8..3214d319071b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -33,7 +33,6 @@ #include "amdgpu_sync.h" #include "amdgpu_vm.h" -extern const struct kgd2kfd_calls *kgd2kfd; extern uint64_t amdgpu_amdkfd_total_mem_size; struct amdgpu_device; @@ -214,4 +213,23 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +/* KGD2KFD callbacks */ +int kgd2kfd_init(unsigned interface_version, + const struct kgd2kfd_calls **g2f); +void kgd2kfd_exit(void); +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, + const struct kfd2kgd_calls *f2g); +bool kgd2kfd_device_init(struct kfd_dev *kfd, + const struct kgd2kfd_shared_resources *gpu_resources); +void kgd2kfd_device_exit(struct kfd_dev *kfd); +void kgd2kfd_suspend(struct kfd_dev *kfd); +int kgd2kfd_resume(struct kfd_dev *kfd); +int kgd2kfd_pre_reset(struct kfd_dev *kfd); +int kgd2kfd_post_reset(struct kfd_dev *kfd); +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 574c1181ae9a..3c7055ecd990 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -31,6 +31,7 @@ static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); +extern const struct kgd2kfd_calls *kgd2kfd; /* Eviction Fence * Fence helper functions to deal with KFD memory eviction. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index be1ab43473c6..3fc2618ca646 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -44,6 +44,8 @@ */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +extern const struct kgd2kfd_calls *kgd2kfd; + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 8018163414ff..030b39d62973 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -23,6 +23,7 @@ #include #include #include "kfd_priv.h" +#include "amdgpu_amdkfd.h" static const struct kgd2kfd_calls kgd2kfd = { .exit = kgd2kfd_exit, @@ -104,7 +105,6 @@ int kgd2kfd_init(unsigned int interface_version, return 0; } -EXPORT_SYMBOL(kgd2kfd_init); void kgd2kfd_exit(void) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0689d4ccbbc0..12b66330fc6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -266,14 +266,6 @@ struct kfd_dev { bool pci_atomic_requested; }; -/* KGD2KFD callbacks */ -void kgd2kfd_exit(void); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); -bool kgd2kfd_device_init(struct kfd_dev *kfd, - const struct kgd2kfd_shared_resources *gpu_resources); -void kgd2kfd_device_exit(struct kfd_dev *kfd); - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, @@ -541,11 +533,6 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 -int kgd2kfd_quiesce_mm(struct mm_struct *mm); -int kgd2kfd_resume_mm(struct mm_struct *mm); -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); - /* 8 byte handle containing GPU ID in the most significant 4 bytes and * idr_handle in the least significant 4 bytes */ @@ -800,20 +787,11 @@ int kfd_numa_node_to_apic_id(int numa_node_id); /* Interrupts */ int kfd_interrupt_init(struct kfd_dev *dev); void kfd_interrupt_exit(struct kfd_dev *dev); -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, bool *flag); -/* Power Management */ -void kgd2kfd_suspend(struct kfd_dev *kfd); -int kgd2kfd_resume(struct kfd_dev *kfd); - -/* GPU reset */ -int kgd2kfd_pre_reset(struct kfd_dev *kfd); -int kgd2kfd_post_reset(struct kfd_dev *kfd); - /* amdkfd Apertures */ int kfd_init_apertures(struct kfd_process *process); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 8154d67388cc..3d5c3b00d917 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -379,7 +379,4 @@ struct kgd2kfd_calls { int (*post_reset)(struct kfd_dev *kfd); }; -int kgd2kfd_init(unsigned interface_version, - const struct kgd2kfd_calls **g2f); - #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ From 8e07e2676a42e7d3e5fe8eebac6262ec975664a1 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Thu, 13 Dec 2018 15:18:01 -0500 Subject: [PATCH 061/539] drm/amdgpu: Simplify kgd2kfd interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After amdkfd is merged into amdgpu module, amdgpu can call amdkfd functions directly. Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 26 ++++++++----------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 3 +-- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++--- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 358f69041164..fc926e2f857a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -61,17 +61,13 @@ int amdgpu_amdkfd_init(void) void amdgpu_amdkfd_fini(void) { - if (kgd2kfd) - kgd2kfd->exit(); + kgd2kfd_exit(); } void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { const struct kfd2kgd_calls *kfd2kgd; - if (!kgd2kfd) - return; - switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: @@ -98,8 +94,8 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -182,7 +178,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_start_offset); if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); return; } @@ -211,14 +207,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.reserved_doorbell_mask = 0x1e0; gpu_resources.reserved_doorbell_val = 0x0e0; - kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { if (adev->kfd.dev) { - kgd2kfd->device_exit(adev->kfd.dev); + kgd2kfd_device_exit(adev->kfd.dev); adev->kfd.dev = NULL; } } @@ -227,13 +223,13 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { if (adev->kfd.dev) - kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); + kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); } void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { if (adev->kfd.dev) - kgd2kfd->suspend(adev->kfd.dev); + kgd2kfd_suspend(adev->kfd.dev); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev) @@ -241,7 +237,7 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) int r = 0; if (adev->kfd.dev) - r = kgd2kfd->resume(adev->kfd.dev); + r = kgd2kfd_resume(adev->kfd.dev); return r; } @@ -251,7 +247,7 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) int r = 0; if (adev->kfd.dev) - r = kgd2kfd->pre_reset(adev->kfd.dev); + r = kgd2kfd_pre_reset(adev->kfd.dev); return r; } @@ -261,7 +257,7 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) int r = 0; if (adev->kfd.dev) - r = kgd2kfd->post_reset(adev->kfd.dev); + r = kgd2kfd_post_reset(adev->kfd.dev); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 3c7055ecd990..3107b9575929 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -31,7 +31,6 @@ static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); -extern const struct kgd2kfd_calls *kgd2kfd; /* Eviction Fence * Fence helper functions to deal with KFD memory eviction. @@ -123,7 +122,7 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) return true; return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3fc2618ca646..d7b10d79f1de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -44,8 +44,6 @@ */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 -extern const struct kgd2kfd_calls *kgd2kfd; - /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -1792,7 +1790,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd->quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2084,7 +2082,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) evicted_bos) goto unlock_out; evicted_bos = 0; - if (kgd2kfd->resume_mm(mm)) { + if (kgd2kfd_resume_mm(mm)) { pr_err("%s: Failed to resume KFD\n", __func__); /* No recovery from this failure. Probably the CP is * hanging. No point trying again. From 308176d6f625bc782a34ec316beaefcfa7ad75f3 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Fri, 14 Dec 2018 09:35:17 -0500 Subject: [PATCH 062/539] drm/amdgpu: Remove kgd2kfd function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kgd2kfd function pointers and global kgd2kfd pointer are no longer in use. Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 29 +---------- .../gpu/drm/amd/include/kgd_kfd_interface.h | 50 ------------------- 4 files changed, 4 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fc926e2f857a..612887cd40cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -28,8 +28,6 @@ #include #include -const struct kgd2kfd_calls *kgd2kfd; - static const unsigned int compute_vmid_bitmap = 0xFF00; /* Total memory size in system memory and all GPU VRAM. Used to @@ -47,12 +45,9 @@ int amdgpu_amdkfd_init(void) amdgpu_amdkfd_total_mem_size *= si.mem_unit; #ifdef CONFIG_HSA_AMD - ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) - kgd2kfd = NULL; + ret = kgd2kfd_init(); amdgpu_amdkfd_gpuvm_init_mem_limits(); #else - kgd2kfd = NULL; ret = -ENOENT; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 3214d319071b..0b31a1859023 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -214,8 +214,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); /* KGD2KFD callbacks */ -int kgd2kfd_init(unsigned interface_version, - const struct kgd2kfd_calls **g2f); +int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 030b39d62973..932007eb9168 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -25,22 +25,6 @@ #include "kfd_priv.h" #include "amdgpu_amdkfd.h" -static const struct kgd2kfd_calls kgd2kfd = { - .exit = kgd2kfd_exit, - .probe = kgd2kfd_probe, - .device_init = kgd2kfd_device_init, - .device_exit = kgd2kfd_device_exit, - .interrupt = kgd2kfd_interrupt, - .suspend = kgd2kfd_suspend, - .resume = kgd2kfd_resume, - .quiesce_mm = kgd2kfd_quiesce_mm, - .resume_mm = kgd2kfd_resume_mm, - .schedule_evict_and_restore_process = - kgd2kfd_schedule_evict_and_restore_process, - .pre_reset = kgd2kfd_pre_reset, - .post_reset = kgd2kfd_post_reset, -}; - static int kfd_init(void) { int err; @@ -92,18 +76,9 @@ static void kfd_exit(void) kfd_chardev_exit(); } -int kgd2kfd_init(unsigned int interface_version, - const struct kgd2kfd_calls **g2f) +int kgd2kfd_init() { - int err; - - err = kfd_init(); - if (err) - return err; - - *g2f = &kgd2kfd; - - return 0; + return kfd_init(); } void kgd2kfd_exit(void) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 3d5c3b00d917..83d960110d23 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -34,7 +34,6 @@ struct pci_dev; -#define KFD_INTERFACE_VERSION 2 #define KGD_MAX_QUEUES 128 struct kfd_dev; @@ -330,53 +329,4 @@ struct kfd2kgd_calls { }; -/** - * struct kgd2kfd_calls - * - * @exit: Notifies amdkfd that kgd module is unloaded - * - * @probe: Notifies amdkfd about a probe done on a device in the kgd driver. - * - * @device_init: Initialize the newly probed device (if it is a device that - * amdkfd supports) - * - * @device_exit: Notifies amdkfd about a removal of a kgd device - * - * @suspend: Notifies amdkfd about a suspend action done to a kgd device - * - * @resume: Notifies amdkfd about a resume action done to a kgd device - * - * @quiesce_mm: Quiesce all user queue access to specified MM address space - * - * @resume_mm: Resume user queue access to specified MM address space - * - * @schedule_evict_and_restore_process: Schedules work queue that will prepare - * for safe eviction of KFD BOs that belong to the specified process. - * - * @pre_reset: Notifies amdkfd that amdgpu about to reset the gpu - * - * @post_reset: Notify amdkfd that amgpu successfully reseted the gpu - * - * This structure contains function callback pointers so the kgd driver - * will notify to the amdkfd about certain status changes. - * - */ -struct kgd2kfd_calls { - void (*exit)(void); - struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g); - bool (*device_init)(struct kfd_dev *kfd, - const struct kgd2kfd_shared_resources *gpu_resources); - void (*device_exit)(struct kfd_dev *kfd); - void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); - void (*suspend)(struct kfd_dev *kfd); - int (*resume)(struct kfd_dev *kfd); - int (*quiesce_mm)(struct mm_struct *mm); - int (*resume_mm)(struct mm_struct *mm); - int (*schedule_evict_and_restore_process)(struct mm_struct *mm, - struct dma_fence *fence); - int (*pre_reset)(struct kfd_dev *kfd); - int (*post_reset)(struct kfd_dev *kfd); -}; - #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ From a363553cf0339d26f049d870915177706021132f Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Thu, 13 Dec 2018 15:21:49 +0800 Subject: [PATCH 063/539] drm/amdgpu/psp: Fix to get wrong xgmi session id Driver get session id after loading TA FW and the session id is used by driver instances to communicate with TA. PF and VF have different session id. xGMI session id should get from response buffer, correct it. Signed-off-by: Xiangliang Yu Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8fab0d637ee5..2f126ea7aea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -147,6 +147,9 @@ psp_cmd_submit_buf(struct psp_context *psp, return -EINVAL; } + /* get xGMI session id from response buffer */ + cmd->resp.session_id = psp->cmd_buf_mem->resp.session_id; + if (ucode) { ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; From 898e0d9d405158d13cb1afa192c51ce94746d537 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 17 Dec 2018 09:37:39 -0600 Subject: [PATCH 064/539] drm/amdgpu: Use sdma_engine array Use sdma_engine[8] array instead of sdma_engine0~7 so it is easier to program. Signed-off-by: Oak Zeng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 9 +-------- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 ++------ drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/vi.c | 4 ++-- 7 files changed, 20 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 612887cd40cd..e957e42c539a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -188,13 +188,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * can use each doorbell assignment twice. */ gpu_resources.sdma_doorbell[0][i] = - adev->doorbell_index.sdma_engine0 + (i >> 1); + adev->doorbell_index.sdma_engine[0] + (i >> 1); gpu_resources.sdma_doorbell[0][i+1] = - adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1); + adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1); gpu_resources.sdma_doorbell[1][i] = - adev->doorbell_index.sdma_engine1 + (i >> 1); + adev->doorbell_index.sdma_engine[1] + (i >> 1); gpu_resources.sdma_doorbell[1][i+1] = - adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1); + adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); } /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for * SDMA, IH and VCN. So don't use them for the CP. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index be620b29f4aa..35a0c05f454c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -51,14 +51,7 @@ struct amdgpu_doorbell_index { uint32_t userqueue_start; uint32_t userqueue_end; uint32_t gfx_ring0; - uint32_t sdma_engine0; - uint32_t sdma_engine1; - uint32_t sdma_engine2; - uint32_t sdma_engine3; - uint32_t sdma_engine4; - uint32_t sdma_engine5; - uint32_t sdma_engine6; - uint32_t sdma_engine7; + uint32_t sdma_engine[8]; uint32_t ih; union { struct { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 1bccc5fe2d9d..06c5a277aa76 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1145,8 +1145,7 @@ static int sdma_v3_0_sw_init(void *handle) ring->ring_obj = NULL; if (!amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; - ring->doorbell_index = (i == 0) ? - adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1; + ring->doorbell_index = adev->doorbell_index.sdma_engine[i]; } else { ring->use_pollmem = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 6811a5d05b27..59638b800212 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1522,9 +1522,7 @@ static int sdma_v4_0_sw_init(void *handle) ring->use_doorbell?"true":"false"); /* doorbell size is 2 dwords, get DWORD offset */ - ring->doorbell_index = (i == 0) ? - (adev->doorbell_index.sdma_engine0 << 1) - : (adev->doorbell_index.sdma_engine1 << 1); + ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1543,9 +1541,7 @@ static int sdma_v4_0_sw_init(void *handle) /* paging queue use same doorbell index/routing as gfx queue * with 0x400 (4096 dwords) offset on second doorbell page */ - ring->doorbell_index = (i == 0) ? - (adev->doorbell_index.sdma_engine0 << 1) - : (adev->doorbell_index.sdma_engine1 << 1); + ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; sprintf(ring->name, "page%d", i); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 422674bb3cdf..b75d17ba59e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -70,8 +70,8 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; - adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL64_sDMA_ENGINE0; - adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL64_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1; adev->doorbell_index.ih = AMDGPU_DOORBELL64_IH; adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_DOORBELL64_UVD_RING0_1; adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_DOORBELL64_UVD_RING2_3; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index edce413fda9a..63c542cfabfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -68,14 +68,14 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; - adev->doorbell_index.sdma_engine0 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; - adev->doorbell_index.sdma_engine1 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; - adev->doorbell_index.sdma_engine2 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2; - adev->doorbell_index.sdma_engine3 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3; - adev->doorbell_index.sdma_engine4 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4; - adev->doorbell_index.sdma_engine5 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5; - adev->doorbell_index.sdma_engine6 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6; - adev->doorbell_index.sdma_engine7 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7; + adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine[2] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2; + adev->doorbell_index.sdma_engine[3] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3; + adev->doorbell_index.sdma_engine[4] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4; + adev->doorbell_index.sdma_engine[5] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5; + adev->doorbell_index.sdma_engine[6] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6; + adev->doorbell_index.sdma_engine[7] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7; adev->doorbell_index.ih = AMDGPU_VEGA20_DOORBELL_IH; adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1; adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 77e367459101..03e7be595a0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1726,8 +1726,8 @@ void legacy_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL_MEC_RING7; adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL_GFX_RING0; - adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL_sDMA_ENGINE0; - adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL_sDMA_ENGINE1; adev->doorbell_index.ih = AMDGPU_DOORBELL_IH; adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_MAX_ASSIGNMENT; } From 76223c5496a7cf10aef1971e4e6aff77abadd15a Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Thu, 13 Dec 2018 15:34:12 +0800 Subject: [PATCH 065/539] drm/amdgpu/psp: Fix can't detect psp INVOKE command failed There isn't ucode when executing INVOKE command, so current code can't check the failure of INVOKE command. Remove the ucode check. Signed-off-by: Xiangliang Yu Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2f126ea7aea4..7f5ce3788732 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -140,10 +140,13 @@ psp_cmd_submit_buf(struct psp_context *psp, while (*((unsigned int *)psp->fence_buf) != index) msleep(1); - /* the status field must be 0 after FW is loaded */ - if (ucode && psp->cmd_buf_mem->resp.status) { - DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n", - psp->cmd_buf_mem->resp.status, ucode->ucode_id); + /* the status field must be 0 after psp command completion */ + if (psp->cmd_buf_mem->resp.status) { + if (ucode) + DRM_ERROR("failed to load ucode id (%d) ", + ucode->ucode_id); + DRM_ERROR("psp command failed and response status is (%d)\n", + psp->cmd_buf_mem->resp.status); return -EINVAL; } From 14d20ec7f31ef96a2e7dcf7880b13dde1d473b56 Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 18 Dec 2018 15:42:08 +0800 Subject: [PATCH 066/539] drm/amdgpu: psp_ring_destroy cause psp->km_ring.ring_mem NULL psp_ring_destroy inside psp_load_fw cause psp->km_ring.ring_mem NULL. Call Trace occurred when psp_cmd_submit. should be psp_ring_stop instead. Reviewed-by: Xiangliang Yu Signed-off-by: Wentao Lou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7f5ce3788732..8189a90637f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -547,7 +547,7 @@ static int psp_load_fw(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { - psp_ring_destroy(psp, PSP_RING_TYPE__KM); + psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ goto skip_memalloc; } From fda2019cf20c8f236e51b7b9301ba030d3b7771a Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 16 Nov 2018 14:06:55 -0500 Subject: [PATCH 067/539] drm/amd/display: 3.2.09 Signed-off-by: Steven Chiu Reviewed-by: Shahin Khayyer Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4b5bbb13ce7f..9a1e8f6eb244 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.08" +#define DC_VER "3.2.09" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From f9ed8fdcd21eb872ac86ac21e8fc7b762cd34751 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Tue, 13 Nov 2018 10:47:47 -0500 Subject: [PATCH 068/539] drm/amd/display: Add a PP_SMU_VER_MAX enum Signed-off-by: Jun Lei Reviewed-by: Nevenko Stupar Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 0029a39efb1c..14bed5b1fa97 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -38,7 +38,8 @@ enum pp_smu_ver { * of interface sharing between families of ASIcs. */ PP_SMU_UNSUPPORTED, - PP_SMU_VER_RV + PP_SMU_VER_RV, + PP_SMU_VER_MAX }; struct pp_smu { From 60a804c848fb80ec2bafb2b6b0ab7ec0ba4bc631 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Wed, 7 Nov 2018 16:26:50 -0500 Subject: [PATCH 069/539] drm/amd/display: Expose hubp1_vready_workaround function Expose this function for future use. Signed-off-by: Eric Bernstein Reviewed-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 345af015d061..e24ea6146a29 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -115,7 +115,7 @@ static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank) REG_UPDATE(DCHUBP_CNTL, HUBP_BLANK_EN, blank_en); } -static void hubp1_vready_workaround(struct hubp *hubp, +void hubp1_vready_workaround(struct hubp *hubp, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) { uint32_t value = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 62d4232e7796..e82ca15c842f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -745,5 +745,7 @@ void hubp1_clear_underflow(struct hubp *hubp); enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); +void hubp1_vready_workaround(struct hubp *hubp, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); #endif From da1043cf22d3b9e652992e9d9a9372b90658ceb2 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Fri, 9 Nov 2018 16:51:55 -0500 Subject: [PATCH 070/539] drm/amd/display: Fix runtime errors for diagnostic tests 1) Rename and make public definition of input CSC matrix struct. 2) Make wm_read_state() function an interface of hubbub, and check if watermark registers exist before read/write to them. 3) Check if OTG_INTERLACE_CONTROL register exists before updating 4) Add dummy functions for set_input/output_transfer function to avoid errors due to differences in CM registers. 5) Added missing register field definition in header file Signed-off-by: Eric Bernstein Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_cm.c | 12 +-- .../drm/amd/display/dc/dcn10/dcn10_hubbub.c | 73 +++++++++++-------- .../drm/amd/display/dc/dcn10/dcn10_hubbub.h | 12 --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- .../dc/dcn10/dcn10_hw_sequencer_debug.c | 2 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 21 +++--- .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 15 ++++ drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 5 ++ 8 files changed, 78 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 116977eb24e2..41f0f4c912e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -51,10 +51,6 @@ #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) -struct dcn10_input_csc_matrix { - enum dc_color_space color_space; - uint16_t regval[12]; -}; enum dcn10_coef_filter_type_sel { SCL_COEF_LUMA_VERT_FILTER = 0, @@ -99,7 +95,7 @@ enum gamut_remap_select { GAMUT_REMAP_COMB_COEFF }; -static const struct dcn10_input_csc_matrix dcn10_input_csc_matrix[] = { +static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { {COLOR_SPACE_SRGB, {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, {COLOR_SPACE_SRGB_LIMITED, @@ -454,7 +450,7 @@ void dpp1_program_input_csc( { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); int i; - int arr_size = sizeof(dcn10_input_csc_matrix)/sizeof(struct dcn10_input_csc_matrix); + int arr_size = sizeof(dpp_input_csc_matrix)/sizeof(struct dpp_input_csc_matrix); const uint16_t *regval = NULL; uint32_t cur_select = 0; enum dcn10_input_csc_select select; @@ -467,8 +463,8 @@ void dpp1_program_input_csc( if (tbl_entry == NULL) { for (i = 0; i < arr_size; i++) - if (dcn10_input_csc_matrix[i].color_space == color_space) { - regval = dcn10_input_csc_matrix[i].regval; + if (dpp_input_csc_matrix[i].color_space == color_space) { + regval = dpp_input_csc_matrix[i].regval; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index c7d1e678ebf5..eb31a5ed6dff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -275,14 +275,16 @@ void hubbub1_program_watermarks( watermarks->a.urgent_ns, prog_wm_value); } - if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub->watermarks.a.pte_meta_urgent_ns) { - hubbub->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.pte_meta_urgent_ns, prog_wm_value); + if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { + if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub->watermarks.a.pte_meta_urgent_ns) { + hubbub->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.pte_meta_urgent_ns, prog_wm_value); + } } if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) { @@ -338,14 +340,16 @@ void hubbub1_program_watermarks( watermarks->b.urgent_ns, prog_wm_value); } - if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub->watermarks.b.pte_meta_urgent_ns) { - hubbub->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.pte_meta_urgent_ns, prog_wm_value); + if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) { + if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub->watermarks.b.pte_meta_urgent_ns) { + hubbub->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.pte_meta_urgent_ns, prog_wm_value); + } } if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) { @@ -401,14 +405,16 @@ void hubbub1_program_watermarks( watermarks->c.urgent_ns, prog_wm_value); } - if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub->watermarks.c.pte_meta_urgent_ns) { - hubbub->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.pte_meta_urgent_ns, prog_wm_value); + if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { + if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub->watermarks.c.pte_meta_urgent_ns) { + hubbub->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.pte_meta_urgent_ns, prog_wm_value); + } } if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) { @@ -464,14 +470,16 @@ void hubbub1_program_watermarks( watermarks->d.urgent_ns, prog_wm_value); } - if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub->watermarks.d.pte_meta_urgent_ns) { - hubbub->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.pte_meta_urgent_ns, prog_wm_value); + if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { + if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub->watermarks.d.pte_meta_urgent_ns) { + hubbub->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.pte_meta_urgent_ns, prog_wm_value); + } } if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { @@ -837,6 +845,7 @@ static const struct hubbub_funcs hubbub1_funcs = { .dcc_support_swizzle = hubbub1_dcc_support_swizzle, .dcc_support_pixel_format = hubbub1_dcc_support_pixel_format, .get_dcc_compression_cap = hubbub1_get_dcc_compression_cap, + .wm_read_state = hubbub1_wm_read_state, }; void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index d0f03d152913..aca67633ee58 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -165,18 +165,6 @@ struct dcn_hubbub_mask { struct dc; -struct dcn_hubbub_wm_set { - uint32_t wm_set; - uint32_t data_urgent; - uint32_t pte_meta_urgent; - uint32_t sr_enter; - uint32_t sr_exit; - uint32_t dram_clk_chanage; -}; - -struct dcn_hubbub_wm { - struct dcn_hubbub_wm_set sets[4]; -}; struct hubbub { const struct hubbub_funcs *funcs; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 91e015e14355..f001eddd4180 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -95,7 +95,7 @@ void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx) struct dcn_hubbub_wm wm = {0}; int i; - hubbub1_wm_read_state(dc->res_pool->hubbub, &wm); + dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent" " sr_enter sr_exit dram_clk_change\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index cd469014baa3..82175820b0cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -81,7 +81,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; static const unsigned int frac = 1000; - hubbub1_wm_read_state(dc->res_pool->hubbub, &wm); + dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n"); remaining_buffer -= chars_printed; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 7c138615f17d..cdb3f096eb25 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -299,16 +299,17 @@ void optc1_program_timing( } /* Interlace */ - if (patched_crtc_timing.flags.INTERLACE == 1) { - REG_UPDATE(OTG_INTERLACE_CONTROL, - OTG_INTERLACE_ENABLE, 1); - v_init = v_init / 2; - if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) - v_fp2 = v_fp2 / 2; - } else - REG_UPDATE(OTG_INTERLACE_CONTROL, - OTG_INTERLACE_ENABLE, 0); - + if (REG(OTG_INTERLACE_CONTROL)) { + if (patched_crtc_timing.flags.INTERLACE == 1) { + REG_UPDATE(OTG_INTERLACE_CONTROL, + OTG_INTERLACE_ENABLE, 1); + v_init = v_init / 2; + if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) + v_fp2 = v_fp2 / 2; + } else + REG_UPDATE(OTG_INTERLACE_CONTROL, + OTG_INTERLACE_ENABLE, 0); + } /* VTG enable set to 0 first VInit */ REG_UPDATE(CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 02f757dd70d4..dc0bb5bd8cac 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -39,6 +39,18 @@ enum segment_order { segment_order__non_contiguous, }; +struct dcn_hubbub_wm_set { + uint32_t wm_set; + uint32_t data_urgent; + uint32_t pte_meta_urgent; + uint32_t sr_enter; + uint32_t sr_exit; + uint32_t dram_clk_chanage; +}; + +struct dcn_hubbub_wm { + struct dcn_hubbub_wm_set sets[4]; +}; struct hubbub_funcs { void (*update_dchub)( @@ -58,6 +70,9 @@ struct hubbub_funcs { bool (*dcc_support_pixel_format)( enum surface_pixel_format format, unsigned int *bytes_per_element); + + void (*wm_read_state)(struct hubbub *hubbub, + struct dcn_hubbub_wm *wm); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index e894e649ce5a..fb7967b39edb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -39,6 +39,11 @@ struct dpp { }; +struct dpp_input_csc_matrix { + enum dc_color_space color_space; + uint16_t regval[12]; +}; + struct dpp_grph_csc_adjustment { struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE]; enum graphics_gamut_adjust_type gamut_adjust_type; From 1c164f70825bd570c1f18c507849ba89deef3783 Mon Sep 17 00:00:00 2001 From: Fatemeh Darbehani Date: Mon, 19 Nov 2018 13:23:21 -0500 Subject: [PATCH 071/539] drm/amd/display: Add pixel clock values to dtn logs [Why] To make sure future changes in DAL for SMU msgs will not change the current behaviour and to make sure clock registeres are programmed correctly based on SMU msgs that DAL sends. Signed-off-by: Fatemeh Darbehani Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Acked-by: Yongqiang Sun Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 82175820b0cf..ae4fd5ec9f74 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -419,20 +419,22 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int unsigned int remaining_buffer = bufSize; chars_printed = snprintf_count(pBuf, remaining_buffer, "instance,v_bs,v_be,v_ss,v_se,vpol,vmax,vmin,vmax_sel,vmin_sel," - "h_bs,h_be,h_ss,h_se,hpol,htot,vtot,underflow\n"); + "h_bs,h_be,h_ss,h_se,hpol,htot,vtot,underflow,pixelclk[khz]\n"); remaining_buffer -= chars_printed; pBuf += chars_printed; for (i = 0; i < pool->timing_generator_count; i++) { struct timing_generator *tg = pool->timing_generators[i]; struct dcn_otg_state s = {0}; + int pix_clk = 0; optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); + pix_clk = dc->current_state->res_ctx.pipe_ctx[i].stream_res.pix_clk_params.requested_pix_clk; //only print if OTG master is enabled if (s.otg_enabled & 1) { chars_printed = snprintf_count(pBuf, remaining_buffer, "%x,%d,%d,%d,%d,%d,%d,%d,%d,%d," - "%d,%d,%d,%d,%d,%d,%d,%d" + "%d,%d,%d,%d,%d,%d,%d,%d,%d" "\n", tg->inst, s.v_blank_start, @@ -451,7 +453,8 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int s.h_sync_a_pol, s.h_total, s.v_total, - s.underflow_occurred_status); + s.underflow_occurred_status, + pix_clk); remaining_buffer -= chars_printed; pBuf += chars_printed; From 630cb40ffd4a308e98c334cc9f01640a0ccb0aaa Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 19 Nov 2018 17:02:39 -0500 Subject: [PATCH 072/539] drm/amd/display: Remove some extra braces Remove braces around single-line conditionals Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Leo Li Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index f001eddd4180..edb382e1eaeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2252,13 +2252,11 @@ static void program_all_pipe_in_tree( } - if (pipe_ctx->plane_state != NULL) { + if (pipe_ctx->plane_state != NULL) dcn10_program_pipe(dc, pipe_ctx, context); - } - if (pipe_ctx->bottom_pipe != NULL && pipe_ctx->bottom_pipe != pipe_ctx) { + if (pipe_ctx->bottom_pipe != NULL && pipe_ctx->bottom_pipe != pipe_ctx) program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context); - } } struct pipe_ctx *find_top_pipe_for_stream( From d5cf79eeda52045bc685939b86975944312f688f Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 13 Nov 2018 18:21:53 -0500 Subject: [PATCH 073/539] drm/amd/display: implement dc_init_callbacks to assign callback pointers after dc_create [why] Some components depend on dc to constuct but need to assign callback functions to dc. [how] Instead of assigning dc callback functions in dc_create, decouple the callback init to a standlone function after dc_create. This is currently a no-op. Signed-off-by: Wenjing Liu Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++++ drivers/gpu/drm/amd/display/dc/dc.h | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5fd52094d459..e12612be118d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -840,6 +840,11 @@ alloc_fail: return NULL; } +void dc_init_callbacks(struct dc *dc, + const struct dc_callback_init *init_params) +{ +} + void dc_destroy(struct dc **dc) { destruct(*dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9a1e8f6eb244..580f752920d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -339,8 +339,13 @@ struct dc_init_data { uint32_t log_mask; }; -struct dc *dc_create(const struct dc_init_data *init_params); +struct dc_callback_init { + uint8_t reserved; +}; +struct dc *dc_create(const struct dc_init_data *init_params); +void dc_init_callbacks(struct dc *dc, + const struct dc_callback_init *init_params); void dc_destroy(struct dc **dc); /******************************************************************************* From ebd084cd6509313307bc3b8fd9695f65aee8ff96 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Fri, 16 Nov 2018 19:12:46 +0800 Subject: [PATCH 074/539] drm/amd/display: add plane size change check condition [Why] Driver didn't check plane size and surface size is mismatch. It will cause pitch data incorrect. [How] Add condition to check is plane change and update surface Signed-off-by: Lewis Huang Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e12612be118d..ce34e3e0c31f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1220,6 +1220,12 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa */ update_flags->bits.bpp_change = 1; + if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch + || u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch + || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) + update_flags->bits.plane_size_change = 1; + + if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, sizeof(union dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 580f752920d1..b4ad48b0bff3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -445,6 +445,7 @@ union surface_update_flags { uint32_t coeff_reduction_change:1; uint32_t output_tf_change:1; uint32_t pixel_format_change:1; + uint32_t plane_size_change:1; /* Full updates */ uint32_t new_plane:1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index edb382e1eaeb..03835dcd28a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2125,7 +2125,8 @@ void update_dchubp_dpp( plane_state->update_flags.bits.swizzle_change || plane_state->update_flags.bits.dcc_change || plane_state->update_flags.bits.bpp_change || - plane_state->update_flags.bits.scaling_change) { + plane_state->update_flags.bits.scaling_change || + plane_state->update_flags.bits.plane_size_change) { hubp->funcs->hubp_program_surface_config( hubp, plane_state->format, From ef32bc1c751694743703ff150b270bbcf8ad4de2 Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Tue, 13 Nov 2018 15:32:24 -0500 Subject: [PATCH 075/539] drm/amd/display: Fix issue with VLine interrupt not firing [Why] We are not correctly handling the wrap around case. VLine interrupt is relative to position of VUpdate interrupt. Both VUpdate interrupt and VLine interrupt could possibly be in front porch or back porch. [How] Fix wraparound case by checking for line number that is greater than the VTOTAL of the OTG timing. In this case, the interrupt should occur on the next frame. Also fix some variable naming and remove some dead code. Signed-off-by: Krunoslav Kovac Reviewed-by: Anthony Koo Acked-by: Leo Li Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index cdb3f096eb25..57d00d660462 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -102,14 +102,6 @@ static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_c patched_crtc_timing = *dc_crtc_timing; optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); - vesa_sync_start = patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right + - patched_crtc_timing.h_front_porch; - - asic_blank_end = patched_crtc_timing.h_total - - vesa_sync_start - - patched_crtc_timing.h_border_left; - vesa_sync_start = patched_crtc_timing.v_addressable + patched_crtc_timing.v_border_bottom + patched_crtc_timing.v_front_porch; @@ -119,10 +111,8 @@ static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_c patched_crtc_timing.v_border_top); vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - if (vertical_line_start < 0) { - ASSERT(0); + if (vertical_line_start < 0) vertical_line_start = 0; - } return vertical_line_start; } @@ -143,7 +133,7 @@ void optc1_program_vline_interrupt( uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); uint32_t start_line = 0; - uint32_t endLine = 0; + uint32_t end_line = 0; if (req_delta_lines != 0) req_delta_lines--; @@ -153,14 +143,17 @@ void optc1_program_vline_interrupt( else start_line = vsync_line - req_delta_lines; - endLine = start_line + 2; + end_line = start_line + 2; - if (endLine >= dc_crtc_timing->v_total) - endLine = 2; + if (start_line >= dc_crtc_timing->v_total) + start_line = start_line % dc_crtc_timing->v_total; + + if (end_line >= dc_crtc_timing->v_total) + end_line = end_line % dc_crtc_timing->v_total; REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, endLine); + OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); } /** From 987741af33e5b8ebc0e20f40e5366abebab395c2 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 21 Nov 2018 02:37:07 -0500 Subject: [PATCH 076/539] drm/amd/display: update bw formula to v252 [why] There was a recent fix in the BW spreadsheet to allow timing with very large vblank. Need to be ported into driver. Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/calcs/dcn_calc_auto.c | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c index d0fc54f8fb1c..7d102ac0d61b 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c @@ -797,9 +797,40 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v) else { v->maximum_vstartup = v->v_sync_plus_back_porch[k] - 1.0; } - v->line_times_for_prefetch[k] = v->maximum_vstartup - v->urgent_latency / (v->htotal[k] / v->pixel_clock[k]) - (v->time_calc + v->time_setup) / (v->htotal[k] / v->pixel_clock[k]) - (v->dst_y_after_scaler + v->dst_x_after_scaler / v->htotal[k]); - v->line_times_for_prefetch[k] =dcn_bw_floor2(4.0 * (v->line_times_for_prefetch[k] + 0.125), 1.0) / 4; - v->prefetch_bw[k] = (v->meta_pte_bytes_per_frame[k] + 2.0 * v->meta_row_bytes[k] + 2.0 * v->dpte_bytes_per_row[k] + v->prefetch_lines_y[k] * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) + v->prefetch_lines_c[k] * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0)) / (v->line_times_for_prefetch[k] * v->htotal[k] / v->pixel_clock[k]); + + do { + v->line_times_for_prefetch[k] = v->maximum_vstartup - v->urgent_latency / (v->htotal[k] / v->pixel_clock[k]) - (v->time_calc + v->time_setup) / (v->htotal[k] / v->pixel_clock[k]) - (v->dst_y_after_scaler + v->dst_x_after_scaler / v->htotal[k]); + v->line_times_for_prefetch[k] =dcn_bw_floor2(4.0 * (v->line_times_for_prefetch[k] + 0.125), 1.0) / 4; + v->prefetch_bw[k] = (v->meta_pte_bytes_per_frame[k] + 2.0 * v->meta_row_bytes[k] + 2.0 * v->dpte_bytes_per_row[k] + v->prefetch_lines_y[k] * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) + v->prefetch_lines_c[k] * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0)) / (v->line_times_for_prefetch[k] * v->htotal[k] / v->pixel_clock[k]); + + if (v->pte_enable == dcn_bw_yes && v->dcc_enable[k] == dcn_bw_yes) { + v->time_for_meta_pte_without_immediate_flip = dcn_bw_max3( + v->meta_pte_bytes_frame[k] / v->prefetch_bandwidth[k], + v->extra_latency, + v->htotal[k] / v->pixel_clock[k] / 4.0); + } else { + v->time_for_meta_pte_without_immediate_flip = v->htotal[k] / v->pixel_clock[k] / 4.0; + } + + if (v->pte_enable == dcn_bw_yes || v->dcc_enable[k] == dcn_bw_yes) { + v->time_for_meta_and_dpte_row_without_immediate_flip = dcn_bw_max3(( + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / v->prefetch_bandwidth[k], + v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip, + v->extra_latency); + } else { + v->time_for_meta_and_dpte_row_without_immediate_flip = dcn_bw_max2( + v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip, + v->extra_latency - v->time_for_meta_pte_with_immediate_flip); + } + + v->lines_for_meta_pte_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_pte_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; + v->lines_for_meta_and_dpte_row_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_and_dpte_row_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; + v->maximum_vstartup = v->maximum_vstartup - 1; + + if (v->lines_for_meta_pte_without_immediate_flip[k] < 8.0 && v->lines_for_meta_and_dpte_row_without_immediate_flip[k] < 16.0) + break; + + } while(1); } v->bw_available_for_immediate_flip = v->return_bw_per_state[i]; for (k = 0; k <= v->number_of_active_planes - 1; k++) { @@ -814,24 +845,18 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v) for (k = 0; k <= v->number_of_active_planes - 1; k++) { if (v->pte_enable == dcn_bw_yes && v->dcc_enable[k] == dcn_bw_yes) { v->time_for_meta_pte_with_immediate_flip =dcn_bw_max5(v->meta_pte_bytes_per_frame[k] / v->prefetch_bw[k], v->meta_pte_bytes_per_frame[k] * v->total_immediate_flip_bytes[k] / (v->bw_available_for_immediate_flip * (v->meta_pte_bytes_per_frame[k] + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k])), v->extra_latency, v->urgent_latency, v->htotal[k] / v->pixel_clock[k] / 4.0); - v->time_for_meta_pte_without_immediate_flip =dcn_bw_max3(v->meta_pte_bytes_per_frame[k] / v->prefetch_bw[k], v->extra_latency, v->htotal[k] / v->pixel_clock[k] / 4.0); } else { v->time_for_meta_pte_with_immediate_flip = v->htotal[k] / v->pixel_clock[k] / 4.0; - v->time_for_meta_pte_without_immediate_flip = v->htotal[k] / v->pixel_clock[k] / 4.0; } if (v->pte_enable == dcn_bw_yes || v->dcc_enable[k] == dcn_bw_yes) { v->time_for_meta_and_dpte_row_with_immediate_flip =dcn_bw_max5((v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / v->prefetch_bw[k], (v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) * v->total_immediate_flip_bytes[k] / (v->bw_available_for_immediate_flip * (v->meta_pte_bytes_per_frame[k] + v->meta_row_bytes[k] + v->dpte_bytes_per_row[k])), v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_with_immediate_flip, v->extra_latency, 2.0 * v->urgent_latency); - v->time_for_meta_and_dpte_row_without_immediate_flip =dcn_bw_max3((v->meta_row_bytes[k] + v->dpte_bytes_per_row[k]) / v->prefetch_bw[k], v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip, v->extra_latency); } else { v->time_for_meta_and_dpte_row_with_immediate_flip =dcn_bw_max2(v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_with_immediate_flip, v->extra_latency - v->time_for_meta_pte_with_immediate_flip); - v->time_for_meta_and_dpte_row_without_immediate_flip =dcn_bw_max2(v->htotal[k] / v->pixel_clock[k] - v->time_for_meta_pte_without_immediate_flip, v->extra_latency - v->time_for_meta_pte_without_immediate_flip); } v->lines_for_meta_pte_with_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_pte_with_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; - v->lines_for_meta_pte_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_pte_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; v->lines_for_meta_and_dpte_row_with_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_and_dpte_row_with_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; - v->lines_for_meta_and_dpte_row_without_immediate_flip[k] =dcn_bw_floor2(4.0 * (v->time_for_meta_and_dpte_row_without_immediate_flip / (v->htotal[k] / v->pixel_clock[k]) + 0.125), 1.0) / 4; v->line_times_to_request_prefetch_pixel_data_with_immediate_flip = v->line_times_for_prefetch[k] - v->lines_for_meta_pte_with_immediate_flip[k] - v->lines_for_meta_and_dpte_row_with_immediate_flip[k]; v->line_times_to_request_prefetch_pixel_data_without_immediate_flip = v->line_times_for_prefetch[k] - v->lines_for_meta_pte_without_immediate_flip[k] - v->lines_for_meta_and_dpte_row_without_immediate_flip[k]; if (v->line_times_to_request_prefetch_pixel_data_with_immediate_flip > 0.0) { From 51ba137ef226255bb13578da13e8a9030c5ad019 Mon Sep 17 00:00:00 2001 From: Hugo Hu Date: Wed, 21 Nov 2018 14:34:10 +0800 Subject: [PATCH 077/539] drm/amd/display: Add debug option to force fclk request [Why] So that we can adjust fclk for debugging purposes. [How] Add option to force adjust fclk request to pplib. Signed-off-by: Hugo Hu Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b4ad48b0bff3..da08c7122497 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -255,6 +255,7 @@ struct dc_debug_options { bool scl_reset_length10; bool hdmi20_disable; bool skip_detection_link_training; + unsigned int force_fclk_khz; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index 54abedbf1b43..c39db5bfa7cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -218,6 +218,7 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, bool safe_to_lower) { struct dc *dc = clk_mgr->ctx->dc; + struct dc_debug_options *debug = &dc->debug; struct dc_clocks *new_clocks = &context->bw.dcn.clk; struct pp_smu_display_requirement_rv *smu_req_cur = &dc->res_pool->pp_smu_req; @@ -261,6 +262,9 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, } // F Clock + if (debug->force_fclk_khz != 0) + new_clocks->fclk_khz = debug->force_fclk_khz; + if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr->clks.fclk_khz)) { clk_mgr->clks.fclk_khz = new_clocks->fclk_khz; smu_req.hard_min_fclk_mhz = new_clocks->fclk_khz / 1000; From ceb3dbb4690db8377ad127a5666cd4775d9f70f4 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Fri, 9 Nov 2018 09:21:21 -0500 Subject: [PATCH 078/539] drm/amd/display: remove sink reference in dc_stream_state [why] dc_stream_state containing a pointer to sink is poor design. Sink describes the display, and the specifications or capabilities it has. That information is irrelevant for dc_stream_state, which describes hardware state, and is generally used for hardware programming. It could further be argued that dc_sink itself is just a convenience dc provides, and DC should be perfectly capable of programming hardware without any dc_sinks (for example, emulated sinks). [how] Phase 1: Deprecate use of dc_sink pointer in dc_stream. Most references are trivial to remove, but some call sites are risky (such as is_timing_changed) with no obvious logical replacement. These will be removed in follow up change. Add dc_link pointer to dc_stream. This is the typical reason DC really needed sink pointer, and most call sites are replaced with this. DMs also need minor updates, as all 3 DMs leverage stream->sink for some functionality. this is replaced instead by a pointer to private data inside dc_stream_state, which is used by DMs as a quality of life improvment for some key functionality. it allows DMs to set pointers have to their own objects which associate OS objects to dc_stream_states (such as DisplayTarget and amdgpu_dm_connector). Without the private pointer, DMs would be forced to perform a lookup for callbacks. Signed-off-by: Jun Lei Reviewed-by: Aric Cyr Acked-by: David Francis Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++-- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 +-- .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++-- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 54 +++++++++---------- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 6 +-- .../drm/amd/display/dc/core/dc_link_hwss.c | 9 ++-- .../gpu/drm/amd/display/dc/core/dc_resource.c | 14 ++--- .../gpu/drm/amd/display/dc/core/dc_stream.c | 30 +++++------ drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + drivers/gpu/drm/amd/display/dc/dc_stream.h | 13 ++++- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 10 ++-- .../drm/amd/display/dc/dce/dce_link_encoder.c | 2 +- .../display/dc/dce110/dce110_hw_sequencer.c | 26 ++++----- .../amd/display/dc/dce110/dce110_resource.c | 2 +- .../amd/display/dc/dce112/dce112_resource.c | 2 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- .../amd/display/dc/dcn10/dcn10_link_encoder.c | 2 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 2 +- 19 files changed, 103 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8a626d16e8e3..31382907b0f3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2666,10 +2666,10 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, timing_out->v_border_bottom = 0; /* TODO: un-hardcode */ if (drm_mode_is_420_only(info, mode_in) - && stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) + && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444) - && stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) + && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444; else timing_out->pixel_encoding = PIXEL_ENCODING_RGB; @@ -2711,7 +2711,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, stream->out_transfer_func->type = TF_TYPE_PREDEFINED; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; - if (stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) + if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) adjust_colour_depth_from_display_info(timing_out, info); } @@ -2905,6 +2905,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, goto finish; } + stream->dm_stream_context = aconnector; + list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { /* Search for preferred mode */ if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) { @@ -2956,7 +2958,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_connector, sink); - update_stream_signal(stream); + update_stream_signal(stream, sink); if (dm_state && dm_state->freesync_capable) stream->ignore_msa_timing_param = true; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 39997d977efb..b0bc8314c6d5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -192,7 +192,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( int bpp = 0; int pbn = 0; - aconnector = stream->sink->priv; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_port) return false; @@ -284,7 +284,7 @@ bool dm_helpers_dp_mst_poll_for_allocation_change_trigger( struct drm_dp_mst_topology_mgr *mst_mgr; int ret; - aconnector = stream->sink->priv; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_port) return false; @@ -312,7 +312,7 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct drm_dp_mst_port *mst_port; int ret; - aconnector = stream->sink->priv; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_port) return false; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 43e4a2be0fa6..cd88ceca5c41 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -961,7 +961,7 @@ bool dcn_validate_bandwidth( v->dcc_rate[input_idx] = 1; /*TODO: Worst case? does this change?*/ v->output_format[input_idx] = pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420 ? dcn_bw_420 : dcn_bw_444; - v->output[input_idx] = pipe->stream->sink->sink_signal == + v->output[input_idx] = pipe->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ? dcn_bw_hdmi : dcn_bw_dp; v->output_deep_color[input_idx] = dcn_bw_encoder_8bpc; if (v->output[input_idx] == dcn_bw_hdmi) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ce34e3e0c31f..a3cd93825dcf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -384,7 +384,7 @@ void dc_stream_set_dither_option(struct dc_stream_state *stream, enum dc_dither_option option) { struct bit_depth_reduction_params params; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct pipe_ctx *pipes = NULL; int i; @@ -526,9 +526,8 @@ void dc_link_set_preferred_link_settings(struct dc *dc, for (i = 0; i < MAX_PIPES; i++) { pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->sink - && pipe->stream->sink->link) { - if (pipe->stream->sink->link == link) + if (pipe->stream && pipe->stream->link) { + if (pipe->stream->link == link) break; } } @@ -1045,7 +1044,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* Program all planes within new context*/ for (i = 0; i < context->stream_count; i++) { - const struct dc_sink *sink = context->streams[i]->sink; + const struct dc_link *link = context->streams[i]->link; if (!context->streams[i]->mode_changed) continue; @@ -1070,7 +1069,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } } - CONN_MSG_MODE(sink->link, "{%dx%d, %dx%d@%dKhz}", + CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", context->streams[i]->timing.h_addressable, context->streams[i]->timing.v_addressable, context->streams[i]->timing.h_total, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 52deacf39841..ba8e8e1fb110 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -789,7 +789,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) return false; } - sink->dongle_max_pix_clk = sink_caps.max_hdmi_pixel_clock; + sink->link->dongle_max_pix_clk = sink_caps.max_hdmi_pixel_clock; sink->converter_disable_audio = converter_disable_audio; link->local_sink = sink; @@ -1372,7 +1372,7 @@ static void dpcd_configure_panel_mode( static void enable_stream_features(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; union down_spread_ctrl old_downspread; union down_spread_ctrl new_downspread; @@ -1397,7 +1397,7 @@ static enum dc_status enable_link_dp( struct dc_stream_state *stream = pipe_ctx->stream; enum dc_status status; bool skip_video_pattern; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct dc_link_settings link_settings = {0}; enum dp_panel_mode panel_mode; @@ -1414,8 +1414,8 @@ static enum dc_status enable_link_dp( pipe_ctx->clock_source->id, &link_settings); - if (stream->sink->edid_caps.panel_patch.dppowerup_delay > 0) { - int delay_dp_power_up_in_ms = stream->sink->edid_caps.panel_patch.dppowerup_delay; + if (stream->sink_patches.dppowerup_delay > 0) { + int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay; msleep(delay_dp_power_up_in_ms); } @@ -1448,7 +1448,7 @@ static enum dc_status enable_link_edp( { enum dc_status status; struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; /*in case it is not on*/ link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); @@ -1463,7 +1463,7 @@ static enum dc_status enable_link_dp_mst( struct dc_state *state, struct pipe_ctx *pipe_ctx) { - struct dc_link *link = pipe_ctx->stream->sink->link; + struct dc_link *link = pipe_ctx->stream->link; /* sink signal type after MST branch is MST. Multiple MST sinks * share one link. Link DP PHY is enable or training only once. @@ -1597,7 +1597,7 @@ static bool i2c_write(struct pipe_ctx *pipe_ctx, cmd.payloads = &payload; if (dm_helpers_submit_i2c(pipe_ctx->stream->ctx, - pipe_ctx->stream->sink->link, &cmd)) + pipe_ctx->stream->link, &cmd)) return true; return false; @@ -1651,7 +1651,7 @@ static void write_i2c_retimer_setting( else { i2c_success = dal_ddc_service_query_ddc_data( - pipe_ctx->stream->sink->link->ddc, + pipe_ctx->stream->link->ddc, slave_address, &offset, 1, &value, 1); if (!i2c_success) /* Write failure */ @@ -1704,7 +1704,7 @@ static void write_i2c_retimer_setting( else { i2c_success = dal_ddc_service_query_ddc_data( - pipe_ctx->stream->sink->link->ddc, + pipe_ctx->stream->link->ddc, slave_address, &offset, 1, &value, 1); if (!i2c_success) /* Write failure */ @@ -1929,7 +1929,7 @@ static void write_i2c_redriver_setting( static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; enum dc_color_depth display_color_depth; enum engine_id eng_id; struct ext_hdmi_settings settings = {0}; @@ -1943,7 +1943,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) is_over_340mhz = true; if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { - unsigned short masked_chip_caps = pipe_ctx->stream->sink->link->chip_caps & + unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps & EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { /* DP159, Retimer settings */ @@ -1964,11 +1964,11 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) dal_ddc_service_write_scdc_data( - stream->sink->link->ddc, + stream->link->ddc, stream->phy_pix_clk, stream->timing.flags.LTE_340MCSC_SCRAMBLE); - memset(&stream->sink->link->cur_link_settings, 0, + memset(&stream->link->cur_link_settings, 0, sizeof(struct dc_link_settings)); display_color_depth = stream->timing.display_color_depth; @@ -1989,12 +1989,12 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) static void enable_link_lvds(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; if (stream->phy_pix_clk == 0) stream->phy_pix_clk = stream->timing.pix_clk_khz; - memset(&stream->sink->link->cur_link_settings, 0, + memset(&stream->link->cur_link_settings, 0, sizeof(struct dc_link_settings)); link->link_enc->funcs->enable_lvds_output( @@ -2145,7 +2145,7 @@ enum dc_status dc_link_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing) { - uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; + uint32_t max_pix_clk = stream->link->dongle_max_pix_clk; struct dpcd_caps *dpcd_caps = &link->dpcd_caps; /* A hack to avoid failing any modes for EDID override feature on @@ -2219,7 +2219,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, for (i = 0; i < MAX_PIPES; i++) { if (core_dc->current_state->res_ctx.pipe_ctx[i].stream) { if (core_dc->current_state->res_ctx. - pipe_ctx[i].stream->sink->link + pipe_ctx[i].stream->link == link) /* DMCU -1 for all controller id values, * therefore +1 here @@ -2279,7 +2279,7 @@ void core_link_resume(struct dc_link *link) static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream) { struct dc_link_settings *link_settings = - &stream->sink->link->cur_link_settings; + &stream->link->cur_link_settings; uint32_t link_rate_in_mbps = link_settings->link_rate * LINK_RATE_REF_FREQ_IN_MHZ; struct fixed31_32 mbps = dc_fixpt_from_int( @@ -2386,7 +2386,7 @@ static void update_mst_stream_alloc_table( static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct link_encoder *link_encoder = link->link_enc; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; @@ -2466,7 +2466,7 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct link_encoder *link_encoder = link->link_enc; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; @@ -2551,8 +2551,8 @@ void core_link_enable_stream( DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) { - stream->sink->link->link_enc->funcs->setup( - stream->sink->link->link_enc, + stream->link->link_enc->funcs->setup( + stream->link->link_enc, pipe_ctx->stream->signal); pipe_ctx->stream_res.stream_enc->funcs->setup_stereo_sync( pipe_ctx->stream_res.stream_enc, @@ -2604,7 +2604,7 @@ void core_link_enable_stream( if (status != DC_OK) { DC_LOG_WARNING("enabling link %u failed: %d\n", - pipe_ctx->stream->sink->link->link_index, + pipe_ctx->stream->link->link_index, status); /* Abort stream enable *unless* the failure was due to @@ -2633,12 +2633,12 @@ void core_link_enable_stream( allocate_mst_payload(pipe_ctx); core_dc->hwss.unblank_stream(pipe_ctx, - &pipe_ctx->stream->sink->link->cur_link_settings); + &pipe_ctx->stream->link->cur_link_settings); if (dc_is_dp_signal(pipe_ctx->stream->signal)) enable_stream_features(pipe_ctx); - dc_link_set_backlight_level(pipe_ctx->stream->sink->link, + dc_link_set_backlight_level(pipe_ctx->stream->link, pipe_ctx->stream->bl_pwm_level, 0, pipe_ctx->stream); @@ -2657,7 +2657,7 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) core_dc->hwss.disable_stream(pipe_ctx, option); - disable_link(pipe_ctx->stream->sink->link, pipe_ctx->stream->signal); + disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); } void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 0caacb60b02f..39562c93808d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1634,7 +1634,7 @@ void decide_link_settings(struct dc_stream_state *stream, req_bw = bandwidth_in_kbps_from_timing(&stream->timing); - link = stream->sink->link; + link = stream->link; /* if preferred is specified through AMDDP, use it, if it's enough * to drive the mode @@ -1656,7 +1656,7 @@ void decide_link_settings(struct dc_stream_state *stream, } /* EDP use the link cap setting */ - if (stream->sink->sink_signal == SIGNAL_TYPE_EDP) { + if (link->connector_signal == SIGNAL_TYPE_EDP) { *link_setting = link->verified_link_cap; return; } @@ -2621,7 +2621,7 @@ bool dc_link_dp_set_test_pattern( memset(&training_pattern, 0, sizeof(training_pattern)); for (i = 0; i < MAX_PIPES; i++) { - if (pipes[i].stream->sink->link == link) { + if (pipes[i].stream->link == link) { pipe_ctx = &pipes[i]; break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 0065ec7d5330..a179c09580c3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -70,8 +70,7 @@ void dp_enable_link_phy( */ for (i = 0; i < MAX_PIPES; i++) { if (pipes[i].stream != NULL && - pipes[i].stream->sink != NULL && - pipes[i].stream->sink->link == link) { + pipes[i].stream->link == link) { if (pipes[i].clock_source != NULL && pipes[i].clock_source->id != CLOCK_SOURCE_ID_DP_DTO) { pipes[i].clock_source = dp_cs; @@ -279,10 +278,8 @@ void dp_retrain_link_dp_test(struct dc_link *link, for (i = 0; i < MAX_PIPES; i++) { if (pipes[i].stream != NULL && !pipes[i].top_pipe && - pipes[i].stream->sink != NULL && - pipes[i].stream->sink->link != NULL && - pipes[i].stream_res.stream_enc != NULL && - pipes[i].stream->sink->link == link) { + pipes[i].stream->link != NULL && + pipes[i].stream_res.stream_enc != NULL) { udelay(100); pipes[i].stream_res.stream_enc->funcs->dp_blank( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 76137df74a53..188cf4fa5cf0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1559,7 +1559,7 @@ static struct stream_encoder *find_first_free_match_stream_enc_for_link( { int i; int j = -1; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; for (i = 0; i < pool->stream_enc_count; i++) { if (!res_ctx->is_stream_enc_acquired[i] && @@ -1748,7 +1748,7 @@ static struct dc_stream_state *find_pll_sharable_stream( if (resource_are_streams_timing_synchronizable( stream_needs_pll, stream_has_pll) && !dc_is_dp_signal(stream_has_pll->signal) - && stream_has_pll->sink->link->connector_signal + && stream_has_pll->link->connector_signal != SIGNAL_TYPE_VIRTUAL) return stream_has_pll; @@ -1850,7 +1850,7 @@ enum dc_status resource_map_pool_resources( true); /* TODO: Add check if ASIC support and EDID audio */ - if (!stream->sink->converter_disable_audio && + if (!stream->converter_disable_audio && dc_is_audio_capable_signal(pipe_ctx->stream->signal) && stream->audio_info.mode_count) { pipe_ctx->stream_res.audio = find_first_free_audio( @@ -2112,7 +2112,7 @@ static void set_avi_info_frame( itc = true; itc_value = 1; - support = stream->sink->edid_caps.content_support; + support = stream->content_support; if (itc) { if (!support.bits.valid_content_type) { @@ -2151,8 +2151,8 @@ static void set_avi_info_frame( /* TODO : We should handle YCC quantization */ /* but we do not have matrix calculation */ - if (stream->sink->edid_caps.qs_bit == 1 && - stream->sink->edid_caps.qy_bit == 1) { + if (stream->qs_bit == 1 && + stream->qy_bit == 1) { if (color_space == COLOR_SPACE_SRGB || color_space == COLOR_SPACE_2020_RGB_FULLRANGE) { hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; @@ -2596,7 +2596,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream) { struct dc *core_dc = dc; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct timing_generator *tg = core_dc->res_pool->timing_generators[0]; enum dc_status res = DC_OK; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 66e5c4623a49..8f9dc6f54bf1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -35,20 +35,17 @@ /******************************************************************************* * Private functions ******************************************************************************/ -void update_stream_signal(struct dc_stream_state *stream) +void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) { - - struct dc_sink *dc_sink = stream->sink; - - if (dc_sink->sink_signal == SIGNAL_TYPE_NONE) - stream->signal = stream->sink->link->connector_signal; + if (sink->sink_signal == SIGNAL_TYPE_NONE) + stream->signal = stream->link->connector_signal; else - stream->signal = dc_sink->sink_signal; + stream->signal = sink->sink_signal; if (dc_is_dvi_signal(stream->signal)) { if (stream->ctx->dc->caps.dual_link_dvi && stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK && - stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) + sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; else stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; @@ -61,10 +58,15 @@ static void construct(struct dc_stream_state *stream, uint32_t i = 0; stream->sink = dc_sink_data; - stream->ctx = stream->sink->ctx; - dc_sink_retain(dc_sink_data); + stream->ctx = dc_sink_data->ctx; + stream->link = dc_sink_data->link; + stream->sink_patches = dc_sink_data->edid_caps.panel_patch; + stream->converter_disable_audio = dc_sink_data->converter_disable_audio; + stream->qs_bit = dc_sink_data->edid_caps.qs_bit; + stream->qy_bit = dc_sink_data->edid_caps.qy_bit; + /* Copy audio modes */ /* TODO - Remove this translation */ for (i = 0; i < (dc_sink_data->edid_caps.audio_mode_count); i++) @@ -100,7 +102,7 @@ static void construct(struct dc_stream_state *stream, /* EDID CAP translation for HDMI 2.0 */ stream->timing.flags.LTE_340MCSC_SCRAMBLE = dc_sink_data->edid_caps.lte_340mcsc_scramble; - update_stream_signal(stream); + update_stream_signal(stream, dc_sink_data); stream->out_transfer_func = dc_create_transfer_func(); stream->out_transfer_func->type = TF_TYPE_BYPASS; @@ -339,11 +341,7 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) stream->timing.v_total, stream->timing.pixel_encoding, stream->timing.display_color_depth); - DC_LOG_DC( - "\tsink name: %s, serial: %d\n", - stream->sink->edid_caps.display_name, - stream->sink->edid_caps.serial_number); DC_LOG_DC( "\tlink: %d\n", - stream->sink->link->link_index); + stream->link->link_index); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 29f19d57ff7a..8a0e57620345 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -110,6 +110,7 @@ struct dc_link { union ddi_channel_mapping ddi_channel_mapping; struct connector_device_tag_info device_tag; struct dpcd_caps dpcd_caps; + uint32_t dongle_max_pix_clk; unsigned short chip_caps; unsigned int dpcd_sink_count; enum edp_revision edp_revision; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index be34d638e15d..39b3016f7d2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -51,7 +51,13 @@ struct freesync_context { }; struct dc_stream_state { + // sink is deprecated, new code should not reference + // this pointer struct dc_sink *sink; + + struct dc_link *link; + struct dc_panel_patch sink_patches; + union display_content_support content_support; struct dc_crtc_timing timing; struct dc_crtc_timing_adjust adjust; struct dc_info_packet vrr_infopacket; @@ -80,6 +86,9 @@ struct dc_stream_state { enum view_3d_format view_format; bool ignore_msa_timing_param; + bool converter_disable_audio; + uint8_t qs_bit; + uint8_t qy_bit; unsigned long long periodic_fn_vsync_delta; @@ -105,6 +114,8 @@ struct dc_stream_state { bool dpms_off; bool apply_edp_fast_boot_optimization; + void *dm_stream_context; + struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode @@ -256,7 +267,7 @@ enum surface_update_type dc_check_update_surfaces_for_stream( */ struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink); -void update_stream_signal(struct dc_stream_state *stream); +void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index afd287f08bc9..5c1a7c013f63 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -483,15 +483,15 @@ void dce110_fill_display_configs( cfg->src_height = stream->src.height; cfg->src_width = stream->src.width; cfg->ddi_channel_mapping = - stream->sink->link->ddi_channel_mapping.raw; + stream->link->ddi_channel_mapping.raw; cfg->transmitter = - stream->sink->link->link_enc->transmitter; + stream->link->link_enc->transmitter; cfg->link_settings.lane_count = - stream->sink->link->cur_link_settings.lane_count; + stream->link->cur_link_settings.lane_count; cfg->link_settings.link_rate = - stream->sink->link->cur_link_settings.link_rate; + stream->link->cur_link_settings.link_rate; cfg->link_settings.link_spread = - stream->sink->link->cur_link_settings.link_spread; + stream->link->cur_link_settings.link_spread; cfg->sym_clock = stream->phy_pix_clk; /* Round v_refresh*/ cfg->v_refresh = stream->timing.pix_clk_khz * 1000; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 3e18ea84b1f9..1d794c65cc8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -788,7 +788,7 @@ bool dce110_link_encoder_validate_output_with_stream( case SIGNAL_TYPE_DVI_DUAL_LINK: is_valid = dce110_link_encoder_validate_dvi_output( enc110, - stream->sink->link->connector_signal, + stream->link->connector_signal, stream->signal, &stream->timing); break; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4bf24758217f..e947cc5381ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -617,12 +617,11 @@ dce110_set_output_transfer_func(struct pipe_ctx *pipe_ctx, static enum dc_status bios_parser_crtc_source_select( struct pipe_ctx *pipe_ctx) { - struct dc_bios *dcb; + struct dc_bios *dcb = pipe_ctx->stream->ctx->dc_bios; /* call VBIOS table to set CRTC source for the HW * encoder block * note: video bios clears all FMT setting here. */ struct bp_crtc_source_select crtc_source_select = {0}; - const struct dc_sink *sink = pipe_ctx->stream->sink; crtc_source_select.engine_id = pipe_ctx->stream_res.stream_enc->id; crtc_source_select.controller_id = pipe_ctx->stream_res.tg->inst + 1; @@ -652,8 +651,6 @@ static enum dc_status bios_parser_crtc_source_select( break; } - dcb = sink->ctx->dc_bios; - if (BP_RESULT_OK != dcb->funcs->crtc_source_select( dcb, &crtc_source_select)) { @@ -692,10 +689,10 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) void dce110_enable_stream(struct pipe_ctx *pipe_ctx) { enum dc_lane_count lane_count = - pipe_ctx->stream->sink->link->cur_link_settings.lane_count; + pipe_ctx->stream->link->cur_link_settings.lane_count; struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; - struct dc_link *link = pipe_ctx->stream->sink->link; + struct dc_link *link = pipe_ctx->stream->link; uint32_t active_total_with_borders; @@ -1048,7 +1045,7 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; struct dc *dc = pipe_ctx->stream->ctx->dc; if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) @@ -1073,7 +1070,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, { struct encoder_unblank_param params = { { 0 } }; struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; /* only 3 items below are used by unblank */ params.pixel_clk_khz = @@ -1090,7 +1087,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, void dce110_blank_stream(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; + struct dc_link *link = stream->link; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { link->dc->hwss.edp_backlight_control(link, false); @@ -1408,7 +1405,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0; - pipe_ctx->stream->sink->link->psr_enabled = false; + pipe_ctx->stream->link->psr_enabled = false; return DC_OK; } @@ -1813,18 +1810,15 @@ static bool should_enable_fbc(struct dc *dc, if (i == dc->res_pool->pipe_count) return false; - if (!pipe_ctx->stream->sink) - return false; - - if (!pipe_ctx->stream->sink->link) + if (!pipe_ctx->stream->link) return false; /* Only supports eDP */ - if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) + if (pipe_ctx->stream->link->connector_signal != SIGNAL_TYPE_EDP) return false; /* PSR should not be enabled */ - if (pipe_ctx->stream->sink->link->psr_enabled) + if (pipe_ctx->stream->link->psr_enabled) return false; /* Nothing to compress */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index e33d11785b1f..3c989b8e1ae3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -780,7 +780,7 @@ static void get_pixel_clock_parameters( * in pll_adjust_pix_clk */ pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; - pixel_clk_params->encoder_object_id = stream->sink->link->link_enc->id; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; /* TODO: un-hardcode*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 969d4e72dc94..c2a713399726 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -763,7 +763,7 @@ static struct clock_source *find_matching_pll( const struct resource_pool *pool, const struct dc_stream_state *const stream) { - switch (stream->sink->link->link_enc->transmitter) { + switch (stream->link->link_enc->transmitter) { case TRANSMITTER_UNIPHY_A: return pool->clock_sources[DCE112_CLK_SRC_PLL0]; case TRANSMITTER_UNIPHY_B: diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 03835dcd28a2..613293d79bd7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2524,7 +2524,7 @@ static void dcn10_config_stereo_parameters( timing_3d_format == TIMING_3D_FORMAT_DP_HDMI_INBAND_FA || timing_3d_format == TIMING_3D_FORMAT_SIDEBAND_FA) { enum display_dongle_type dongle = \ - stream->sink->link->ddc->dongle_type; + stream->link->ddc->dongle_type; if (dongle == DISPLAY_DONGLE_DP_VGA_CONVERTER || dongle == DISPLAY_DONGLE_DP_DVI_CONVERTER || dongle == DISPLAY_DONGLE_DP_HDMI_CONVERTER) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 477ab9222216..aefe8974469d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -738,7 +738,7 @@ bool dcn10_link_encoder_validate_output_with_stream( case SIGNAL_TYPE_DVI_DUAL_LINK: is_valid = dcn10_link_encoder_validate_dvi_output( enc10, - stream->sink->link->connector_signal, + stream->link->connector_signal, stream->signal, &stream->timing); break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 5d4772dec0ba..6ab1ca221579 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -975,7 +975,7 @@ static void get_pixel_clock_parameters( { const struct dc_stream_state *stream = pipe_ctx->stream; pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; - pixel_clk_params->encoder_object_id = stream->sink->link->link_enc->id; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; /* TODO: un-hardcode*/ From 923a3743abbece8426a545e8cacab5202f4b32c4 Mon Sep 17 00:00:00 2001 From: Imad Syed Date: Wed, 21 Nov 2018 12:54:31 -0800 Subject: [PATCH 079/539] drm/amd/display: Remove unused headers They're not being used, so remove them. Signed-off-by: Imad Syed Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ba8e8e1fb110..5e5c162ca304 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -43,10 +43,6 @@ #include "dpcd_defs.h" #include "dmcu.h" -#include "dce/dce_11_0_d.h" -#include "dce/dce_11_0_enum.h" -#include "dce/dce_11_0_sh_mask.h" - #define DC_LOGGER_INIT(logger) From 923fe4951282cbdfce05186c10380bbc45b5e03b Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 20 Nov 2018 09:42:58 -0500 Subject: [PATCH 080/539] drm/amd/display: Detach backlight from stream [Why] Backlight is conceptually a property of links, not streams. All backlight programming is done on links, but there is a stream property bl_pwm_level that is used to restore backlight on dpms on and s3 resume. This is unnecessary, as backlight is already restored by hardware with no driver intervention. [How] Remove bl_pwm_level, and the stream argument to set_backlight Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 12 +----------- drivers/gpu/drm/amd/display/dc/dc_link.h | 3 +-- drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 - 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 31382907b0f3..562d9d64ec43 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1760,7 +1760,7 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) + caps.min_input_signal * 0x101; if (dc_link_set_backlight_level(dm->backlight_link, - brightness, 0, 0)) + brightness, 0)) return 0; else return 1; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 5e5c162ca304..ffd1c7d7fe44 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2186,8 +2186,7 @@ int dc_link_get_backlight_level(const struct dc_link *link) bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp, - const struct dc_stream_state *stream) + uint32_t frame_ramp) { struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; @@ -2202,10 +2201,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link, (abm->funcs->set_backlight_level_pwm == NULL)) return false; - if (stream) - ((struct dc_stream_state *)stream)->bl_pwm_level = - backlight_pwm_u16_16; - use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", @@ -2633,11 +2628,6 @@ void core_link_enable_stream( if (dc_is_dp_signal(pipe_ctx->stream->signal)) enable_stream_features(pipe_ctx); - - dc_link_set_backlight_level(pipe_ctx->stream->link, - pipe_ctx->stream->bl_pwm_level, - 0, - pipe_ctx->stream); } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 8a0e57620345..f249ff9be2a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -147,8 +147,7 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_ */ bool dc_link_set_backlight_level(const struct dc_link *dc_link, uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp, - const struct dc_stream_state *stream); + uint32_t frame_ramp); int dc_link_get_backlight_level(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 39b3016f7d2e..8a778dd5f69e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -100,7 +100,6 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; - unsigned int bl_pwm_level; /* from core_stream struct */ struct dc_context *ctx; From fb57452fb4023a8efeb25efd468f53b292ba414e Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Thu, 8 Nov 2018 14:38:44 -0500 Subject: [PATCH 081/539] drm/amd/display: update DCN dml calcs DV have made updates to DCN dml which we need to pull in Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 7 -- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 2 +- .../amd/display/dc/dml/display_mode_structs.h | 107 +----------------- .../display/dc/dml/dml1_display_rq_dlg_calc.c | 4 +- .../gpu/drm/amd/display/dc/inc/hw/mem_input.h | 4 +- 5 files changed, 11 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index cd88ceca5c41..446ee482fd24 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -290,41 +290,34 @@ static void pipe_ctx_to_e2e_pipe_params ( switch (pipe->plane_state->tiling_info.gfx9.swizzle) { /* for 4/8/16 high tiles */ case DC_SW_LINEAR: - input->src.is_display_sw = 1; input->src.macro_tile_size = dm_4k_tile; break; case DC_SW_4KB_S: case DC_SW_4KB_S_X: - input->src.is_display_sw = 0; input->src.macro_tile_size = dm_4k_tile; break; case DC_SW_64KB_S: case DC_SW_64KB_S_X: case DC_SW_64KB_S_T: - input->src.is_display_sw = 0; input->src.macro_tile_size = dm_64k_tile; break; case DC_SW_VAR_S: case DC_SW_VAR_S_X: - input->src.is_display_sw = 0; input->src.macro_tile_size = dm_256k_tile; break; /* For 64bpp 2 high tiles */ case DC_SW_4KB_D: case DC_SW_4KB_D_X: - input->src.is_display_sw = 1; input->src.macro_tile_size = dm_4k_tile; break; case DC_SW_64KB_D: case DC_SW_64KB_D_X: case DC_SW_64KB_D_T: - input->src.is_display_sw = 1; input->src.macro_tile_size = dm_64k_tile; break; case DC_SW_VAR_D: case DC_SW_VAR_D_X: - input->src.is_display_sw = 1; input->src.macro_tile_size = dm_256k_tile; break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 6ab1ca221579..83447cde6a75 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -70,7 +70,7 @@ const struct _vcs_dpi_ip_params_st dcn1_0_ip = { .rob_buffer_size_kbytes = 64, .det_buffer_size_kbytes = 164, - .dpte_buffer_size_in_pte_reqs = 42, + .dpte_buffer_size_in_pte_reqs_luma = 42, .dpp_output_buffer_pixels = 2560, .opp_output_buffer_lines = 1, .pixel_chunk_size_kbytes = 8, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 5dd04520ceca..391183e3428f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -30,22 +30,15 @@ typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st; typedef struct _vcs_dpi_ip_params_st ip_params_st; typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st; typedef struct _vcs_dpi_display_output_params_st display_output_params_st; -typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st; typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st; typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st; typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st; typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st; typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st; typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st; -typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st; -typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st; -typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st; -typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st; -typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st; typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st; typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st; typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st; -typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st; typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st; typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st; typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st; @@ -55,8 +48,6 @@ typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st; typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st; typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st; typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st; -typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st; -typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st; typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st; struct _vcs_dpi_voltage_scaling_st { @@ -111,8 +102,6 @@ struct _vcs_dpi_soc_bounding_box_st { double xfc_bus_transport_time_us; double xfc_xbuf_latency_tolerance_us; int use_urgent_burst_bw; - double max_hscl_ratio; - double max_vscl_ratio; unsigned int num_states; struct _vcs_dpi_voltage_scaling_st clock_limits[8]; }; @@ -129,7 +118,8 @@ struct _vcs_dpi_ip_params_st { unsigned int odm_capable; unsigned int rob_buffer_size_kbytes; unsigned int det_buffer_size_kbytes; - unsigned int dpte_buffer_size_in_pte_reqs; + unsigned int dpte_buffer_size_in_pte_reqs_luma; + unsigned int dpte_buffer_size_in_pte_reqs_chroma; unsigned int pde_proc_buffer_size_64k_reqs; unsigned int dpp_output_buffer_pixels; unsigned int opp_output_buffer_lines; @@ -192,7 +182,6 @@ struct _vcs_dpi_display_xfc_params_st { struct _vcs_dpi_display_pipe_source_params_st { int source_format; unsigned char dcc; - unsigned int dcc_override; unsigned int dcc_rate; unsigned char dcc_use_global; unsigned char vm; @@ -205,7 +194,6 @@ struct _vcs_dpi_display_pipe_source_params_st { int source_scan; int sw_mode; int macro_tile_size; - unsigned char is_display_sw; unsigned int viewport_width; unsigned int viewport_height; unsigned int viewport_y_y; @@ -252,16 +240,10 @@ struct _vcs_dpi_display_output_params_st { int output_bpc; int output_type; int output_format; - int output_standard; int dsc_slices; struct writeback_st wb; }; -struct _vcs_dpi_display_bandwidth_st { - double total_bw_consumed_gbps; - double guaranteed_urgent_return_bw_gbps; -}; - struct _vcs_dpi_scaler_ratio_depth_st { double hscl_ratio; double vscl_ratio; @@ -300,11 +282,9 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_width; unsigned int vready_offset; unsigned char interlaced; - unsigned char underscan; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; unsigned char otg_inst; - unsigned char odm_split_cnt; unsigned char odm_combine; unsigned char use_maximum_vstartup; }; @@ -331,65 +311,6 @@ struct _vcs_dpi_display_e2e_pipe_params_st { display_clocks_and_cfg_st clks_cfg; }; -struct _vcs_dpi_dchub_buffer_sizing_st { - unsigned int swath_width_y; - unsigned int swath_height_y; - unsigned int swath_height_c; - unsigned int detail_buffer_size_y; -}; - -struct _vcs_dpi_watermarks_perf_st { - double stutter_eff_in_active_region_percent; - double urgent_latency_supported_us; - double non_urgent_latency_supported_us; - double dram_clock_change_margin_us; - double dram_access_eff_percent; -}; - -struct _vcs_dpi_cstate_pstate_watermarks_st { - double cstate_exit_us; - double cstate_enter_plus_exit_us; - double pstate_change_us; -}; - -struct _vcs_dpi_wm_calc_pipe_params_st { - unsigned int num_dpp; - int voltage; - int output_type; - double dcfclk_mhz; - double socclk_mhz; - double dppclk_mhz; - double pixclk_mhz; - unsigned char interlace_en; - unsigned char pte_enable; - unsigned char dcc_enable; - double dcc_rate; - double bytes_per_pixel_c; - double bytes_per_pixel_y; - unsigned int swath_width_y; - unsigned int swath_height_y; - unsigned int swath_height_c; - unsigned int det_buffer_size_y; - double h_ratio; - double v_ratio; - unsigned int h_taps; - unsigned int h_total; - unsigned int v_total; - unsigned int v_active; - unsigned int e2e_index; - double display_pipe_line_delivery_time; - double read_bw; - unsigned int lines_in_det_y; - unsigned int lines_in_det_y_rounded_down_to_swath; - double full_det_buffering_time; - double dcfclk_deepsleep_mhz_per_plane; -}; - -struct _vcs_dpi_vratio_pre_st { - double vratio_pre_l; - double vratio_pre_c; -}; - struct _vcs_dpi_display_data_rq_misc_params_st { unsigned int full_swath_bytes; unsigned int stored_swath_bytes; @@ -423,16 +344,9 @@ struct _vcs_dpi_display_data_rq_dlg_params_st { unsigned int meta_bytes_per_row_ub; }; -struct _vcs_dpi_display_cur_rq_dlg_params_st { - unsigned char enable; - unsigned int swath_height; - unsigned int req_per_line; -}; - struct _vcs_dpi_display_rq_dlg_params_st { display_data_rq_dlg_params_st rq_l; display_data_rq_dlg_params_st rq_c; - display_cur_rq_dlg_params_st rq_cur0; }; struct _vcs_dpi_display_rq_sizing_params_st { @@ -498,6 +412,10 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int xfc_reg_remote_surface_flip_latency; unsigned int xfc_reg_prefetch_margin; unsigned int dst_y_delta_drq_limit; + unsigned int refcyc_per_vm_group_vblank; + unsigned int refcyc_per_vm_group_flip; + unsigned int refcyc_per_vm_req_vblank; + unsigned int refcyc_per_vm_req_flip; }; struct _vcs_dpi_display_ttu_regs_st { @@ -556,19 +474,6 @@ struct _vcs_dpi_display_dlg_sys_params_st { unsigned int total_flip_bytes; }; -struct _vcs_dpi_display_dlg_prefetch_param_st { - double prefetch_bw; - unsigned int flip_bytes; -}; - -struct _vcs_dpi_display_pipe_clock_st { - double dcfclk_mhz; - double dispclk_mhz; - double socclk_mhz; - double dscclk_mhz[6]; - double dppclk_mhz[6]; -}; - struct _vcs_dpi_display_arb_params_st { int max_req_outstanding; int min_req_outstanding; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index c2037daa8e66..a8b233ee5f97 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -459,7 +459,7 @@ static void dml1_rq_dlg_get_row_heights( /* dpte */ /* ------ */ log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); - dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs; + dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; log2_vmpg_height = 0; log2_vmpg_width = 0; @@ -776,7 +776,7 @@ static void get_surf_rq_param( /* dpte */ /* ------ */ log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); - dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs; + dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; log2_vmpg_height = 0; log2_vmpg_width = 0; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 06df02ddff6a..da89c2edb07c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -31,7 +31,7 @@ #include "dml/display_mode_structs.h" struct dchub_init_data; -struct cstate_pstate_watermarks_st1 { +struct cstate_pstate_watermarks_st { uint32_t cstate_exit_ns; uint32_t cstate_enter_plus_exit_ns; uint32_t pstate_change_ns; @@ -40,7 +40,7 @@ struct cstate_pstate_watermarks_st1 { struct dcn_watermarks { uint32_t pte_meta_urgent_ns; uint32_t urgent_ns; - struct cstate_pstate_watermarks_st1 cstate_pstate; + struct cstate_pstate_watermarks_st cstate_pstate; }; struct dcn_watermark_set { From 04a789bef315a3ed64f296fab21be3933405ea43 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Fri, 23 Nov 2018 15:21:02 -0500 Subject: [PATCH 082/539] drm/amd/display: add stream ID and otg instance in dc_stream_state [why] stream ID allows DMs to avoid memory address comparisons to compare stream equality. otg_instance allows DC to more rigorously define when otg_instance can change. specifically, it is now defined to be only mutable when dc_stream_state changes. This is better than a "get status" function which prevents efficient caching of otherwise very stable information. [how] stream ID follows similar pattern to sink ID, which is already implemented otg_instance is an output which occurs on all dc_stream modification functions Signed-off-by: Jun Lei Reviewed-by: Tony Cheng Acked-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++++ drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 12 ++++++++++++ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 4 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a3cd93825dcf..7b9174b1432f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -669,6 +669,7 @@ static bool construct(struct dc *dc, dc_ctx->dc = dc; dc_ctx->asic_id = init_params->asic_id; dc_ctx->dc_sink_id_count = 0; + dc_ctx->dc_stream_id_count = 0; dc->ctx = dc_ctx; dc->current_state = dc_create_state(); @@ -1045,6 +1046,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* Program all planes within new context*/ for (i = 0; i < context->stream_count; i++) { const struct dc_link *link = context->streams[i]->link; + struct dc_stream_status *status; if (!context->streams[i]->mode_changed) continue; @@ -1069,6 +1071,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } } + status = dc_stream_get_status(context->streams[i]); + context->streams[i]->out.otg_offset = status->primary_otg_inst; + CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", context->streams[i]->timing.h_addressable, context->streams[i]->timing.v_addressable, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 8f9dc6f54bf1..336586ba4639 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -107,6 +107,9 @@ static void construct(struct dc_stream_state *stream, stream->out_transfer_func = dc_create_transfer_func(); stream->out_transfer_func->type = TF_TYPE_BYPASS; stream->out_transfer_func->ctx = stream->ctx; + + stream->stream_id = stream->ctx->dc_stream_id_count; + stream->ctx->dc_stream_id_count++; } static void destruct(struct dc_stream_state *stream) diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 8a778dd5f69e..0dfad73d1743 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -127,6 +127,18 @@ struct dc_stream_state { /* Computed state bits */ bool mode_changed : 1; + /* Output from DC when stream state is committed or altered + * DC may only access these values during: + * dc_commit_state, dc_commit_state_no_check, dc_commit_streams + * values may not change outside of those calls + */ + struct { + // For interrupt management, some hardware instance + // offsets need to be exposed to DM + uint8_t otg_offset; + } out; + + uint32_t stream_id; }; struct dc_stream_update { diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 0b20ae23f169..07cd165c389c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -99,6 +99,7 @@ struct dc_context { struct gpio_service *gpio_service; struct i2caux *i2caux; uint32_t dc_sink_id_count; + uint32_t dc_stream_id_count; uint64_t fbc_gpu_addr; }; From f284975ef2bcec6eb5ab51f61ea67f460ca49a03 Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 13 Nov 2018 10:37:16 -0500 Subject: [PATCH 083/539] drm/amd/display: Add backlight pwm debugfs [Why] ABM enablement testing can be automated if a way of reading target and current hardware backlight is available [How] Expand debugfs interface with two new entries. Hook directly into dc interface. Units are as a fraction of 0x1000 = 100% Use the built-in amdgpu function for creating read-only debugfs files Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 49 ++++++++++++++++++- drivers/gpu/drm/amd/display/dc/core/dc.c | 20 ++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 ++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 9a7ac58eb18e..cca3e16cda4f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -783,6 +783,45 @@ static ssize_t dtn_log_write( return size; } +/* + * Backlight at this moment. Read only. + * As written to display, taking ABM and backlight lut into account. + * Ranges from 0x0 to 0x10000 (= 100% PWM) + */ +static int current_backlight_read(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + struct dc *dc = adev->dm.dc; + unsigned int backlight = dc_get_current_backlight_pwm(dc); + + seq_printf(m, "0x%x\n", backlight); + return 0; +} + +/* + * Backlight value that is being approached. Read only. + * As written to display, taking ABM and backlight lut into account. + * Ranges from 0x0 to 0x10000 (= 100% PWM) + */ +static int target_backlight_read(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + struct dc *dc = adev->dm.dc; + unsigned int backlight = dc_get_target_backlight_pwm(dc); + + seq_printf(m, "0x%x\n", backlight); + return 0; +} + +static const struct drm_info_list amdgpu_dm_debugfs_list[] = { + {"amdgpu_current_backlight_pwm", ¤t_backlight_read}, + {"amdgpu_target_backlight_pwm", &target_backlight_read}, +}; + int dtn_debugfs_init(struct amdgpu_device *adev) { static const struct file_operations dtn_log_fops = { @@ -793,9 +832,15 @@ int dtn_debugfs_init(struct amdgpu_device *adev) }; struct drm_minor *minor = adev->ddev->primary; - struct dentry *root = minor->debugfs_root; + struct dentry *ent, *root = minor->debugfs_root; + int ret; - struct dentry *ent = debugfs_create_file( + ret = amdgpu_debugfs_add_files(adev, amdgpu_dm_debugfs_list, + ARRAY_SIZE(amdgpu_dm_debugfs_list)); + if (ret) + return ret; + + ent = debugfs_create_file( "amdgpu_dm_dtn_log", 0644, root, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7b9174b1432f..66067a46eb6f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1779,6 +1779,26 @@ void dc_resume(struct dc *dc) core_link_resume(dc->links[i]); } +unsigned int dc_get_current_backlight_pwm(struct dc *dc) +{ + struct abm *abm = dc->res_pool->abm; + + if (abm) + return abm->funcs->get_current_backlight(abm); + + return 0; +} + +unsigned int dc_get_target_backlight_pwm(struct dc *dc) +{ + struct abm *abm = dc->res_pool->abm; + + if (abm) + return abm->funcs->get_target_backlight(abm); + + return 0; +} + bool dc_is_dmcu_initialized(struct dc *dc) { struct dmcu *dmcu = dc->res_pool->dmcu; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index da08c7122497..c150597962f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -749,6 +749,9 @@ void dc_set_power_state( struct dc *dc, enum dc_acpi_cm_power_state power_state); void dc_resume(struct dc *dc); +unsigned int dc_get_current_backlight_pwm(struct dc *dc); +unsigned int dc_get_target_backlight_pwm(struct dc *dc); + bool dc_is_dmcu_initialized(struct dc *dc); #endif /* DC_INTERFACE_H_ */ From 0ec0b5f26ca53922182a0ff88e982eb9e856a18f Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Mon, 26 Nov 2018 09:59:37 -0500 Subject: [PATCH 084/539] drm/amd/display: 3.2.10 Signed-off-by: Steven Chiu Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c150597962f6..ef59f5cce0eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.09" +#define DC_VER "3.2.10" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From c3f574ba7bdb96f6052ec15d69e97c7a2bea68a4 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Tue, 20 Nov 2018 10:07:07 -0500 Subject: [PATCH 085/539] drm/amd/display: Remove stream_status->link [Why] It's not being used anywhere. [How] Remove it. Signed-off-by: Leo Li Reviewed-by: David Francis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_stream.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 0dfad73d1743..1e1e89e7c2c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -38,11 +38,6 @@ struct dc_stream_status { int stream_enc_inst; int plane_count; struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; - - /* - * link this stream passes through - */ - struct dc_link *link; }; // TODO: References to this needs to be removed.. From 43995f8fb64c54b740262f72ab51d41268c0f7ff Mon Sep 17 00:00:00 2001 From: SivapiriyanKumarasamy Date: Fri, 23 Nov 2018 20:33:35 -0500 Subject: [PATCH 086/539] drm/amd/display: Minor code cleanup [WHY] There is an extra null check for fs_params in the build_freesync_hdr function detected by Smatch. [HOW] 1) Remove the extra null check since it is checked in the caller. Signed-off-by: SivapiriyanKumarasamy Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 479b77c2e89e..beb7bda1478a 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -823,7 +823,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, bool is_clipped = false; struct fixed31_32 sdr_white_level; - if (fs_params == NULL || fs_params->max_content == 0 || + if (fs_params->max_content == 0 || fs_params->max_display == 0) return false; From 380604e27bc9c26ce64a83044aa1ea76ffd28caf Mon Sep 17 00:00:00 2001 From: Ken Chalmers Date: Tue, 6 Nov 2018 14:24:12 -0500 Subject: [PATCH 087/539] drm/amd/display: Use 100 Hz precision for pipe pixel clocks [Why] Users would like more accurate pixel clocks, especially for fractional "TV" frame rates like 59.94 Hz. [How] Store and communicate pixel clocks with 100 Hz accuracy from dc_crtc_timing through to BIOS command table setpixelclock call. Signed-off-by: Ken Chalmers Reviewed-by: Charlene Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- .../drm/amd/display/dc/bios/command_table.c | 19 +- .../drm/amd/display/dc/bios/command_table2.c | 6 +- .../gpu/drm/amd/display/dc/calcs/dce_calcs.c | 10 +- .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 10 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 22 +- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 +- .../drm/amd/display/dc/core/dc_link_hwss.c | 4 +- .../gpu/drm/amd/display/dc/core/dc_resource.c | 10 +- .../gpu/drm/amd/display/dc/core/dc_stream.c | 4 +- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 2 +- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 12 +- .../drm/amd/display/dc/dce/dce_clock_source.c | 196 +++++++++--------- .../drm/amd/display/dc/dce/dce_link_encoder.c | 6 +- .../amd/display/dc/dce/dce_stream_encoder.c | 4 +- .../display/dc/dce110/dce110_hw_sequencer.c | 20 +- .../amd/display/dc/dce110/dce110_resource.c | 10 +- .../display/dc/dce80/dce80_timing_generator.c | 8 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- .../dc/dcn10/dcn10_hw_sequencer_debug.c | 2 +- .../amd/display/dc/dcn10/dcn10_link_encoder.c | 18 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 2 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 6 +- .../display/dc/dcn10/dcn10_stream_encoder.c | 2 +- .../gpu/drm/amd/display/dc/inc/clock_source.h | 8 +- .../amd/display/include/bios_parser_types.h | 4 +- .../amd/display/modules/freesync/freesync.c | 12 +- 29 files changed, 206 insertions(+), 209 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 562d9d64ec43..3902ce599287 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2580,7 +2580,7 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing) * according to HDMI spec, we use YCbCr709 and YCbCr601 * respectively */ - if (dc_crtc_timing->pix_clk_khz > 27030) { + if (dc_crtc_timing->pix_clk_100hz > 270300) { if (dc_crtc_timing->flags.Y_ONLY) color_space = COLOR_SPACE_YCBCR709_LIMITED; @@ -2623,7 +2623,7 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_ if (timing_out->display_color_depth <= COLOR_DEPTH_888) return; do { - normalized_clk = timing_out->pix_clk_khz; + normalized_clk = timing_out->pix_clk_100hz / 10; /* YCbCr 4:2:0 requires additional adjustment of 1/2 */ if (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420) normalized_clk /= 2; @@ -2704,7 +2704,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, mode_in->crtc_vsync_start - mode_in->crtc_vdisplay; timing_out->v_sync_width = mode_in->crtc_vsync_end - mode_in->crtc_vsync_start; - timing_out->pix_clk_khz = mode_in->crtc_clock; + timing_out->pix_clk_100hz = mode_in->crtc_clock * 10; timing_out->aspect_ratio = get_aspect_ratio(mode_in); stream->output_color_space = get_output_color_space(timing_out); @@ -2832,7 +2832,7 @@ static void set_master_stream(struct dc_stream_state *stream_set[], if (stream_set[j] && stream_set[j]->triggered_crtc_reset.enabled) { int refresh_rate = 0; - refresh_rate = (stream_set[j]->timing.pix_clk_khz*1000)/ + refresh_rate = (stream_set[j]->timing.pix_clk_100hz*100)/ (stream_set[j]->timing.h_total*stream_set[j]->timing.v_total); if (refresh_rate > highest_rfr) { highest_rfr = refresh_rate; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b0bc8314c6d5..e6ab0186955c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -205,7 +205,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( mst_port = aconnector->port; if (enable) { - clock = stream->timing.pix_clk_khz; + clock = stream->timing.pix_clk_100hz / 10; switch (stream->timing.display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 2bd7cd97e00d..67c119bf6bf7 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -964,9 +964,9 @@ static enum bp_result set_pixel_clock_v3( allocation.sPCLKInput.ucPostDiv = (uint8_t)bp_params->pixel_clock_post_divider; - /* We need to convert from KHz units into 10KHz units */ + /* We need to convert from 100Hz units into 10KHz units */ allocation.sPCLKInput.usPixelClock = - cpu_to_le16((uint16_t)(bp_params->target_pixel_clock / 10)); + cpu_to_le16((uint16_t)(bp_params->target_pixel_clock_100hz / 100)); params = (PIXEL_CLOCK_PARAMETERS_V3 *)&allocation.sPCLKInput; params->ucTransmitterId = @@ -1042,9 +1042,9 @@ static enum bp_result set_pixel_clock_v5( (uint8_t)bp->cmd_helper->encoder_mode_bp_to_atom( bp_params->signal_type, false); - /* We need to convert from KHz units into 10KHz units */ + /* We need to convert from 100Hz units into 10KHz units */ clk.sPCLKInput.usPixelClock = - cpu_to_le16((uint16_t)(bp_params->target_pixel_clock / 10)); + cpu_to_le16((uint16_t)(bp_params->target_pixel_clock_100hz / 100)); if (bp_params->flags.FORCE_PROGRAMMING_OF_PLL) clk.sPCLKInput.ucMiscInfo |= @@ -1118,9 +1118,9 @@ static enum bp_result set_pixel_clock_v6( (uint8_t) bp->cmd_helper->encoder_mode_bp_to_atom( bp_params->signal_type, false); - /* We need to convert from KHz units into 10KHz units */ + /* We need to convert from 100 Hz units into 10KHz units */ clk.sPCLKInput.ulCrtcPclkFreq.ulPixelClock = - cpu_to_le32(bp_params->target_pixel_clock / 10); + cpu_to_le32(bp_params->target_pixel_clock_100hz / 100); if (bp_params->flags.FORCE_PROGRAMMING_OF_PLL) { clk.sPCLKInput.ucMiscInfo |= @@ -1182,8 +1182,7 @@ static enum bp_result set_pixel_clock_v7( clk.ucTransmitterID = bp->cmd_helper->encoder_id_to_atom(dal_graphics_object_id_get_encoder_id(bp_params->encoder_object_id)); clk.ucEncoderMode = (uint8_t) bp->cmd_helper->encoder_mode_bp_to_atom(bp_params->signal_type, false); - /* We need to convert from KHz units into 10KHz units */ - clk.ulPixelClock = cpu_to_le32(bp_params->target_pixel_clock * 10); + clk.ulPixelClock = cpu_to_le32(bp_params->target_pixel_clock_100hz); clk.ucDeepColorRatio = (uint8_t) bp->cmd_helper->transmitter_color_depth_to_atom(bp_params->color_depth); @@ -2164,7 +2163,7 @@ static enum bp_result program_clock_v5( /* We need to convert from KHz units into 10KHz units */ params.sPCLKInput.ucPpll = (uint8_t) atom_pll_id; params.sPCLKInput.usPixelClock = - cpu_to_le16((uint16_t) (bp_params->target_pixel_clock / 10)); + cpu_to_le16((uint16_t) (bp_params->target_pixel_clock_100hz / 100)); params.sPCLKInput.ucCRTC = (uint8_t) ATOM_CRTC_INVALID; if (bp_params->flags.SET_EXTERNAL_REF_DIV_SRC) @@ -2196,7 +2195,7 @@ static enum bp_result program_clock_v6( /* We need to convert from KHz units into 10KHz units */ params.sPCLKInput.ucPpll = (uint8_t)atom_pll_id; params.sPCLKInput.ulDispEngClkFreq = - cpu_to_le32(bp_params->target_pixel_clock / 10); + cpu_to_le32(bp_params->target_pixel_clock_100hz / 100); if (bp_params->flags.SET_EXTERNAL_REF_DIV_SRC) params.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_MISC_REF_DIV_SRC; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 2b5dc499a35e..7d3e57765eb2 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -301,9 +301,7 @@ static enum bp_result set_pixel_clock_v7( cmd_helper->encoder_mode_bp_to_atom( bp_params->signal_type, false); - /* We need to convert from KHz units into 10KHz units */ - clk.pixclk_100hz = cpu_to_le32(bp_params->target_pixel_clock * - 10); + clk.pixclk_100hz = cpu_to_le32(bp_params->target_pixel_clock_100hz); clk.deep_color_ratio = (uint8_t) bp->cmd_helper-> @@ -311,7 +309,7 @@ static enum bp_result set_pixel_clock_v7( bp_params->color_depth); DC_LOG_BIOS("%s:program display clock = %d"\ "colorDepth = %d\n", __func__,\ - bp_params->target_pixel_clock, bp_params->color_depth); + bp_params->target_pixel_clock_100hz, bp_params->color_depth); if (bp_params->flags.FORCE_PROGRAMMING_OF_PLL) clk.miscinfo |= PIXEL_CLOCK_V7_MISC_FORCE_PROG_PPLL; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 9ebe30ba4dab..f3aa7b53d2aa 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2792,7 +2792,7 @@ static void populate_initial_data( data->lpt_en[num_displays + 4] = false; data->h_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_total); data->v_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.v_total); - data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->timing.pix_clk_khz, 1000); + data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->timing.pix_clk_100hz, 10000); data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.width); data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.height); @@ -2881,7 +2881,7 @@ static void populate_initial_data( /* Pipes without underlay after */ for (i = 0; i < pipe_count; i++) { - unsigned int pixel_clock_khz; + unsigned int pixel_clock_100hz; if (!pipe[i].stream || pipe[i].bottom_pipe) continue; @@ -2890,10 +2890,10 @@ static void populate_initial_data( data->lpt_en[num_displays + 4] = false; data->h_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_total); data->v_total[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.v_total); - pixel_clock_khz = pipe[i].stream->timing.pix_clk_khz; + pixel_clock_100hz = pipe[i].stream->timing.pix_clk_100hz; if (pipe[i].stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clock_khz *= 2; - data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pixel_clock_khz, 1000); + pixel_clock_100hz *= 2; + data->pixel_rate[num_displays + 4] = bw_frc_to_fixed(pixel_clock_100hz, 10000); if (pipe[i].plane_state) { data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].plane_res.scl_data.viewport.width); data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 446ee482fd24..12d1842079ae 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -416,7 +416,7 @@ static void pipe_ctx_to_e2e_pipe_params ( - pipe->stream->timing.v_addressable - pipe->stream->timing.v_border_bottom - pipe->stream->timing.v_border_top; - input->dest.pixel_rate_mhz = pipe->stream->timing.pix_clk_khz/1000.0; + input->dest.pixel_rate_mhz = pipe->stream->timing.pix_clk_100hz/10000.0; input->dest.vstartup_start = pipe->pipe_dlg_param.vstartup_start; input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset; input->dest.vupdate_offset = pipe->pipe_dlg_param.vupdate_offset; @@ -663,9 +663,9 @@ static void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v) } static void hack_force_pipe_split(struct dcn_bw_internal_vars *v, - unsigned int pixel_rate_khz) + unsigned int pixel_rate_100hz) { - float pixel_rate_mhz = pixel_rate_khz / 1000; + float pixel_rate_mhz = pixel_rate_100hz / 10000; /* * force enabling pipe split by lower dpp clock for DPM0 to just @@ -688,7 +688,7 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, if (context->stream_count == 1 && dbg->force_single_disp_pipe_split) - hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_khz); + hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz); } bool dcn_validate_bandwidth( @@ -845,7 +845,7 @@ bool dcn_validate_bandwidth( v->v_sync_plus_back_porch[input_idx] = pipe->stream->timing.v_total - v->vactive[input_idx] - pipe->stream->timing.v_front_porch; - v->pixel_clock[input_idx] = pipe->stream->timing.pix_clk_khz/1000.0; + v->pixel_clock[input_idx] = pipe->stream->timing.pix_clk_100hz/10000.0; if (pipe->stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) v->pixel_clock[input_idx] *= 2; if (!pipe->plane_state) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 66067a46eb6f..3872c82843a8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1079,7 +1079,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c context->streams[i]->timing.v_addressable, context->streams[i]->timing.h_total, context->streams[i]->timing.v_total, - context->streams[i]->timing.pix_clk_khz); + context->streams[i]->timing.pix_clk_100hz / 10); } dc_enable_stereo(dc, context, dc_streams, context->stream_count); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ffd1c7d7fe44..3dd5f2717b53 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1934,7 +1934,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) && (stream->timing.v_addressable == 480); if (stream->phy_pix_clk == 0) - stream->phy_pix_clk = stream->timing.pix_clk_khz; + stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10; if (stream->phy_pix_clk > 340000) is_over_340mhz = true; @@ -1988,7 +1988,7 @@ static void enable_link_lvds(struct pipe_ctx *pipe_ctx) struct dc_link *link = stream->link; if (stream->phy_pix_clk == 0) - stream->phy_pix_clk = stream->timing.pix_clk_khz; + stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10; memset(&stream->link->cur_link_settings, 0, sizeof(struct dc_link_settings)); @@ -2063,7 +2063,7 @@ static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dpcd_caps *dpcd_caps) { - unsigned int required_pix_clk = timing->pix_clk_khz; + unsigned int required_pix_clk_100hz = timing->pix_clk_100hz; const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps; switch (dpcd_caps->dongle_type) { @@ -2103,9 +2103,9 @@ static bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - required_pix_clk /= 2; + required_pix_clk_100hz /= 2; else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - required_pix_clk = required_pix_clk * 2 / 3; + required_pix_clk_100hz = required_pix_clk_100hz * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: @@ -2115,12 +2115,12 @@ static bool dp_active_dongle_validate_timing( case COLOR_DEPTH_101010: if (dongle_caps->dp_hdmi_max_bpc < 10) return false; - required_pix_clk = required_pix_clk * 10 / 8; + required_pix_clk_100hz = required_pix_clk_100hz * 10 / 8; break; case COLOR_DEPTH_121212: if (dongle_caps->dp_hdmi_max_bpc < 12) return false; - required_pix_clk = required_pix_clk * 12 / 8; + required_pix_clk_100hz = required_pix_clk_100hz * 12 / 8; break; case COLOR_DEPTH_141414: @@ -2130,7 +2130,7 @@ static bool dp_active_dongle_validate_timing( return false; } - if (required_pix_clk > dongle_caps->dp_hdmi_max_pixel_clk) + if (required_pix_clk_100hz > (dongle_caps->dp_hdmi_max_pixel_clk * 10)) return false; return true; @@ -2141,7 +2141,7 @@ enum dc_status dc_link_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing) { - uint32_t max_pix_clk = stream->link->dongle_max_pix_clk; + uint32_t max_pix_clk = stream->link->dongle_max_pix_clk * 10; struct dpcd_caps *dpcd_caps = &link->dpcd_caps; /* A hack to avoid failing any modes for EDID override feature on @@ -2151,7 +2151,7 @@ enum dc_status dc_link_validate_mode_timing( return DC_OK; /* Passive Dongle */ - if (0 != max_pix_clk && timing->pix_clk_khz > max_pix_clk) + if (0 != max_pix_clk && timing->pix_clk_100hz > max_pix_clk) return DC_EXCEED_DONGLE_CAP; /* Active Dongle*/ @@ -2301,7 +2301,7 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx) uint32_t denominator; bpc = get_color_depth(pipe_ctx->stream_res.pix_clk_params.color_depth); - kbps = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk * bpc * 3; + kbps = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10 * bpc * 3; /* * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 39562c93808d..568fdc9423e6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1542,7 +1542,7 @@ static uint32_t bandwidth_in_kbps_from_timing( ASSERT(bits_per_channel != 0); - kbps = timing->pix_clk_khz; + kbps = timing->pix_clk_100hz / 10; kbps *= bits_per_channel; if (timing->flags.Y_ONLY != 1) { @@ -1584,7 +1584,7 @@ bool dp_validate_mode_timing( const struct dc_link_settings *link_setting; /*always DP fail safe mode*/ - if (timing->pix_clk_khz == (uint32_t) 25175 && + if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 && timing->h_addressable == (uint32_t) 640 && timing->v_addressable == (uint32_t) 480) return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index a179c09580c3..16d441d3af8a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -74,8 +74,8 @@ void dp_enable_link_phy( if (pipes[i].clock_source != NULL && pipes[i].clock_source->id != CLOCK_SOURCE_ID_DP_DTO) { pipes[i].clock_source = dp_cs; - pipes[i].stream_res.pix_clk_params.requested_pix_clk = - pipes[i].stream->timing.pix_clk_khz; + pipes[i].stream_res.pix_clk_params.requested_pix_clk_100hz = + pipes[i].stream->timing.pix_clk_100hz; pipes[i].clock_source->funcs->program_pix_clk( pipes[i].clock_source, &pipes[i].stream_res.pix_clk_params, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 188cf4fa5cf0..98f0fca0b08d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -355,8 +355,8 @@ bool resource_are_streams_timing_synchronizable( != stream2->timing.v_addressable) return false; - if (stream1->timing.pix_clk_khz - != stream2->timing.pix_clk_khz) + if (stream1->timing.pix_clk_100hz + != stream2->timing.pix_clk_100hz) return false; if (stream1->clamping.c_depth != stream2->clamping.c_depth) @@ -1759,7 +1759,7 @@ static struct dc_stream_state *find_pll_sharable_stream( static int get_norm_pix_clk(const struct dc_crtc_timing *timing) { - uint32_t pix_clk = timing->pix_clk_khz; + uint32_t pix_clk = timing->pix_clk_100hz; uint32_t normalized_pix_clk = pix_clk; if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) @@ -1791,10 +1791,10 @@ static void calculate_phy_pix_clks(struct dc_stream_state *stream) /* update actual pixel clock on all streams */ if (dc_is_hdmi_signal(stream->signal)) stream->phy_pix_clk = get_norm_pix_clk( - &stream->timing); + &stream->timing) / 10; else stream->phy_pix_clk = - stream->timing.pix_clk_khz; + stream->timing.pix_clk_100hz / 10; if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) stream->phy_pix_clk *= 2; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 336586ba4639..e498a9aa8035 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -44,7 +44,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) if (dc_is_dvi_signal(stream->signal)) { if (stream->ctx->dc->caps.dual_link_dvi && - stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK && + (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; else @@ -339,7 +339,7 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) stream->output_color_space); DC_LOG_DC( "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixelencoder:%d, displaycolorDepth:%d\n", - stream->timing.pix_clk_khz, + stream->timing.pix_clk_100hz / 10, stream->timing.h_total, stream->timing.v_total, stream->timing.pixel_encoding, diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index e72fce4eca65..04f279a76d70 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -730,7 +730,7 @@ struct dc_crtc_timing { uint32_t v_front_porch; uint32_t v_sync_width; - uint32_t pix_clk_khz; + uint32_t pix_clk_100hz; uint32_t vic; uint32_t hdmi_vic; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 5c1a7c013f63..894043009abd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -194,8 +194,8 @@ static uint32_t get_max_pixel_clock_for_all_paths(struct dc_state *context) if (pipe_ctx->top_pipe) continue; - if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk > max_pix_clk) - max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; + if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10 > max_pix_clk) + max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10; /* raise clock state for HBR3/2 if required. Confirmed with HW DCE/DPCS * logic for HBR3 still needs Nominal (0.8V) on VDDC rail @@ -257,7 +257,7 @@ static int dce_set_clock( clk_mgr_dce->dentist_vco_freq_khz / 64); /* Prepare to program display clock*/ - pxl_clk_params.target_pixel_clock = requested_clk_khz; + pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10; pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; if (clk_mgr_dce->dfs_bypass_active) @@ -494,7 +494,7 @@ void dce110_fill_display_configs( stream->link->cur_link_settings.link_spread; cfg->sym_clock = stream->phy_pix_clk; /* Round v_refresh*/ - cfg->v_refresh = stream->timing.pix_clk_khz * 1000; + cfg->v_refresh = stream->timing.pix_clk_100hz * 100; cfg->v_refresh /= stream->timing.h_total; cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) / stream->timing.v_total; @@ -518,7 +518,7 @@ static uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context) - stream->timing.v_addressable); vertical_blank_time = vertical_blank_in_pixels - * 1000 / stream->timing.pix_clk_khz; + * 10000 / stream->timing.pix_clk_100hz; if (min_vertical_blank_time > vertical_blank_time) min_vertical_blank_time = vertical_blank_time; @@ -612,7 +612,7 @@ static void dce11_pplib_apply_display_requirements( pp_display_cfg->crtc_index = pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 1000 / timing->pix_clk_khz; + pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz; } if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 723ce80ed89c..c67e90e5c339 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -108,28 +108,28 @@ static const struct spread_spectrum_data *get_ss_data_entry( } /** -* Function: calculate_fb_and_fractional_fb_divider -* -* * DESCRIPTION: Calculates feedback and fractional feedback dividers values -* -*PARAMETERS: -* targetPixelClock Desired frequency in 10 KHz -* ref_divider Reference divider (already known) -* postDivider Post Divider (already known) -* feedback_divider_param Pointer where to store -* calculated feedback divider value -* fract_feedback_divider_param Pointer where to store -* calculated fract feedback divider value -* -*RETURNS: -* It fills the locations pointed by feedback_divider_param -* and fract_feedback_divider_param -* It returns - true if feedback divider not 0 -* - false should never happen) -*/ + * Function: calculate_fb_and_fractional_fb_divider + * + * * DESCRIPTION: Calculates feedback and fractional feedback dividers values + * + *PARAMETERS: + * targetPixelClock Desired frequency in 100 Hz + * ref_divider Reference divider (already known) + * postDivider Post Divider (already known) + * feedback_divider_param Pointer where to store + * calculated feedback divider value + * fract_feedback_divider_param Pointer where to store + * calculated fract feedback divider value + * + *RETURNS: + * It fills the locations pointed by feedback_divider_param + * and fract_feedback_divider_param + * It returns - true if feedback divider not 0 + * - false should never happen) + */ static bool calculate_fb_and_fractional_fb_divider( struct calc_pll_clock_source *calc_pll_cs, - uint32_t target_pix_clk_khz, + uint32_t target_pix_clk_100hz, uint32_t ref_divider, uint32_t post_divider, uint32_t *feedback_divider_param, @@ -138,11 +138,11 @@ static bool calculate_fb_and_fractional_fb_divider( uint64_t feedback_divider; feedback_divider = - (uint64_t)target_pix_clk_khz * ref_divider * post_divider; + (uint64_t)target_pix_clk_100hz * ref_divider * post_divider; feedback_divider *= 10; /* additional factor, since we divide by 10 afterwards */ feedback_divider *= (uint64_t)(calc_pll_cs->fract_fb_divider_factor); - feedback_divider = div_u64(feedback_divider, calc_pll_cs->ref_freq_khz); + feedback_divider = div_u64(feedback_divider, calc_pll_cs->ref_freq_khz * 10ull); /*Round to the number of precision * The following code replace the old code (ullfeedbackDivider + 5)/10 @@ -195,36 +195,36 @@ static bool calc_fb_divider_checking_tolerance( { uint32_t feedback_divider; uint32_t fract_feedback_divider; - uint32_t actual_calculated_clock_khz; + uint32_t actual_calculated_clock_100hz; uint32_t abs_err; - uint64_t actual_calc_clk_khz; + uint64_t actual_calc_clk_100hz; calculate_fb_and_fractional_fb_divider( calc_pll_cs, - pll_settings->adjusted_pix_clk, + pll_settings->adjusted_pix_clk_100hz, ref_divider, post_divider, &feedback_divider, &fract_feedback_divider); /*Actual calculated value*/ - actual_calc_clk_khz = (uint64_t)feedback_divider * + actual_calc_clk_100hz = (uint64_t)feedback_divider * calc_pll_cs->fract_fb_divider_factor + fract_feedback_divider; - actual_calc_clk_khz *= calc_pll_cs->ref_freq_khz; - actual_calc_clk_khz = - div_u64(actual_calc_clk_khz, + actual_calc_clk_100hz *= calc_pll_cs->ref_freq_khz * 10; + actual_calc_clk_100hz = + div_u64(actual_calc_clk_100hz, ref_divider * post_divider * calc_pll_cs->fract_fb_divider_factor); - actual_calculated_clock_khz = (uint32_t)(actual_calc_clk_khz); + actual_calculated_clock_100hz = (uint32_t)(actual_calc_clk_100hz); - abs_err = (actual_calculated_clock_khz > - pll_settings->adjusted_pix_clk) - ? actual_calculated_clock_khz - - pll_settings->adjusted_pix_clk - : pll_settings->adjusted_pix_clk - - actual_calculated_clock_khz; + abs_err = (actual_calculated_clock_100hz > + pll_settings->adjusted_pix_clk_100hz) + ? actual_calculated_clock_100hz - + pll_settings->adjusted_pix_clk_100hz + : pll_settings->adjusted_pix_clk_100hz - + actual_calculated_clock_100hz; if (abs_err <= tolerance) { /*found good values*/ @@ -233,10 +233,10 @@ static bool calc_fb_divider_checking_tolerance( pll_settings->feedback_divider = feedback_divider; pll_settings->fract_feedback_divider = fract_feedback_divider; pll_settings->pix_clk_post_divider = post_divider; - pll_settings->calculated_pix_clk = - actual_calculated_clock_khz; + pll_settings->calculated_pix_clk_100hz = + actual_calculated_clock_100hz; pll_settings->vco_freq = - actual_calculated_clock_khz * post_divider; + actual_calculated_clock_100hz * post_divider / 10; return true; } return false; @@ -257,8 +257,8 @@ static bool calc_pll_dividers_in_range( /* This is err_tolerance / 10000 = 0.0025 - acceptable error of 0.25% * This is errorTolerance / 10000 = 0.0001 - acceptable error of 0.01%*/ - tolerance = (pll_settings->adjusted_pix_clk * err_tolerance) / - 10000; + tolerance = (pll_settings->adjusted_pix_clk_100hz * err_tolerance) / + 100000; if (tolerance < CALC_PLL_CLK_SRC_ERR_TOLERANCE) tolerance = CALC_PLL_CLK_SRC_ERR_TOLERANCE; @@ -294,7 +294,7 @@ static uint32_t calculate_pixel_clock_pll_dividers( uint32_t min_ref_divider; uint32_t max_ref_divider; - if (pll_settings->adjusted_pix_clk == 0) { + if (pll_settings->adjusted_pix_clk_100hz == 0) { DC_LOG_ERROR( "%s Bad requested pixel clock", __func__); return MAX_PLL_CALC_ERROR; @@ -306,21 +306,21 @@ static uint32_t calculate_pixel_clock_pll_dividers( max_post_divider = pll_settings->pix_clk_post_divider; } else { min_post_divider = calc_pll_cs->min_pix_clock_pll_post_divider; - if (min_post_divider * pll_settings->adjusted_pix_clk < - calc_pll_cs->min_vco_khz) { - min_post_divider = calc_pll_cs->min_vco_khz / - pll_settings->adjusted_pix_clk; + if (min_post_divider * pll_settings->adjusted_pix_clk_100hz < + calc_pll_cs->min_vco_khz * 10) { + min_post_divider = calc_pll_cs->min_vco_khz * 10 / + pll_settings->adjusted_pix_clk_100hz; if ((min_post_divider * - pll_settings->adjusted_pix_clk) < - calc_pll_cs->min_vco_khz) + pll_settings->adjusted_pix_clk_100hz) < + calc_pll_cs->min_vco_khz * 10) min_post_divider++; } max_post_divider = calc_pll_cs->max_pix_clock_pll_post_divider; - if (max_post_divider * pll_settings->adjusted_pix_clk - > calc_pll_cs->max_vco_khz) - max_post_divider = calc_pll_cs->max_vco_khz / - pll_settings->adjusted_pix_clk; + if (max_post_divider * pll_settings->adjusted_pix_clk_100hz + > calc_pll_cs->max_vco_khz * 10) + max_post_divider = calc_pll_cs->max_vco_khz * 10 / + pll_settings->adjusted_pix_clk_100hz; } /* 2) Find Reference divider ranges @@ -392,47 +392,47 @@ static bool pll_adjust_pix_clk( struct pixel_clk_params *pix_clk_params, struct pll_settings *pll_settings) { - uint32_t actual_pix_clk_khz = 0; - uint32_t requested_clk_khz = 0; + uint32_t actual_pix_clk_100hz = 0; + uint32_t requested_clk_100hz = 0; struct bp_adjust_pixel_clock_parameters bp_adjust_pixel_clock_params = { 0 }; enum bp_result bp_result; switch (pix_clk_params->signal_type) { case SIGNAL_TYPE_HDMI_TYPE_A: { - requested_clk_khz = pix_clk_params->requested_pix_clk; + requested_clk_100hz = pix_clk_params->requested_pix_clk_100hz; if (pix_clk_params->pixel_encoding != PIXEL_ENCODING_YCBCR422) { switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: - requested_clk_khz = (requested_clk_khz * 5) >> 2; + requested_clk_100hz = (requested_clk_100hz * 5) >> 2; break; /* x1.25*/ case COLOR_DEPTH_121212: - requested_clk_khz = (requested_clk_khz * 6) >> 2; + requested_clk_100hz = (requested_clk_100hz * 6) >> 2; break; /* x1.5*/ case COLOR_DEPTH_161616: - requested_clk_khz = requested_clk_khz * 2; + requested_clk_100hz = requested_clk_100hz * 2; break; /* x2.0*/ default: break; } } - actual_pix_clk_khz = requested_clk_khz; + actual_pix_clk_100hz = requested_clk_100hz; } break; case SIGNAL_TYPE_DISPLAY_PORT: case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_EDP: - requested_clk_khz = pix_clk_params->requested_sym_clk; - actual_pix_clk_khz = pix_clk_params->requested_pix_clk; + requested_clk_100hz = pix_clk_params->requested_sym_clk * 10; + actual_pix_clk_100hz = pix_clk_params->requested_pix_clk_100hz; break; default: - requested_clk_khz = pix_clk_params->requested_pix_clk; - actual_pix_clk_khz = pix_clk_params->requested_pix_clk; + requested_clk_100hz = pix_clk_params->requested_pix_clk_100hz; + actual_pix_clk_100hz = pix_clk_params->requested_pix_clk_100hz; break; } - bp_adjust_pixel_clock_params.pixel_clock = requested_clk_khz; + bp_adjust_pixel_clock_params.pixel_clock = requested_clk_100hz / 10; bp_adjust_pixel_clock_params. encoder_object_id = pix_clk_params->encoder_object_id; bp_adjust_pixel_clock_params.signal_type = pix_clk_params->signal_type; @@ -441,9 +441,9 @@ static bool pll_adjust_pix_clk( bp_result = clk_src->bios->funcs->adjust_pixel_clock( clk_src->bios, &bp_adjust_pixel_clock_params); if (bp_result == BP_RESULT_OK) { - pll_settings->actual_pix_clk = actual_pix_clk_khz; - pll_settings->adjusted_pix_clk = - bp_adjust_pixel_clock_params.adjusted_pixel_clock; + pll_settings->actual_pix_clk_100hz = actual_pix_clk_100hz; + pll_settings->adjusted_pix_clk_100hz = + bp_adjust_pixel_clock_params.adjusted_pixel_clock * 10; pll_settings->reference_divider = bp_adjust_pixel_clock_params.reference_divider; pll_settings->pix_clk_post_divider = @@ -490,7 +490,7 @@ static uint32_t dce110_get_pix_clk_dividers_helper ( const struct spread_spectrum_data *ss_data = get_ss_data_entry( clk_src, pix_clk_params->signal_type, - pll_settings->adjusted_pix_clk); + pll_settings->adjusted_pix_clk_100hz / 10); if (NULL != ss_data) pll_settings->ss_percentage = ss_data->percentage; @@ -502,13 +502,13 @@ static uint32_t dce110_get_pix_clk_dividers_helper ( * to continue. */ DC_LOG_ERROR( "%s: Failed to adjust pixel clock!!", __func__); - pll_settings->actual_pix_clk = - pix_clk_params->requested_pix_clk; - pll_settings->adjusted_pix_clk = - pix_clk_params->requested_pix_clk; + pll_settings->actual_pix_clk_100hz = + pix_clk_params->requested_pix_clk_100hz; + pll_settings->adjusted_pix_clk_100hz = + pix_clk_params->requested_pix_clk_100hz; if (dc_is_dp_signal(pix_clk_params->signal_type)) - pll_settings->adjusted_pix_clk = 100000; + pll_settings->adjusted_pix_clk_100hz = 1000000; } /* Calculate Dividers */ @@ -533,28 +533,28 @@ static void dce112_get_pix_clk_dividers_helper ( struct pll_settings *pll_settings, struct pixel_clk_params *pix_clk_params) { - uint32_t actualPixelClockInKHz; + uint32_t actual_pixel_clock_100hz; - actualPixelClockInKHz = pix_clk_params->requested_pix_clk; + actual_pixel_clock_100hz = pix_clk_params->requested_pix_clk_100hz; /* Calculate Dividers */ if (pix_clk_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) { switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: - actualPixelClockInKHz = (actualPixelClockInKHz * 5) >> 2; + actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; break; case COLOR_DEPTH_121212: - actualPixelClockInKHz = (actualPixelClockInKHz * 6) >> 2; + actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; break; case COLOR_DEPTH_161616: - actualPixelClockInKHz = actualPixelClockInKHz * 2; + actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2; break; default: break; } } - pll_settings->actual_pix_clk = actualPixelClockInKHz; - pll_settings->adjusted_pix_clk = actualPixelClockInKHz; - pll_settings->calculated_pix_clk = pix_clk_params->requested_pix_clk; + pll_settings->actual_pix_clk_100hz = actual_pixel_clock_100hz; + pll_settings->adjusted_pix_clk_100hz = actual_pixel_clock_100hz; + pll_settings->calculated_pix_clk_100hz = pix_clk_params->requested_pix_clk_100hz; } static uint32_t dce110_get_pix_clk_dividers( @@ -567,7 +567,7 @@ static uint32_t dce110_get_pix_clk_dividers( DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL - || pix_clk_params->requested_pix_clk == 0) { + || pix_clk_params->requested_pix_clk_100hz == 0) { DC_LOG_ERROR( "%s: Invalid parameters!!\n", __func__); return pll_calc_error; @@ -577,10 +577,10 @@ static uint32_t dce110_get_pix_clk_dividers( if (cs->id == CLOCK_SOURCE_ID_DP_DTO || cs->id == CLOCK_SOURCE_ID_EXTERNAL) { - pll_settings->adjusted_pix_clk = clk_src->ext_clk_khz; - pll_settings->calculated_pix_clk = clk_src->ext_clk_khz; - pll_settings->actual_pix_clk = - pix_clk_params->requested_pix_clk; + pll_settings->adjusted_pix_clk_100hz = clk_src->ext_clk_khz * 10; + pll_settings->calculated_pix_clk_100hz = clk_src->ext_clk_khz * 10; + pll_settings->actual_pix_clk_100hz = + pix_clk_params->requested_pix_clk_100hz; return 0; } @@ -599,7 +599,7 @@ static uint32_t dce112_get_pix_clk_dividers( DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL - || pix_clk_params->requested_pix_clk == 0) { + || pix_clk_params->requested_pix_clk_100hz == 0) { DC_LOG_ERROR( "%s: Invalid parameters!!\n", __func__); return -1; @@ -609,10 +609,10 @@ static uint32_t dce112_get_pix_clk_dividers( if (cs->id == CLOCK_SOURCE_ID_DP_DTO || cs->id == CLOCK_SOURCE_ID_EXTERNAL) { - pll_settings->adjusted_pix_clk = clk_src->ext_clk_khz; - pll_settings->calculated_pix_clk = clk_src->ext_clk_khz; - pll_settings->actual_pix_clk = - pix_clk_params->requested_pix_clk; + pll_settings->adjusted_pix_clk_100hz = clk_src->ext_clk_khz * 10; + pll_settings->calculated_pix_clk_100hz = clk_src->ext_clk_khz * 10; + pll_settings->actual_pix_clk_100hz = + pix_clk_params->requested_pix_clk_100hz; return -1; } @@ -714,7 +714,7 @@ static bool enable_spread_spectrum( ss_data = get_ss_data_entry( clk_src, signal, - pll_settings->calculated_pix_clk); + pll_settings->calculated_pix_clk_100hz / 10); /* Pixel clock PLL has been programmed to generate desired pixel clock, * now enable SS on pixel clock */ @@ -853,7 +853,7 @@ static bool dce110_program_pix_clk( /*ATOMBIOS expects pixel rate adjusted by deep color ratio)*/ bp_pc_params.controller_id = pix_clk_params->controller_id; bp_pc_params.pll_id = clock_source->id; - bp_pc_params.target_pixel_clock = pll_settings->actual_pix_clk; + bp_pc_params.target_pixel_clock_100hz = pll_settings->actual_pix_clk_100hz; bp_pc_params.encoder_object_id = pix_clk_params->encoder_object_id; bp_pc_params.signal_type = pix_clk_params->signal_type; @@ -903,12 +903,12 @@ static bool dce112_program_pix_clk( #if defined(CONFIG_DRM_AMD_DC_DCN1_0) if (IS_FPGA_MAXIMUS_DC(clock_source->ctx->dce_environment)) { unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0; - unsigned dp_dto_ref_kHz = 700000; - unsigned clock_kHz = pll_settings->actual_pix_clk; + unsigned dp_dto_ref_100hz = 7000000; + unsigned clock_100hz = pll_settings->actual_pix_clk_100hz; /* Set DTO values: phase = target clock, modulo = reference clock */ - REG_WRITE(PHASE[inst], clock_kHz); - REG_WRITE(MODULO[inst], dp_dto_ref_kHz); + REG_WRITE(PHASE[inst], clock_100hz); + REG_WRITE(MODULO[inst], dp_dto_ref_100hz); /* Enable DTO */ REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); @@ -927,7 +927,7 @@ static bool dce112_program_pix_clk( /*ATOMBIOS expects pixel rate adjusted by deep color ratio)*/ bp_pc_params.controller_id = pix_clk_params->controller_id; bp_pc_params.pll_id = clock_source->id; - bp_pc_params.target_pixel_clock = pll_settings->actual_pix_clk; + bp_pc_params.target_pixel_clock_100hz = pll_settings->actual_pix_clk_100hz; bp_pc_params.encoder_object_id = pix_clk_params->encoder_object_id; bp_pc_params.signal_type = pix_clk_params->signal_type; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 1d794c65cc8b..314c04a915d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -599,12 +599,12 @@ bool dce110_link_encoder_validate_dvi_output( if ((connector_signal == SIGNAL_TYPE_DVI_SINGLE_LINK || connector_signal == SIGNAL_TYPE_HDMI_TYPE_A) && signal != SIGNAL_TYPE_HDMI_TYPE_A && - crtc_timing->pix_clk_khz > TMDS_MAX_PIXEL_CLOCK) + crtc_timing->pix_clk_100hz > (TMDS_MAX_PIXEL_CLOCK * 10)) return false; - if (crtc_timing->pix_clk_khz < TMDS_MIN_PIXEL_CLOCK) + if (crtc_timing->pix_clk_100hz < (TMDS_MIN_PIXEL_CLOCK * 10)) return false; - if (crtc_timing->pix_clk_khz > max_pixel_clock) + if (crtc_timing->pix_clk_100hz > (max_pixel_clock * 10)) return false; /* DVI supports 6/8bpp single-link and 10/16bpp dual-link */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index cce0d18f91da..4bd7c5daabc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -662,7 +662,7 @@ static void dce110_stream_encoder_dvi_set_stream_attribute( cntl.signal = is_dual_link ? SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK; cntl.enable_dp_audio = false; - cntl.pixel_clock = crtc_timing->pix_clk_khz; + cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR; if (enc110->base.bp->funcs->encoder_control( @@ -686,7 +686,7 @@ static void dce110_stream_encoder_lvds_set_stream_attribute( cntl.engine_id = enc110->base.id; cntl.signal = SIGNAL_TYPE_LVDS; cntl.enable_dp_audio = false; - cntl.pixel_clock = crtc_timing->pix_clk_khz; + cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; cntl.lanes_number = LANE_COUNT_FOUR; if (enc110->base.bp->funcs->encoder_control( diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e947cc5381ec..8e961e699db5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1074,7 +1074,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, /* only 3 items below are used by unblank */ params.pixel_clk_khz = - pipe_ctx->stream->timing.pix_clk_khz; + pipe_ctx->stream->timing.pix_clk_100hz / 10; params.link_settings.link_rate = link_settings->link_rate; if (dc_is_dp_signal(pipe_ctx->stream->signal)) @@ -1160,27 +1160,27 @@ static void build_audio_output( stream->timing.flags.INTERLACE; audio_output->crtc_info.refresh_rate = - (stream->timing.pix_clk_khz*1000)/ + (stream->timing.pix_clk_100hz*10000)/ (stream->timing.h_total*stream->timing.v_total); audio_output->crtc_info.color_depth = stream->timing.display_color_depth; audio_output->crtc_info.requested_pixel_clock = - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; + pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10; audio_output->crtc_info.calculated_pixel_clock = - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; + pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10; /*for HDMI, audio ACR is with deep color ratio factor*/ if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && audio_output->crtc_info.requested_pixel_clock == - stream->timing.pix_clk_khz) { + (stream->timing.pix_clk_100hz / 10)) { if (pipe_ctx->stream_res.pix_clk_params.pixel_encoding == PIXEL_ENCODING_YCBCR420) { audio_output->crtc_info.requested_pixel_clock = audio_output->crtc_info.requested_pixel_clock/2; audio_output->crtc_info.calculated_pixel_clock = - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk/2; + pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz/20; } } @@ -1621,8 +1621,8 @@ static uint32_t compute_pstate_blackout_duration( pstate_blackout_duration_ns = 1000 * blackout_duration.value >> 24; total_dest_line_time_ns = 1000000UL * - stream->timing.h_total / - stream->timing.pix_clk_khz + + (stream->timing.h_total * 10) / + stream->timing.pix_clk_100hz + pstate_blackout_duration_ns; return total_dest_line_time_ns; @@ -2567,7 +2567,7 @@ static void dce110_apply_ctx_for_surface( pipe_ctx->plane_res.mi, pipe_ctx->stream->timing.h_total, pipe_ctx->stream->timing.v_total, - pipe_ctx->stream->timing.pix_clk_khz, + pipe_ctx->stream->timing.pix_clk_100hz / 10, context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); @@ -2622,7 +2622,7 @@ void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; struct mem_input *mi = pipe_ctx->plane_res.mi; struct dc_cursor_mi_param param = { - .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz, + .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10, .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, .viewport = pipe_ctx->plane_res.scl_data.viewport, .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 3c989b8e1ae3..3c27c31eaf7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -779,7 +779,7 @@ static void get_pixel_clock_parameters( * the pixel clock normalization for hdmi up to here instead of doing it * in pll_adjust_pix_clk */ - pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; pixel_clk_params->encoder_object_id = stream->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; @@ -797,10 +797,10 @@ static void get_pixel_clock_parameters( pixel_clk_params->color_depth = COLOR_DEPTH_888; } if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - pixel_clk_params->requested_pix_clk = pixel_clk_params->requested_pix_clk / 2; + pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; } if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk *= 2; + pixel_clk_params->requested_pix_clk_100hz *= 2; } @@ -874,7 +874,7 @@ static bool dce110_validate_bandwidth( __func__, context->streams[0]->timing.h_addressable, context->streams[0]->timing.v_addressable, - context->streams[0]->timing.pix_clk_khz); + context->streams[0]->timing.pix_clk_100hz / 10); if (memcmp(&dc->current_state->bw.dce, &context->bw.dce, sizeof(context->bw.dce))) { @@ -1055,7 +1055,7 @@ static struct pipe_ctx *dce110_acquire_underlay( pipe_ctx->plane_res.mi->funcs->allocate_mem_input(pipe_ctx->plane_res.mi, stream->timing.h_total, stream->timing.v_total, - stream->timing.pix_clk_khz, + stream->timing.pix_clk_100hz / 10, context->stream_count); color_space_to_black_color(dc, diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 3ba4712a35ab..5c629ae487ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -84,17 +84,17 @@ static const struct dce110_timing_generator_offsets reg_offsets[] = { #define DCP_REG(reg) (reg + tg110->offsets.dcp) #define DMIF_REG(reg) (reg + tg110->offsets.dmif) -static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_khz) +static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) { uint64_t pix_dur; uint32_t addr = mmDMIF_PG0_DPG_PIPE_ARBITRATION_CONTROL1 + DCE110TG_FROM_TG(tg)->offsets.dmif; uint32_t value = dm_read_reg(tg->ctx, addr); - if (pix_clk_khz == 0) + if (pix_clk_100hz == 0) return; - pix_dur = 1000000000 / pix_clk_khz; + pix_dur = 10000000000ull / pix_clk_100hz; set_reg_field_value( value, @@ -110,7 +110,7 @@ static void program_timing(struct timing_generator *tg, bool use_vbios) { if (!use_vbios) - program_pix_dur(tg, timing->pix_clk_khz); + program_pix_dur(tg, timing->pix_clk_100hz); dce110_tg_program_timing(tg, timing, use_vbios); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 613293d79bd7..05c615228361 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2655,7 +2655,7 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_cursor_mi_param param = { - .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz, + .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10, .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, .viewport = pipe_ctx->plane_res.scl_data.viewport, .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index ae4fd5ec9f74..19d15ec46642 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -429,7 +429,7 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int int pix_clk = 0; optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); - pix_clk = dc->current_state->res_ctx.pipe_ctx[i].stream_res.pix_clk_params.requested_pix_clk; + pix_clk = dc->current_state->res_ctx.pipe_ctx[i].stream_res.pix_clk_params.requested_pix_clk_100hz / 10; //only print if OTG master is enabled if (s.otg_enabled & 1) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index aefe8974469d..af0bcff0b01a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -543,12 +543,12 @@ bool dcn10_link_encoder_validate_dvi_output( if ((connector_signal == SIGNAL_TYPE_DVI_SINGLE_LINK || connector_signal == SIGNAL_TYPE_HDMI_TYPE_A) && signal != SIGNAL_TYPE_HDMI_TYPE_A && - crtc_timing->pix_clk_khz > TMDS_MAX_PIXEL_CLOCK) + crtc_timing->pix_clk_100hz > (TMDS_MAX_PIXEL_CLOCK * 10)) return false; - if (crtc_timing->pix_clk_khz < TMDS_MIN_PIXEL_CLOCK) + if (crtc_timing->pix_clk_100hz < (TMDS_MIN_PIXEL_CLOCK * 10)) return false; - if (crtc_timing->pix_clk_khz > max_pixel_clock) + if (crtc_timing->pix_clk_100hz > (max_pixel_clock * 10)) return false; /* DVI supports 6/8bpp single-link and 10/16bpp dual-link */ @@ -571,7 +571,7 @@ bool dcn10_link_encoder_validate_dvi_output( static bool dcn10_link_encoder_validate_hdmi_output( const struct dcn10_link_encoder *enc10, const struct dc_crtc_timing *crtc_timing, - int adjusted_pix_clk_khz) + int adjusted_pix_clk_100hz) { enum dc_color_depth max_deep_color = enc10->base.features.max_hdmi_deep_color; @@ -581,11 +581,11 @@ static bool dcn10_link_encoder_validate_hdmi_output( if (crtc_timing->display_color_depth < COLOR_DEPTH_888) return false; - if (adjusted_pix_clk_khz < TMDS_MIN_PIXEL_CLOCK) + if (adjusted_pix_clk_100hz < (TMDS_MIN_PIXEL_CLOCK * 10)) return false; - if ((adjusted_pix_clk_khz == 0) || - (adjusted_pix_clk_khz > enc10->base.features.max_hdmi_pixel_clock)) + if ((adjusted_pix_clk_100hz == 0) || + (adjusted_pix_clk_100hz > (enc10->base.features.max_hdmi_pixel_clock * 10))) return false; /* DCE11 HW does not support 420 */ @@ -594,7 +594,7 @@ static bool dcn10_link_encoder_validate_hdmi_output( return false; if (!enc10->base.features.flags.bits.HDMI_6GB_EN && - adjusted_pix_clk_khz >= 300000) + adjusted_pix_clk_100hz >= 3000000) return false; if (enc10->base.ctx->dc->debug.hdmi20_disable && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) @@ -746,7 +746,7 @@ bool dcn10_link_encoder_validate_output_with_stream( is_valid = dcn10_link_encoder_validate_hdmi_output( enc10, &stream->timing, - stream->phy_pix_clk); + stream->phy_pix_clk * 10); break; case SIGNAL_TYPE_DISPLAY_PORT: case SIGNAL_TYPE_DISPLAY_PORT_MST: diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 57d00d660462..eb019d404928 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -126,7 +126,7 @@ void optc1_program_vline_interrupt( struct optc *optc1 = DCN10TG_FROM_TG(optc); unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); - unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_khz + 99), 100); + unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_100hz + 999), 1000); uint32_t req_delta_lines = (uint32_t) div64_u64( (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), dc_crtc_timing->h_total); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 83447cde6a75..add84f9df90c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -974,7 +974,7 @@ static void get_pixel_clock_parameters( struct pixel_clk_params *pixel_clk_params) { const struct dc_stream_state *stream = pipe_ctx->stream; - pixel_clk_params->requested_pix_clk = stream->timing.pix_clk_khz; + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; pixel_clk_params->encoder_object_id = stream->link->link_enc->id; pixel_clk_params->signal_type = pipe_ctx->stream->signal; pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; @@ -991,9 +991,9 @@ static void get_pixel_clock_parameters( pixel_clk_params->color_depth = COLOR_DEPTH_888; if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) - pixel_clk_params->requested_pix_clk /= 2; + pixel_clk_params->requested_pix_clk_100hz /= 2; if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk *= 2; + pixel_clk_params->requested_pix_clk_100hz *= 2; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index b8b5525a389a..0b0e06fe5c53 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -594,7 +594,7 @@ void enc1_stream_encoder_dvi_set_stream_attribute( cntl.signal = is_dual_link ? SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK; cntl.enable_dp_audio = false; - cntl.pixel_clock = crtc_timing->pix_clk_khz; + cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR; if (enc1->base.bp->funcs->encoder_control( diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index 47ef90495376..43d1fbd8ace5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -78,7 +78,7 @@ struct csdp_ref_clk_ds_params { }; struct pixel_clk_params { - uint32_t requested_pix_clk; /* in KHz */ + uint32_t requested_pix_clk_100hz; /*> Requested Pixel Clock * (based on Video Timing standard used for requested mode)*/ uint32_t requested_sym_clk; /* in KHz */ @@ -104,9 +104,9 @@ struct pixel_clk_params { * with actually calculated Clock and reference Crystal frequency */ struct pll_settings { - uint32_t actual_pix_clk; - uint32_t adjusted_pix_clk; - uint32_t calculated_pix_clk; + uint32_t actual_pix_clk_100hz; + uint32_t adjusted_pix_clk_100hz; + uint32_t calculated_pix_clk_100hz; uint32_t vco_freq; uint32_t reference_freq; uint32_t reference_divider; diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index 7fd78a696800..01bf01a34a08 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -211,8 +211,8 @@ struct bp_pixel_clock_parameters { /* signal_type -> Encoder Mode - needed by VBIOS Exec table */ enum signal_type signal_type; /* Adjusted Pixel Clock (after VBIOS exec table) - * that becomes Target Pixel Clock (KHz) */ - uint32_t target_pixel_clock; + * that becomes Target Pixel Clock (100 Hz units) */ + uint32_t target_pixel_clock_100hz; /* Calculated Reference divider of Display PLL */ uint32_t reference_divider; /* Calculated Feedback divider of Display PLL */ diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 1544ed3f1747..d967ac001f59 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -108,8 +108,8 @@ static unsigned int calc_duration_in_us_from_v_total( { unsigned int duration_in_us = (unsigned int)(div64_u64(((unsigned long long)(v_total) - * 1000) * stream->timing.h_total, - stream->timing.pix_clk_khz)); + * 10000) * stream->timing.h_total, + stream->timing.pix_clk_100hz)); return duration_in_us; } @@ -126,7 +126,7 @@ static unsigned int calc_v_total_from_refresh( refresh_in_uhz))); v_total = div64_u64(div64_u64(((unsigned long long)( - frame_duration_in_ns) * stream->timing.pix_clk_khz), + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); /* v_total cannot be less than nominal */ @@ -152,7 +152,7 @@ static unsigned int calc_v_total_from_duration( duration_in_us = vrr->max_duration_in_us; v_total = div64_u64(div64_u64(((unsigned long long)( - duration_in_us) * stream->timing.pix_clk_khz), + duration_in_us) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000); /* v_total cannot be less than nominal */ @@ -227,7 +227,7 @@ static void update_v_total_for_static_ramp( } v_total = div64_u64(div64_u64(((unsigned long long)( - current_duration_in_us) * stream->timing.pix_clk_khz), + current_duration_in_us) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000); in_out_vrr->adjust.v_total_min = v_total; @@ -972,7 +972,7 @@ unsigned long long mod_freesync_calc_nominal_field_rate( unsigned long long nominal_field_rate_in_uhz = 0; /* Calculate nominal field rate for stream */ - nominal_field_rate_in_uhz = stream->timing.pix_clk_khz; + nominal_field_rate_in_uhz = stream->timing.pix_clk_100hz / 10; nominal_field_rate_in_uhz *= 1000ULL * 1000ULL * 1000ULL; nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz, stream->timing.h_total); From 38684e46ef6622f25f141727fd9a9733b43eaf55 Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Thu, 22 Nov 2018 17:04:14 -0500 Subject: [PATCH 088/539] drm/amd/display: Improve logging of validation failures during atomic_check [Why] There are different reasons for Validation failure error during atomic_check [How] Add better logging of the reason for validation failure Signed-off-by: Eric Bernstein Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- .../amd/display/dc/dml/display_mode_enums.h | 26 +++++++++++++++++ .../drm/amd/display/dc/dml/display_mode_lib.c | 28 +++++++++++++++++++ .../drm/amd/display/dc/dml/display_mode_lib.h | 2 ++ .../gpu/drm/amd/display/dc/inc/core_status.h | 2 +- 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 98f0fca0b08d..9888bc7659f3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1842,7 +1842,7 @@ enum dc_status resource_map_pool_resources( &context->res_ctx, pool, stream); if (!pipe_ctx->stream_res.stream_enc) - return DC_NO_STREAM_ENG_RESOURCE; + return DC_NO_STREAM_ENC_RESOURCE; update_stream_engine_usage( &context->res_ctx, pool, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index bea4e61b94c7..c59e582c1f40 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -121,4 +121,30 @@ enum self_refresh_affinity { dm_neither_self_refresh_nor_mclk_switch }; +enum dm_validation_status { + DML_VALIDATION_OK, + DML_FAIL_SCALE_RATIO_TAP, + DML_FAIL_SOURCE_PIXEL_FORMAT, + DML_FAIL_VIEWPORT_SIZE, + DML_FAIL_TOTAL_V_ACTIVE_BW, + DML_FAIL_DIO_SUPPORT, + DML_FAIL_NOT_ENOUGH_DSC, + DML_FAIL_DSC_CLK_REQUIRED, + DML_FAIL_URGENT_LATENCY, + DML_FAIL_REORDERING_BUFFER, + DML_FAIL_DISPCLK_DPPCLK, + DML_FAIL_TOTAL_AVAILABLE_PIPES, + DML_FAIL_NUM_OTG, + DML_FAIL_WRITEBACK_MODE, + DML_FAIL_WRITEBACK_LATENCY, + DML_FAIL_WRITEBACK_SCALE_RATIO_TAP, + DML_FAIL_CURSOR_SUPPORT, + DML_FAIL_PITCH_SUPPORT, + DML_FAIL_PTE_BUFFER_SIZE, + DML_FAIL_HOST_VM_IMMEDIATE_FLIP, + DML_FAIL_DSC_INPUT_BPC, + DML_FAIL_PREFETCH_SUPPORT, + DML_FAIL_V_RATIO_PREFETCH, +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index dddeb0d4db8f..d303b789adfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -62,3 +62,31 @@ void dml_init_instance(struct display_mode_lib *lib, enum dml_project project) } } +const char *dml_get_status_message(enum dm_validation_status status) +{ + switch (status) { + case DML_VALIDATION_OK: return "Validation OK"; + case DML_FAIL_SCALE_RATIO_TAP: return "Scale ratio/tap"; + case DML_FAIL_SOURCE_PIXEL_FORMAT: return "Source pixel format"; + case DML_FAIL_VIEWPORT_SIZE: return "Viewport size"; + case DML_FAIL_TOTAL_V_ACTIVE_BW: return "Total vertical active bandwidth"; + case DML_FAIL_DIO_SUPPORT: return "DIO support"; + case DML_FAIL_NOT_ENOUGH_DSC: return "Not enough DSC Units"; + case DML_FAIL_DSC_CLK_REQUIRED: return "DSC clock required"; + case DML_FAIL_URGENT_LATENCY: return "Urgent latency"; + case DML_FAIL_REORDERING_BUFFER: return "Re-ordering buffer"; + case DML_FAIL_DISPCLK_DPPCLK: return "Dispclk and Dppclk"; + case DML_FAIL_TOTAL_AVAILABLE_PIPES: return "Total available pipes"; + case DML_FAIL_NUM_OTG: return "Number of OTG"; + case DML_FAIL_WRITEBACK_MODE: return "Writeback mode"; + case DML_FAIL_WRITEBACK_LATENCY: return "Writeback latency"; + case DML_FAIL_WRITEBACK_SCALE_RATIO_TAP: return "Writeback scale ratio/tap"; + case DML_FAIL_CURSOR_SUPPORT: return "Cursor support"; + case DML_FAIL_PITCH_SUPPORT: return "Pitch support"; + case DML_FAIL_PTE_BUFFER_SIZE: return "PTE buffer size"; + case DML_FAIL_DSC_INPUT_BPC: return "DSC input bpc"; + case DML_FAIL_PREFETCH_SUPPORT: return "Prefetch support"; + case DML_FAIL_V_RATIO_PREFETCH: return "Vertical ratio prefetch"; + default: return "Unknown Status"; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 635206248889..a730e0209c05 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -43,4 +43,6 @@ struct display_mode_lib { void dml_init_instance(struct display_mode_lib *lib, enum dml_project project); +const char *dml_get_status_message(enum dm_validation_status status); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index 94fc31080fda..2e61a22ef4b2 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -30,7 +30,7 @@ enum dc_status { DC_OK = 1, DC_NO_CONTROLLER_RESOURCE = 2, - DC_NO_STREAM_ENG_RESOURCE = 3, + DC_NO_STREAM_ENC_RESOURCE = 3, DC_NO_CLOCK_SOURCE_RESOURCE = 4, DC_FAIL_CONTROLLER_VALIDATE = 5, DC_FAIL_ENC_VALIDATE = 6, From 8d25a560b877563d7a8c93a07140767d8959a7cd Mon Sep 17 00:00:00 2001 From: Leo Li Date: Tue, 13 Nov 2018 15:24:20 -0500 Subject: [PATCH 089/539] drm/amd/display: Use local variable instead of caching global [Why] context->bw.dce.dispclk_khz is being cached into unpatched_clock, then restored at end of function call. This is needlessly complex [How] Instead, use a local patched_clock variable. Leave context->bw.dce.dispclk_khz alone. No functional change is intended. Signed-off-by: David Francis Signed-off-by: Leo Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 894043009abd..d05b1750edee 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -625,11 +625,11 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; - int unpatched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) - context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; + patched_disp_clk = patched_disp_clk * 115 / 100; level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); /* get max clock state from PPLIB */ @@ -639,13 +639,11 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr, clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; } - if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { - context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, context->bw.dce.dispclk_khz); - clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { + patched_disp_clk = dce_set_clock(clk_mgr, patched_disp_clk); + clk_mgr->clks.dispclk_khz = patched_disp_clk; } dce_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); - - context->bw.dce.dispclk_khz = unpatched_disp_clk; } static void dce11_update_clocks(struct clk_mgr *clk_mgr, @@ -676,11 +674,11 @@ static void dce112_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; - int unpatched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) - context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; + patched_disp_clk = patched_disp_clk * 115 / 100; level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); /* get max clock state from PPLIB */ @@ -690,13 +688,11 @@ static void dce112_update_clocks(struct clk_mgr *clk_mgr, clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; } - if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { - context->bw.dce.dispclk_khz = dce112_set_clock(clk_mgr, context->bw.dce.dispclk_khz); - clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { + patched_disp_clk = dce112_set_clock(clk_mgr, patched_disp_clk); + clk_mgr->clks.dispclk_khz = patched_disp_clk; } dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); - - context->bw.dce.dispclk_khz = unpatched_disp_clk; } static void dce12_update_clocks(struct clk_mgr *clk_mgr, @@ -706,17 +702,16 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; int max_pix_clk = get_max_pixel_clock_for_all_paths(context); - int unpatched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) - context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; + patched_disp_clk = patched_disp_clk * 115 / 100; - if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK; - clock_voltage_req.clocks_in_khz = context->bw.dce.dispclk_khz; - context->bw.dce.dispclk_khz = dce112_set_clock(clk_mgr, context->bw.dce.dispclk_khz); - clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + clock_voltage_req.clocks_in_khz = patched_disp_clk; + clk_mgr->clks.dispclk_khz = dce112_set_clock(clk_mgr, patched_disp_clk); dm_pp_apply_clock_for_voltage_request(clk_mgr->ctx, &clock_voltage_req); } @@ -729,8 +724,6 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, dm_pp_apply_clock_for_voltage_request(clk_mgr->ctx, &clock_voltage_req); } dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); - - context->bw.dce.dispclk_khz = unpatched_disp_clk; } static const struct clk_mgr_funcs dce120_funcs = { From 09f609c34fc8b9cb560947ab11609259f5d42889 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Tue, 27 Nov 2018 15:05:12 -0500 Subject: [PATCH 090/539] drm/amd/display: Fix driver load crash in amdgpu_dm [Why] This fixes an regression introduced by: drm/amd/display: add stream ID and otg instance in dc_stream_state During driver initialization, a null pointer deref is raised. This is caused by searching for a stream status in the dc->current_state before the dc_state swap happens at the end of dc_commit_state_no_check(). Since the swap has not happened, the dc_state to be swapped in should be searched, and not dc->current_state. [How] Add a function that searches for the stream status within the given dc_state, instead of dc->current_state. Use that before the state swap happens in dc_commit_state_no_check(). Also remove duplicate occurrences of this function in amdgpu_dm.c. Signed-off-by: Leo Li Reviewed-by: Harry Wentland Acked-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++---------- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- .../gpu/drm/amd/display/dc/core/dc_stream.c | 32 ++++++++++++++++--- drivers/gpu/drm/amd/display/dc/dc_stream.h | 3 ++ 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3902ce599287..b7938e575040 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4458,20 +4458,6 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc) acrtc->crtc_id); } -struct dc_stream_status *dc_state_get_stream_status( - struct dc_state *state, - struct dc_stream_state *stream) -{ - uint8_t i; - - for (i = 0; i < state->stream_count; i++) { - if (stream == state->streams[i]) - return &state->stream_status[i]; - } - - return NULL; -} - static void update_freesync_state_on_stream( struct amdgpu_display_manager *dm, struct dm_crtc_state *new_crtc_state, @@ -5079,8 +5065,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_get_status(dm_new_crtc_state->stream); if (!status) - status = dc_state_get_stream_status(dc_state, - dm_new_crtc_state->stream); + status = dc_stream_get_status_from_state(dc_state, + dm_new_crtc_state->stream); if (!status) DC_ERR("got no status for stream %p on acrtc%p\n", dm_new_crtc_state->stream, acrtc); @@ -5844,8 +5830,8 @@ dm_determine_update_type_for_commit(struct dc *dc, goto cleanup; } - status = dc_state_get_stream_status(old_dm_state->context, - new_dm_crtc_state->stream); + status = dc_stream_get_status_from_state(old_dm_state->context, + new_dm_crtc_state->stream); update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, &stream_update, status); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3872c82843a8..024f7ea6ba92 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1071,7 +1071,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } } - status = dc_stream_get_status(context->streams[i]); + status = dc_stream_get_status_from_state(context, context->streams[i]); context->streams[i]->out.otg_offset = status->primary_otg_inst; CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index e498a9aa8035..996298c35f42 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -160,20 +160,42 @@ struct dc_stream_state *dc_create_stream_for_sink( return stream; } -struct dc_stream_status *dc_stream_get_status( +/** + * dc_stream_get_status_from_state - Get stream status from given dc state + * @state: DC state to find the stream status in + * @stream: The stream to get the stream status for + * + * The given stream is expected to exist in the given dc state. Otherwise, NULL + * will be returned. + */ +struct dc_stream_status *dc_stream_get_status_from_state( + struct dc_state *state, struct dc_stream_state *stream) { uint8_t i; - struct dc *dc = stream->ctx->dc; - for (i = 0; i < dc->current_state->stream_count; i++) { - if (stream == dc->current_state->streams[i]) - return &dc->current_state->stream_status[i]; + for (i = 0; i < state->stream_count; i++) { + if (stream == state->streams[i]) + return &state->stream_status[i]; } return NULL; } +/** + * dc_stream_get_status() - Get current stream status of the given stream state + * @stream: The stream to get the stream status for. + * + * The given stream is expected to exist in dc->current_state. Otherwise, NULL + * will be returned. + */ +struct dc_stream_status *dc_stream_get_status( + struct dc_stream_state *stream) +{ + struct dc *dc = stream->ctx->dc; + return dc_stream_get_status_from_state(dc->current_state, stream); +} + /** * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 1e1e89e7c2c5..bfb741b980fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -278,6 +278,9 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); +struct dc_stream_status *dc_stream_get_status_from_state( + struct dc_state *state, + struct dc_stream_state *stream); struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *dc_stream); From 508f5fcb54f0ad3b333a835f45e109feb9edf761 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 1 Nov 2018 11:10:18 -0400 Subject: [PATCH 091/539] drm/amd/display: Compensate for XGMI SS downspread on dprefclk [Why] When XGMI is enabled, we need to adjust the dprefclk according to the WAFL link's spread spectrum info. This is for VG20 (DCE121) only. [How] dce_clk_mgr already stores SS info, currently being used by audio clock. Therefore, patch the clk_mgr's SS info with the xGMI SS info, if xGMI is enabled. For display clock, adjust it during dce12_update_clocks() before calling set_clock(). Since we rely on a mmhub register to reliably determine if xGMI is enabled, the patching step needs to happen after resource_construct() has initialized the hardware sequencer. Signed-off-by: Leo Li Reviewed-by: Hersen Wu Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 64 +++++++++++++++ .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.h | 35 +++++++- .../gpu/drm/amd/display/dc/dce/dce_hwseq.h | 12 +++ .../display/dc/dce120/dce120_hw_sequencer.c | 15 ++++ .../display/dc/dce120/dce120_hw_sequencer.h | 1 + .../amd/display/dc/dce120/dce120_resource.c | 81 +++++++++++++++++-- 6 files changed, 198 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index d05b1750edee..3c52a4fc921d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -450,6 +450,42 @@ void dce_clock_read_ss_info(struct dce_clk_mgr *clk_mgr_dce) } } +/** + * dce121_clock_patch_xgmi_ss_info() - Save XGMI spread spectrum info + * @clk_mgr: clock manager base structure + * + * Reads from VBIOS the XGMI spread spectrum info and saves it within + * the dce clock manager. This operation will overwrite the existing dprefclk + * SS values if the vBIOS query succeeds. Otherwise, it does nothing. It also + * sets the ->xgmi_enabled flag. + */ +void dce121_clock_patch_xgmi_ss_info(struct clk_mgr *clk_mgr) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + enum bp_result result; + struct spread_spectrum_info info = { { 0 } }; + struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios; + + clk_mgr_dce->xgmi_enabled = false; + + result = bp->funcs->get_spread_spectrum_info(bp, AS_SIGNAL_TYPE_XGMI, + 0, &info); + if (result == BP_RESULT_OK && info.spread_spectrum_percentage != 0) { + clk_mgr_dce->xgmi_enabled = true; + clk_mgr_dce->ss_on_dprefclk = true; + clk_mgr_dce->dprefclk_ss_divider = + info.spread_percentage_divider; + + if (info.type.CENTER_MODE == 0) { + /* Currently for DP Reference clock we + * need only SS percentage for + * downspread */ + clk_mgr_dce->dprefclk_ss_percentage = + info.spread_spectrum_percentage; + } + } +} + void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg) @@ -710,6 +746,13 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK; + /* + * When xGMI is enabled, the display clk needs to be adjusted + * with the WAFL link's SS percentage. + */ + if (clk_mgr_dce->xgmi_enabled) + patched_disp_clk = clk_mgr_adjust_dp_ref_freq_for_ss( + clk_mgr_dce, patched_disp_clk); clock_voltage_req.clocks_in_khz = patched_disp_clk; clk_mgr->clks.dispclk_khz = dce112_set_clock(clk_mgr, patched_disp_clk); @@ -875,6 +918,27 @@ struct clk_mgr *dce120_clk_mgr_create(struct dc_context *ctx) return &clk_mgr_dce->base; } +struct clk_mgr *dce121_clk_mgr_create(struct dc_context *ctx) +{ + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), + GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + memcpy(clk_mgr_dce->max_clks_by_state, dce120_max_clks_by_state, + sizeof(dce120_max_clks_by_state)); + + dce_clk_mgr_construct(clk_mgr_dce, ctx, NULL, NULL, NULL); + + clk_mgr_dce->dprefclk_khz = 625000; + clk_mgr_dce->base.funcs = &dce120_funcs; + + return &clk_mgr_dce->base; +} + void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr) { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(*clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index 3bceb31d910d..c8f8c442142a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -94,11 +94,37 @@ struct dce_clk_mgr { * This is basically "Crystal Frequency In KHz" (XTALIN) frequency */ int dfs_bypass_disp_clk; - /* Flag for Enabled SS on DPREFCLK */ + /** + * @ss_on_dprefclk: + * + * True if spread spectrum is enabled on the DP ref clock. + */ bool ss_on_dprefclk; - /* DPREFCLK SS percentage (if down-spread enabled) */ + + /** + * @xgmi_enabled: + * + * True if xGMI is enabled. On VG20, both audio and display clocks need + * to be adjusted with the WAFL link's SS info if xGMI is enabled. + */ + bool xgmi_enabled; + + /** + * @dprefclk_ss_percentage: + * + * DPREFCLK SS percentage (if down-spread enabled). + * + * Note that if XGMI is enabled, the SS info (percentage and divider) + * from the WAFL link is used instead. This is decided during + * dce_clk_mgr initialization. + */ int dprefclk_ss_percentage; - /* DPREFCLK SS percentage Divider (100 or 1000) */ + + /** + * @dprefclk_ss_divider: + * + * DPREFCLK SS percentage Divider (100 or 1000). + */ int dprefclk_ss_divider; int dprefclk_khz; @@ -163,6 +189,9 @@ struct clk_mgr *dce112_clk_mgr_create( struct clk_mgr *dce120_clk_mgr_create(struct dc_context *ctx); +struct clk_mgr *dce121_clk_mgr_create(struct dc_context *ctx); +void dce121_clock_patch_xgmi_ss_info(struct clk_mgr *clk_mgr); + void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr); int dentist_get_divider_from_did(int did); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index c83a7f05f14c..956bdf14503f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -133,6 +133,10 @@ SR(DCHUB_AGP_TOP), \ BL_REG_LIST() +#define HWSEQ_VG20_REG_LIST() \ + HWSEQ_DCE120_REG_LIST(),\ + MMHUB_SR(MC_VM_XGMI_LFB_CNTL) + #define HWSEQ_DCE112_REG_LIST() \ HWSEQ_DCE10_REG_LIST(), \ HWSEQ_PIXEL_RATE_REG_LIST(CRTC), \ @@ -298,6 +302,7 @@ struct dce_hwseq_registers { uint32_t MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB; uint32_t MC_VM_SYSTEM_APERTURE_LOW_ADDR; uint32_t MC_VM_SYSTEM_APERTURE_HIGH_ADDR; + uint32_t MC_VM_XGMI_LFB_CNTL; uint32_t AZALIA_AUDIO_DTO; uint32_t AZALIA_CONTROLLER_CLOCK_GATING; }; @@ -382,6 +387,11 @@ struct dce_hwseq_registers { HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \ HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh) +#define HWSEQ_VG20_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCE12_MASK_SH_LIST(mask_sh),\ + HWS_SF(, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION, mask_sh),\ + HWS_SF(, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION, mask_sh) + #define HWSEQ_DCN_MASK_SH_LIST(mask_sh)\ HWSEQ_PIXEL_RATE_MASK_SH_LIST(mask_sh, OTG0_),\ HWS_SF1(OTG0_, PHYPLL_PIXEL_RATE_CNTL, PHYPLL_PIXEL_RATE_SOURCE, mask_sh), \ @@ -470,6 +480,8 @@ struct dce_hwseq_registers { type PHYSICAL_PAGE_NUMBER_MSB;\ type PHYSICAL_PAGE_NUMBER_LSB;\ type LOGICAL_ADDR; \ + type PF_LFB_REGION;\ + type PF_MAX_REGION;\ type ENABLE_L1_TLB;\ type SYSTEM_ACCESS_MODE;\ type LVTMA_BLON;\ diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c index eb0f5f9a973b..1ca30928025e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c @@ -244,6 +244,21 @@ static void dce120_update_dchub( dh_data->dchub_info_valid = false; } +/** + * dce121_xgmi_enabled() - Check if xGMI is enabled + * @hws: DCE hardware sequencer object + * + * Return true if xGMI is enabled. False otherwise. + */ +bool dce121_xgmi_enabled(struct dce_hwseq *hws) +{ + uint32_t pf_max_region; + + REG_GET(MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION, &pf_max_region); + /* PF_MAX_REGION == 0 means xgmi is disabled */ + return !!pf_max_region; +} + void dce120_hw_sequencer_construct(struct dc *dc) { /* All registers used by dce11.2 match those in dce11 in offset and diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h index 77a6b86d7606..c51afbd0b012 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h @@ -30,6 +30,7 @@ struct dc; +bool dce121_xgmi_enabled(struct dce_hwseq *hws); void dce120_hw_sequencer_construct(struct dc *dc); #endif /* __DC_HWSS_DCE112_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index f12696674eb0..48a210ec975b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -62,6 +62,8 @@ #include "soc15_hw_ip.h" #include "vega10_ip_offset.h" #include "nbio/nbio_6_1_offset.h" +#include "mmhub/mmhub_9_4_0_offset.h" +#include "mmhub/mmhub_9_4_0_sh_mask.h" #include "reg_helper.h" #include "dce100/dce100_resource.h" @@ -139,6 +141,17 @@ static const struct dce110_timing_generator_offsets dce120_tg_offsets[] = { .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + /* macros to expend register list macro defined in HW object header file * end *********************/ @@ -681,6 +694,19 @@ static const struct dce_hwseq_mask hwseq_mask = { HWSEQ_DCE12_MASK_SH_LIST(_MASK) }; +/* HWSEQ regs for VG20 */ +static const struct dce_hwseq_registers dce121_hwseq_reg = { + HWSEQ_VG20_REG_LIST() +}; + +static const struct dce_hwseq_shift dce121_hwseq_shift = { + HWSEQ_VG20_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask dce121_hwseq_mask = { + HWSEQ_VG20_MASK_SH_LIST(_MASK) +}; + static struct dce_hwseq *dce120_hwseq_create( struct dc_context *ctx) { @@ -695,6 +721,20 @@ static struct dce_hwseq *dce120_hwseq_create( return hws; } +static struct dce_hwseq *dce121_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &dce121_hwseq_reg; + hws->shifts = &dce121_hwseq_shift; + hws->masks = &dce121_hwseq_mask; + } + return hws; +} + static const struct resource_create_funcs res_create_funcs = { .read_dce_straps = read_dce_straps, .create_audio = create_audio, @@ -702,6 +742,14 @@ static const struct resource_create_funcs res_create_funcs = { .create_hwseq = dce120_hwseq_create, }; +static const struct resource_create_funcs dce121_res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce120_stream_encoder_create, + .create_hwseq = dce121_hwseq_create, +}; + + #define mi_inst_regs(id) { MI_DCE12_REG_LIST(id) } static const struct dce_mem_input_registers mi_regs[] = { mi_inst_regs(0), @@ -911,7 +959,8 @@ static bool construct( int j; struct dc_context *ctx = dc->ctx; struct irq_service_init_data irq_init_data; - bool harvest_enabled = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev); + static const struct resource_create_funcs *res_funcs; + bool is_vg20 = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev); uint32_t pipe_fuses; ctx->dc_bios->regs = &bios_regs; @@ -975,7 +1024,11 @@ static bool construct( } } - pool->base.clk_mgr = dce120_clk_mgr_create(ctx); + if (is_vg20) + pool->base.clk_mgr = dce121_clk_mgr_create(ctx); + else + pool->base.clk_mgr = dce120_clk_mgr_create(ctx); + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); @@ -1008,14 +1061,14 @@ static bool construct( if (!pool->base.irqs) goto irqs_create_fail; - /* retrieve valid pipe fuses */ - if (harvest_enabled) + /* VG20: Pipe harvesting enabled, retrieve valid pipe fuses */ + if (is_vg20) pipe_fuses = read_pipe_fuses(ctx); /* index to valid pipe resource */ j = 0; for (i = 0; i < pool->base.pipe_count; i++) { - if (harvest_enabled) { + if (is_vg20) { if ((pipe_fuses & (1 << i)) != 0) { dm_error("DC: skip invalid pipe %d!\n", i); continue; @@ -1093,10 +1146,24 @@ static bool construct( pool->base.pipe_count = j; pool->base.timing_generator_count = j; - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) + if (is_vg20) + res_funcs = &dce121_res_create_funcs; + else + res_funcs = &res_create_funcs; + + if (!resource_construct(num_virtual_links, dc, &pool->base, res_funcs)) goto res_create_fail; + /* + * This is a bit of a hack. The xGMI enabled info is used to determine + * if audio and display clocks need to be adjusted with the WAFL link's + * SS info. This is a responsiblity of the clk_mgr. But since MMHUB is + * under hwseq, and the relevant register is in MMHUB, we have to do it + * here. + */ + if (is_vg20 && dce121_xgmi_enabled(dc->hwseq)) + dce121_clock_patch_xgmi_ss_info(pool->base.clk_mgr); + /* Create hardware sequencer */ if (!dce120_hw_sequencer_create(dc)) goto controller_create_fail; From d83ba8f38154208137c761185b9ed11ec7d58e97 Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 30 Nov 2018 10:58:32 -0500 Subject: [PATCH 092/539] drm/amd/display: 3.2.11 Signed-off-by: Steven Chiu Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ef59f5cce0eb..ee7054042901 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.10" +#define DC_VER "3.2.11" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From 4fd994c448a3bcfea003203c7df39598741191b7 Mon Sep 17 00:00:00 2001 From: Fatemeh Darbehani Date: Fri, 30 Nov 2018 15:55:01 -0500 Subject: [PATCH 093/539] drm/amd/display: Start using the new pp_smu interface [Why] PPLib has impelemented the new pp_smu interface [How] Use the new functions if available instead of the old interface 'set_display_requirement' and 'dcn1_pplib_apply_display_requirements'. Signed-off-by: Fatemeh Darbehani Reviewed-by: Fatemeh Darbehani Acked-by: Leo Li Acked-by: Su Chung Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_clk_mgr.c | 99 +++++-------------- 1 file changed, 24 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index c39db5bfa7cf..afe8c42211cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -161,58 +161,6 @@ static int get_active_display_cnt( return display_count; } -static void notify_deep_sleep_dcfclk_to_smu( - struct pp_smu_funcs_rv *pp_smu, int min_dcef_deep_sleep_clk_khz) -{ - int min_dcef_deep_sleep_clk_mhz; //minimum required DCEF Deep Sleep clock in mhz - /* - * if function pointer not set up, this message is - * sent as part of pplib_apply_display_requirements. - * So just return. - */ - if (!pp_smu || !pp_smu->set_min_deep_sleep_dcfclk) - return; - - min_dcef_deep_sleep_clk_mhz = (min_dcef_deep_sleep_clk_khz + 999) / 1000; //Round up - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, min_dcef_deep_sleep_clk_mhz); -} - -static void notify_hard_min_dcfclk_to_smu( - struct pp_smu_funcs_rv *pp_smu, int min_dcf_clk_khz) -{ - int min_dcf_clk_mhz; //minimum required DCF clock in mhz - - /* - * if function pointer not set up, this message is - * sent as part of pplib_apply_display_requirements. - * So just return. - */ - if (!pp_smu || !pp_smu->set_hard_min_dcfclk_by_freq) - return; - - min_dcf_clk_mhz = min_dcf_clk_khz / 1000; - - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, min_dcf_clk_mhz); -} - -static void notify_hard_min_fclk_to_smu( - struct pp_smu_funcs_rv *pp_smu, int min_f_clk_khz) -{ - int min_f_clk_mhz; //minimum required F clock in mhz - - /* - * if function pointer not set up, this message is - * sent as part of pplib_apply_display_requirements. - * So just return. - */ - if (!pp_smu || !pp_smu->set_hard_min_fclk_by_freq) - return; - - min_f_clk_mhz = min_f_clk_khz / 1000; - - pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, min_f_clk_mhz); -} - static void dcn1_update_clocks(struct clk_mgr *clk_mgr, struct dc_state *context, bool safe_to_lower) @@ -224,7 +172,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, &dc->res_pool->pp_smu_req; struct pp_smu_display_requirement_rv smu_req = *smu_req_cur; struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; - uint32_t requested_dcf_clock_in_khz = 0; bool send_request_to_increase = false; bool send_request_to_lower = false; int display_count; @@ -244,9 +191,8 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, */ if (pp_smu->set_display_count) pp_smu->set_display_count(&pp_smu->pp_smu, display_count); - else - smu_req.display_count = display_count; + smu_req.display_count = display_count; } if (new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz @@ -269,8 +215,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, clk_mgr->clks.fclk_khz = new_clocks->fclk_khz; smu_req.hard_min_fclk_mhz = new_clocks->fclk_khz / 1000; - notify_hard_min_fclk_to_smu(pp_smu, new_clocks->fclk_khz); - send_request_to_lower = true; } @@ -285,7 +229,7 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) { clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - smu_req.min_deep_sleep_dcefclk_mhz = new_clocks->dcfclk_deep_sleep_khz / 1000; + smu_req.min_deep_sleep_dcefclk_mhz = (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000; send_request_to_lower = true; } @@ -295,15 +239,18 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, */ if (send_request_to_increase) { /*use dcfclk to request voltage*/ - requested_dcf_clock_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); + if (pp_smu->set_hard_min_fclk_by_freq && + pp_smu->set_hard_min_dcfclk_by_freq && + pp_smu->set_min_deep_sleep_dcfclk) { - notify_hard_min_dcfclk_to_smu(pp_smu, requested_dcf_clock_in_khz); - - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - - notify_deep_sleep_dcfclk_to_smu(pp_smu, clk_mgr->clks.dcfclk_deep_sleep_khz); - dcn1_pplib_apply_display_requirements(dc, context); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz); + } else { + if (pp_smu->set_display_requirement) + pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); + dcn1_pplib_apply_display_requirements(dc, context); + } } /* dcn1 dppclk is tied to dispclk */ @@ -318,18 +265,20 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, if (!send_request_to_increase && send_request_to_lower) { /*use dcfclk to request voltage*/ - requested_dcf_clock_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); + if (pp_smu->set_hard_min_fclk_by_freq && + pp_smu->set_hard_min_dcfclk_by_freq && + pp_smu->set_min_deep_sleep_dcfclk) { - notify_hard_min_dcfclk_to_smu(pp_smu, requested_dcf_clock_in_khz); - - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - - notify_deep_sleep_dcfclk_to_smu(pp_smu, clk_mgr->clks.dcfclk_deep_sleep_khz); - dcn1_pplib_apply_display_requirements(dc, context); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz); + } else { + if (pp_smu->set_display_requirement) + pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); + dcn1_pplib_apply_display_requirements(dc, context); + } } - *smu_req_cur = smu_req; } static const struct clk_mgr_funcs dcn1_funcs = { From 69338c1f4e97eac659db202c363aa2bfa4784347 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 18 Sep 2018 14:24:05 -0400 Subject: [PATCH 094/539] drm/amd/display: add dsclk to pipe bw struct This will allow us to program dscclk to required value Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- drivers/gpu/drm/amd/display/dc/inc/core_types.h | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 05c615228361..afdfe3b0a1c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2048,7 +2048,7 @@ void update_dchubp_dpp( dc->res_pool->dccg->funcs->update_dpp_dto( dc->res_pool->dccg, dpp->inst, - pipe_ctx->plane_res.bw.calc.dppclk_khz); + pipe_ctx->plane_res.bw.dppclk_khz); else dc->res_pool->clk_mgr->clks.dppclk_khz = should_divided_by_2 ? dc->res_pool->clk_mgr->clks.dispclk_khz / 2 : diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index b168a5e9dd9d..b019a5e0e018 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -180,13 +180,8 @@ struct resource_pool { const struct resource_caps *res_cap; }; -struct dcn_fe_clocks { - int dppclk_khz; -}; - struct dcn_fe_bandwidth { - struct dcn_fe_clocks calc; - struct dcn_fe_clocks cur; + int dppclk_khz; }; struct stream_resource { From cfdb60f7673907c718a2c434f7ee6ecfe14d85b7 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 10 Dec 2018 10:18:21 -0500 Subject: [PATCH 095/539] drm/amd/display: Remove unused parameter plane_states [Why] The function dc_commit_updates_for_stream had a parameter called plane_states. It was never used. It was getting in the way of some cleanup work [How] Remove it Signed-off-by: David Francis Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 - drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b7938e575040..85e4afc6790c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4673,7 +4673,6 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, 1, acrtc_state->stream, &stream_update, - &surface_updates->surface, state); mutex_unlock(&adev->dm.dc_lock); @@ -4772,7 +4771,7 @@ static bool commit_planes_to_stream( dc, updates, new_plane_count, - dc_stream, stream_update, plane_states, state); + dc_stream, stream_update, state); mutex_unlock(&dm->dc_lock); kfree(flip_addr); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 024f7ea6ba92..af907d541318 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1620,7 +1620,6 @@ void dc_commit_updates_for_stream(struct dc *dc, int surface_count, struct dc_stream_state *stream, struct dc_stream_update *stream_update, - struct dc_plane_state **plane_states, struct dc_state *state) { const struct dc_stream_status *stream_status; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index bfb741b980fb..7bb1da18c1ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -180,7 +180,6 @@ void dc_commit_updates_for_stream(struct dc *dc, int surface_count, struct dc_stream_state *stream, struct dc_stream_update *stream_update, - struct dc_plane_state **plane_states, struct dc_state *state); /* * Log the current stream state. From eb87161a9cb02a058e7535a729af4ed098da89f0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Dec 2018 13:05:54 +0000 Subject: [PATCH 096/539] amdgpu/dc: remove various variables that are defined but never used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several variables that are defined and never used and hence can be removed. Remove them. Cleans up clang -Wunused-const-variable warnings: warning: ‘dvi_hdmi_dongle_signature_str’ defined but not used warning: ‘dce11_one_lpt_channel_max_resolution’ defined but not used warning: ‘ddc_hw_status_addr’ defined but not used Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 1 - .../gpu/drm/amd/display/dc/dce110/dce110_compressor.c | 2 -- .../amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c | 10 ---------- 3 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 506a97e16956..99a314b79850 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -42,7 +42,6 @@ #define CV_SMART_DONGLE_ADDRESS 0x20 /* DVI-HDMI dongle slave address for retrieving dongle signature*/ #define DVI_HDMI_DONGLE_ADDRESS 0x68 -static const int8_t dvi_hdmi_dongle_signature_str[] = "6140063500G"; struct dvi_hdmi_dongle_signature_data { int8_t vendor[3];/* "AMD" */ uint8_t version[2]; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c index 52d50e24a995..7b23239d33fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c @@ -62,8 +62,6 @@ static const struct dce110_compressor_reg_offsets reg_offsets[] = { } }; -static const uint32_t dce11_one_lpt_channel_max_resolution = 2560 * 1600; - static uint32_t align_to_chunks_number_per_line(uint32_t pixels) { return 256 * ((pixels + 255) / 256); diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c index 4853ee26096a..c9d6cf08e8ab 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c @@ -50,16 +50,6 @@ * This unit */ -static const uint32_t ddc_hw_status_addr[] = { - mmDC_I2C_DDC1_HW_STATUS, - mmDC_I2C_DDC2_HW_STATUS, - mmDC_I2C_DDC3_HW_STATUS, - mmDC_I2C_DDC4_HW_STATUS, - mmDC_I2C_DDC5_HW_STATUS, - mmDC_I2C_DDC6_HW_STATUS, - mmDC_I2C_DDCVGA_HW_STATUS -}; - /* * @brief * Cast 'struct i2c_sw_engine *' From 84d9d39aa166a8ea2c7687da48cdacdc0685644d Mon Sep 17 00:00:00 2001 From: Brajeswar Ghosh Date: Wed, 19 Dec 2018 13:27:13 +0530 Subject: [PATCH 097/539] drm/amd/display: Remove duplicate header Remove custom_float.h which is included more than once Acked-by: Souptick Joarder Signed-off-by: Brajeswar Ghosh Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index afdfe3b0a1c3..a5c4f95a677d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -40,7 +40,6 @@ #include "ipp.h" #include "mpc.h" #include "reg_helper.h" -#include "custom_float.h" #include "dcn10_hubp.h" #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" From 97ccdb71211cf8378ae012a638670fe1b17a89c0 Mon Sep 17 00:00:00 2001 From: Brajeswar Ghosh Date: Thu, 20 Dec 2018 17:58:41 +0530 Subject: [PATCH 098/539] drm/amd/display/dcn10_hw_sequencer_debug: Remove duplicate header Remove custom_float.h which is included more than once Signed-off-by: Brajeswar Ghosh Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 19d15ec46642..5ead46269acf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -40,7 +40,6 @@ #include "ipp.h" #include "mpc.h" #include "reg_helper.h" -#include "custom_float.h" #include "dcn10_hubp.h" #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" From 8a13779ec1297ca74765a88ba0cc4159be03c0ca Mon Sep 17 00:00:00 2001 From: Brajeswar Ghosh Date: Thu, 20 Dec 2018 18:04:55 +0530 Subject: [PATCH 099/539] drm/amd/display/i2c_hw_engine_dce110: Remove duplicate header Remove reg_helper.h which is included more than once Signed-off-by: Brajeswar Ghosh Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c index 9cbe1a7a6bcb..137350142fb2 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c @@ -97,7 +97,6 @@ enum dc_i2c_arbitration { #define FN(reg_name, field_name) \ hw_engine->i2c_shift->field_name, hw_engine->i2c_mask->field_name -#include "reg_helper.h" static void disable_i2c_hw_engine( struct i2c_hw_engine_dce110 *hw_engine) From b187af3709131f0530995b1abb9da4c18026417b Mon Sep 17 00:00:00 2001 From: Ken Chalmers Date: Mon, 5 Nov 2018 11:34:57 -0500 Subject: [PATCH 100/539] drm/amd/display: Expand set_pixel_clock debug log message [Why] For more informative debugging. [How] Add timing generator and PLL ids to output, to make it clear which pixel clock is being set. Signed-off-by: Ken Chalmers Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/command_table2.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 7d3e57765eb2..63206a4b6c0b 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -307,9 +307,11 @@ static enum bp_result set_pixel_clock_v7( (uint8_t) bp->cmd_helper-> transmitter_color_depth_to_atom( bp_params->color_depth); - DC_LOG_BIOS("%s:program display clock = %d"\ - "colorDepth = %d\n", __func__,\ - bp_params->target_pixel_clock_100hz, bp_params->color_depth); + + DC_LOG_BIOS("%s:program display clock = %d, tg = %d, pll = %d, "\ + "colorDepth = %d\n", __func__, + bp_params->target_pixel_clock_100hz, (int)controller_id, + pll_id, bp_params->color_depth); if (bp_params->flags.FORCE_PROGRAMMING_OF_PLL) clk.miscinfo |= PIXEL_CLOCK_V7_MISC_FORCE_PROG_PPLL; From 785fd44c251c08c1490e20daec604f0f924dd7fc Mon Sep 17 00:00:00 2001 From: Shirish S Date: Thu, 20 Dec 2018 16:08:02 +0530 Subject: [PATCH 101/539] drm/amd/display: fix compliler errors [-Werror,-Wmissing-braces] Initializing structures with { } is known to be problematic since it doesn't necessararily initialize all bytes, in case of padding, causing random failures when structures are memcmp(). This patch fixes the structure initialisation related compiler error by memset. Signed-off-by: Shirish S Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 ++- .../gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index a5c4f95a677d..93226fc37f27 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -91,9 +91,10 @@ static void log_mpc_crc(struct dc *dc, void dcn10_log_hubbub_state(struct dc *dc, struct dc_log_buffer_ctx *log_ctx) { struct dc_context *dc_ctx = dc->ctx; - struct dcn_hubbub_wm wm = {0}; + struct dcn_hubbub_wm wm; int i; + memset(&wm, 0, sizeof(struct dcn_hubbub_wm)); dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent" diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 5ead46269acf..98f41d250978 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -71,7 +71,7 @@ static unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned int bufSize) { struct dc_context *dc_ctx = dc->ctx; - struct dcn_hubbub_wm wm = {0}; + struct dcn_hubbub_wm wm; int i; unsigned int chars_printed = 0; @@ -80,6 +80,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; static const unsigned int frac = 1000; + memset(&wm, 0, sizeof(struct dcn_hubbub_wm)); dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm); chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n"); From 32e61361b82e7ad016a83345f66ccc68865f63f4 Mon Sep 17 00:00:00 2001 From: Ken Chalmers Date: Wed, 19 Dec 2018 14:50:20 -0500 Subject: [PATCH 102/539] drm/amd/display: Fix 64-bit division for 32-bit builds [Why] 32-bit builds break when doing 64-bit division directly. [How] Use the div_u64() function instead to perform the division. Fixes: https://lists.freedesktop.org/archives/dri-devel/2018-December/201008.html Signed-off-by: Ken Chalmers Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 5c629ae487ec..8b5ce557ee71 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -94,7 +94,7 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) if (pix_clk_100hz == 0) return; - pix_dur = 10000000000ull / pix_clk_100hz; + pix_dur = div_u64(10000000000ull, pix_clk_100hz); set_reg_field_value( value, From 05794eff1aa6060248bfca34ee936c613f94a942 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Thu, 20 Dec 2018 16:04:35 +0530 Subject: [PATCH 103/539] drm/amdgpu/gmc: fix compiler errors [-Werror,-Wmissing-braces] (V2) Initializing structures with { } is known to be problematic since it doesn't necessararily initialize all bytes, in case of padding, causing random failures when structures are memcmp(). This patch fixes the structure initialisation related compiler error by memset(). V2: rectified missing piece in coding Signed-off-by: Shirish S Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1ad7e6b8ed1d..34440672f938 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1471,8 +1471,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_set_fault_enable_default(adev, false); if (printk_ratelimit()) { - struct amdgpu_task_info task_info = { 0 }; + struct amdgpu_task_info task_info; + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index bacdaef77b6c..521ac8954652 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -320,8 +320,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } if (printk_ratelimit()) { - struct amdgpu_task_info task_info = { 0 }; + struct amdgpu_task_info task_info; + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, From 73c97fa4421fa0465a0b25a0ccf62af32e4bd01e Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 25 Dec 2018 10:20:56 +0800 Subject: [PATCH 104/539] drm/amd/powerplay:clean up phm_enable_clock_power_gatings As the PG was setted by each IP block durinng IP early init thus remove the unused phm_enable_clock_power_gatings related funcs. Signed-off-by: Prike Liang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 9 --------- drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h | 1 - drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 - 3 files changed, 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 1f92a9f4c9e3..c1c51c115e57 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -154,15 +154,6 @@ int phm_powerdown_uvd(struct pp_hwmgr *hwmgr) return 0; } -int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr) -{ - PHM_FUNC_CHECK(hwmgr); - - if (NULL != hwmgr->hwmgr_func->enable_clock_power_gating) - return hwmgr->hwmgr_func->enable_clock_power_gating(hwmgr); - - return 0; -} int phm_disable_clock_power_gatings(struct pp_hwmgr *hwmgr) { diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index f4dab979a3a1..6e0be6027705 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -397,7 +397,6 @@ struct phm_odn_clock_levels { }; extern int phm_disable_clock_power_gatings(struct pp_hwmgr *hwmgr); -extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr); extern int phm_powerdown_uvd(struct pp_hwmgr *hwmgr); extern int phm_setup_asic(struct pp_hwmgr *hwmgr); extern int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 8cb831b6a016..be4f87aed99c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -251,7 +251,6 @@ struct pp_hwmgr_func { uint32_t (*get_sclk)(struct pp_hwmgr *hwmgr, bool low); int (*power_state_set)(struct pp_hwmgr *hwmgr, const void *state); - int (*enable_clock_power_gating)(struct pp_hwmgr *hwmgr); int (*notify_smc_display_config_after_ps_adjustment)(struct pp_hwmgr *hwmgr); int (*pre_display_config_changed)(struct pp_hwmgr *hwmgr); int (*display_config_changed)(struct pp_hwmgr *hwmgr); From 8bb9eb480d032418bd08d0a6a39e4eaa1dec2fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 17 Sep 2018 16:13:49 +0200 Subject: [PATCH 105/539] drm/amdgpu: add IH ring to ih_get_wptr/ih_set_rptr v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's start to support multiple rings. v2: decode IV is needed as well Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 13 +++--- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 29 +++++++------ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 31 +++++++------- drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 29 +++++++------ drivers/gpu/drm/amd/amdgpu/si_ih.c | 31 +++++++------- drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 43 ++++++++++--------- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 56 +++++++++++++------------ 8 files changed, 128 insertions(+), 110 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 8af67f649660..fb8dd6179926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -137,7 +137,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, if (!ih->enabled || adev->shutdown) return IRQ_NONE; - wptr = amdgpu_ih_get_wptr(adev); + wptr = amdgpu_ih_get_wptr(adev, ih); restart_ih: /* is somebody else already processing irqs? */ @@ -154,11 +154,11 @@ restart_ih: ih->rptr &= ih->ptr_mask; } - amdgpu_ih_set_rptr(adev); + amdgpu_ih_set_rptr(adev, ih); atomic_set(&ih->lock, 0); /* make sure wptr hasn't changed while processing */ - wptr = amdgpu_ih_get_wptr(adev); + wptr = amdgpu_ih_get_wptr(adev, ih); if (wptr != ih->rptr) goto restart_ih; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index f877bb78d10a..d810fd73d574 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -50,15 +50,16 @@ struct amdgpu_ih_ring { /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ - u32 (*get_wptr)(struct amdgpu_device *adev); - void (*decode_iv)(struct amdgpu_device *adev, + u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); + void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry); - void (*set_rptr)(struct amdgpu_device *adev); + void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); }; -#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) -#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) -#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) +#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih)) +#define amdgpu_ih_decode_iv(adev, iv) \ + (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv)) +#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih)) int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, unsigned ring_size, bool use_bus_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 8a8b4967a101..884aa9b81e86 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -183,11 +183,12 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev) * Used by cik_irq_process(). * Returns the value of the wptr. */ -static u32 cik_ih_get_wptr(struct amdgpu_device *adev) +static u32 cik_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) { wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK; @@ -196,13 +197,13 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * this should allow us to catchup. */ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask); - adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask; + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } /* CIK IV Ring @@ -237,16 +238,17 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * position and also advance the position. */ static void cik_ih_decode_iv(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry) { /* wptr/rptr are in bytes! */ - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[4]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; @@ -256,7 +258,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ - adev->irq.ih.rptr += 16; + ih->rptr += 16; } /** @@ -266,9 +268,10 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, * * Set the IH ring buffer rptr. */ -static void cik_ih_set_rptr(struct amdgpu_device *adev) +static void cik_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr); + WREG32(mmIH_RB_RPTR, ih->rptr); } static int cik_ih_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 9d3ea298e116..c59eed041fb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -185,11 +185,12 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev) * Used by cz_irq_process(VI). * Returns the value of the wptr. */ -static u32 cz_ih_get_wptr(struct amdgpu_device *adev) +static u32 cz_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -198,13 +199,13 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) * this should allow us to catchup. */ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask); - adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask; + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; tmp = RREG32(mmIH_RB_CNTL); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } /** @@ -216,16 +217,17 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) * position and also advance the position. */ static void cz_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) { /* wptr/rptr are in bytes! */ - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[4]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; @@ -235,7 +237,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ - adev->irq.ih.rptr += 16; + ih->rptr += 16; } /** @@ -245,9 +247,10 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, * * Set the IH ring buffer rptr. */ -static void cz_ih_set_rptr(struct amdgpu_device *adev) +static void cz_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr); + WREG32(mmIH_RB_RPTR, ih->rptr); } static int cz_ih_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index a3984d10b604..f006ed509db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -185,11 +185,12 @@ static void iceland_ih_irq_disable(struct amdgpu_device *adev) * Used by cz_irq_process(VI). * Returns the value of the wptr. */ -static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) +static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -198,13 +199,13 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) * this should allow us to catchup. */ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask); - adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask; + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; tmp = RREG32(mmIH_RB_CNTL); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } /** @@ -216,16 +217,17 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) * position and also advance the position. */ static void iceland_ih_decode_iv(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, struct amdgpu_iv_entry *entry) { /* wptr/rptr are in bytes! */ - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[4]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; @@ -235,7 +237,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ - adev->irq.ih.rptr += 16; + ih->rptr += 16; } /** @@ -245,9 +247,10 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, * * Set the IH ring buffer rptr. */ -static void iceland_ih_set_rptr(struct amdgpu_device *adev) +static void iceland_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr); + WREG32(mmIH_RB_RPTR, ih->rptr); } static int iceland_ih_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 2938fb9f17cc..5cabc9687f76 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -100,34 +100,36 @@ static void si_ih_irq_disable(struct amdgpu_device *adev) mdelay(1); } -static u32 si_ih_get_wptr(struct amdgpu_device *adev) +static u32 si_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) { wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK; dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask); - adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask; + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } static void si_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) { - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[4]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; @@ -135,12 +137,13 @@ static void si_ih_decode_iv(struct amdgpu_device *adev, entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - adev->irq.ih.rptr += 16; + ih->rptr += 16; } -static void si_ih_set_rptr(struct amdgpu_device *adev) +static void si_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - WREG32(IH_RB_RPTR, adev->irq.ih.rptr); + WREG32(IH_RB_RPTR, ih->rptr); } static int si_ih_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 15da06ddeb75..30e3911dedb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -193,14 +193,15 @@ static void tonga_ih_irq_disable(struct amdgpu_device *adev) * Used by cz_irq_process(VI). * Returns the value of the wptr. */ -static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) +static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; if (adev->irq.ih.use_bus_addr) - wptr = le32_to_cpu(adev->irq.ih.ring[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(ih->ring[ih->wptr_offs]); else - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -209,13 +210,13 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) * this should allow us to catchup. */ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask); - adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask; + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; tmp = RREG32(mmIH_RB_CNTL); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } /** @@ -227,16 +228,17 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) * position and also advance the position. */ static void tonga_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) { /* wptr/rptr are in bytes! */ - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[4]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; @@ -246,7 +248,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ - adev->irq.ih.rptr += 16; + ih->rptr += 16; } /** @@ -256,17 +258,18 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, * * Set the IH ring buffer rptr. */ -static void tonga_ih_set_rptr(struct amdgpu_device *adev) +static void tonga_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - if (adev->irq.ih.use_doorbell) { + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ - if (adev->irq.ih.use_bus_addr) - adev->irq.ih.ring[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; + if (ih->use_bus_addr) + ih->ring[ih->rptr_offs] = ih->rptr; else - adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; - WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr); + adev->wb.wb[ih->rptr_offs] = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); } else { - WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr); + WREG32(mmIH_RB_RPTR, ih->rptr); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 2c250b01a903..28b0e9a6cc42 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -191,14 +191,15 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev) * ring buffer overflow and deal with it. * Returns the value of the wptr. */ -static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) +static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { u32 wptr, tmp; - if (adev->irq.ih.use_bus_addr) - wptr = le32_to_cpu(adev->irq.ih.ring[adev->irq.ih.wptr_offs]); + if (ih->use_bus_addr) + wptr = le32_to_cpu(ih->ring[ih->wptr_offs]); else - wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -207,16 +208,16 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) * from the last not overwritten vector (wptr + 32). Hopefully * this should allow us to catchup. */ - tmp = (wptr + 32) & adev->irq.ih.ptr_mask; + tmp = (wptr + 32) & ih->ptr_mask; dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, adev->irq.ih.rptr, tmp); - adev->irq.ih.rptr = tmp; + wptr, ih->rptr, tmp); + ih->rptr = tmp; tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); } - return (wptr & adev->irq.ih.ptr_mask); + return (wptr & ih->ptr_mask); } /** @@ -228,20 +229,21 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) * position and also advance the position. */ static void vega10_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) { /* wptr/rptr are in bytes! */ - u32 ring_index = adev->irq.ih.rptr >> 2; + u32 ring_index = ih->rptr >> 2; uint32_t dw[8]; - dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); - dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); - dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - dw[4] = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); - dw[5] = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); - dw[6] = le32_to_cpu(adev->irq.ih.ring[ring_index + 6]); - dw[7] = le32_to_cpu(adev->irq.ih.ring[ring_index + 7]); + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); + dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); + dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); + dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); + dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); entry->client_id = dw[0] & 0xff; entry->src_id = (dw[0] >> 8) & 0xff; @@ -257,9 +259,8 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[2] = dw[6]; entry->src_data[3] = dw[7]; - /* wptr/rptr are in bytes! */ - adev->irq.ih.rptr += 32; + ih->rptr += 32; } /** @@ -269,17 +270,18 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, * * Set the IH ring buffer rptr. */ -static void vega10_ih_set_rptr(struct amdgpu_device *adev) +static void vega10_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { - if (adev->irq.ih.use_doorbell) { + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ - if (adev->irq.ih.use_bus_addr) - adev->irq.ih.ring[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; + if (ih->use_bus_addr) + ih->ring[ih->rptr_offs] = ih->rptr; else - adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; - WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr); + adev->wb.wb[ih->rptr_offs] = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, adev->irq.ih.rptr); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); } } From d81f78b440f314e2a551d938e4c509fca16a8fe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 Sep 2018 14:24:49 +0200 Subject: [PATCH 106/539] drm/amdgpu: simplify IH programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calculate all the addresses and pointers in amdgpu_ih.c Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 34 +++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 23 +++++++++------- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 9 +++---- drivers/gpu/drm/amd/amdgpu/cz_ih.c | 11 ++++---- drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 9 +++---- drivers/gpu/drm/amd/amdgpu/si_ih.c | 9 +++---- drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 27 +++++-------------- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 36 +++++++++---------------- 8 files changed, 73 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index fb8dd6179926..d0a5db777b6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -52,6 +52,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ih->use_bus_addr = use_bus_addr; if (use_bus_addr) { + dma_addr_t dma_addr; + if (ih->ring) return 0; @@ -59,21 +61,26 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, * add them to the end of the ring allocation. */ ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8, - &ih->rb_dma_addr, GFP_KERNEL); + &dma_addr, GFP_KERNEL); if (ih->ring == NULL) return -ENOMEM; memset((void *)ih->ring, 0, ih->ring_size + 8); - ih->wptr_offs = (ih->ring_size / 4) + 0; - ih->rptr_offs = (ih->ring_size / 4) + 1; + ih->gpu_addr = dma_addr; + ih->wptr_addr = dma_addr + ih->ring_size; + ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; + ih->rptr_addr = dma_addr + ih->ring_size + 4; + ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1]; } else { - r = amdgpu_device_wb_get(adev, &ih->wptr_offs); + unsigned wptr_offs, rptr_offs; + + r = amdgpu_device_wb_get(adev, &wptr_offs); if (r) return r; - r = amdgpu_device_wb_get(adev, &ih->rptr_offs); + r = amdgpu_device_wb_get(adev, &rptr_offs); if (r) { - amdgpu_device_wb_free(adev, ih->wptr_offs); + amdgpu_device_wb_free(adev, wptr_offs); return r; } @@ -82,10 +89,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, &ih->ring_obj, &ih->gpu_addr, (void **)&ih->ring); if (r) { - amdgpu_device_wb_free(adev, ih->rptr_offs); - amdgpu_device_wb_free(adev, ih->wptr_offs); + amdgpu_device_wb_free(adev, rptr_offs); + amdgpu_device_wb_free(adev, wptr_offs); return r; } + + ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4; + ih->wptr_cpu = &adev->wb.wb[wptr_offs]; + ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4; + ih->rptr_cpu = &adev->wb.wb[rptr_offs]; } return 0; } @@ -109,13 +121,13 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) * add them to the end of the ring allocation. */ dma_free_coherent(adev->dev, ih->ring_size + 8, - (void *)ih->ring, ih->rb_dma_addr); + (void *)ih->ring, ih->gpu_addr); ih->ring = NULL; } else { amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr, (void **)&ih->ring); - amdgpu_device_wb_free(adev, ih->wptr_offs); - amdgpu_device_wb_free(adev, ih->rptr_offs); + amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4); + amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index d810fd73d574..1ccb1831382a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -31,20 +31,25 @@ struct amdgpu_iv_entry; * R6xx+ IH ring */ struct amdgpu_ih_ring { - struct amdgpu_bo *ring_obj; - volatile uint32_t *ring; - unsigned rptr; unsigned ring_size; - uint64_t gpu_addr; uint32_t ptr_mask; - atomic_t lock; - bool enabled; - unsigned wptr_offs; - unsigned rptr_offs; u32 doorbell_index; bool use_doorbell; bool use_bus_addr; - dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ + + struct amdgpu_bo *ring_obj; + volatile uint32_t *ring; + uint64_t gpu_addr; + + uint64_t wptr_addr; + volatile uint32_t *wptr_cpu; + + uint64_t rptr_addr; + volatile uint32_t *rptr_cpu; + + bool enabled; + unsigned rptr; + atomic_t lock; }; /* provided by the ih block */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 884aa9b81e86..721c757156e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -103,9 +103,9 @@ static void cik_ih_disable_interrupts(struct amdgpu_device *adev) */ static int cik_ih_irq_init(struct amdgpu_device *adev) { + struct amdgpu_ih_ring *ih = &adev->irq.ih; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; - u64 wptr_off; /* disable irqs */ cik_ih_disable_interrupts(adev); @@ -131,9 +131,8 @@ static int cik_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl |= IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK; /* set the writeback address whether it's enabled or not */ - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); + WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF); WREG32(mmIH_RB_CNTL, ih_rb_cntl); @@ -188,7 +187,7 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) { wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c59eed041fb5..61024b9c7a4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -103,9 +103,9 @@ static void cz_ih_disable_interrupts(struct amdgpu_device *adev) */ static int cz_ih_irq_init(struct amdgpu_device *adev) { - int rb_bufsz; + struct amdgpu_ih_ring *ih = &adev->irq.ih; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; - u64 wptr_off; + int rb_bufsz; /* disable irqs */ cz_ih_disable_interrupts(adev); @@ -133,9 +133,8 @@ static int cz_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1); /* set the writeback address whether it's enabled or not */ - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); + WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF); WREG32(mmIH_RB_CNTL, ih_rb_cntl); @@ -190,7 +189,7 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index f006ed509db3..b1626e1d2f5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -103,9 +103,9 @@ static void iceland_ih_disable_interrupts(struct amdgpu_device *adev) */ static int iceland_ih_irq_init(struct amdgpu_device *adev) { + struct amdgpu_ih_ring *ih = &adev->irq.ih; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; - u64 wptr_off; /* disable irqs */ iceland_ih_disable_interrupts(adev); @@ -133,9 +133,8 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1); /* set the writeback address whether it's enabled or not */ - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); + WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF); WREG32(mmIH_RB_CNTL, ih_rb_cntl); @@ -190,7 +189,7 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5cabc9687f76..8c50c9cab455 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -57,9 +57,9 @@ static void si_ih_disable_interrupts(struct amdgpu_device *adev) static int si_ih_irq_init(struct amdgpu_device *adev) { + struct amdgpu_ih_ring *ih = &adev->irq.ih; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; - u64 wptr_off; si_ih_disable_interrupts(adev); WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); @@ -76,9 +76,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) (rb_bufsz << 1) | IH_WPTR_WRITEBACK_ENABLE; - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(IH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32(IH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); + WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF); WREG32(IH_RB_CNTL, ih_rb_cntl); WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); @@ -105,7 +104,7 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) { wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 30e3911dedb5..a20b711a6756 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -99,9 +99,9 @@ static void tonga_ih_disable_interrupts(struct amdgpu_device *adev) */ static int tonga_ih_irq_init(struct amdgpu_device *adev) { - int rb_bufsz; u32 interrupt_cntl, ih_rb_cntl, ih_doorbell_rtpr; - u64 wptr_off; + struct amdgpu_ih_ring *ih = &adev->irq.ih; + int rb_bufsz; /* disable irqs */ tonga_ih_disable_interrupts(adev); @@ -118,10 +118,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev) WREG32(mmINTERRUPT_CNTL, interrupt_cntl); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ - if (adev->irq.ih.use_bus_addr) - WREG32(mmIH_RB_BASE, adev->irq.ih.rb_dma_addr >> 8); - else - WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8); + WREG32(mmIH_RB_BASE, ih->gpu_addr >> 8); rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4); ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); @@ -136,12 +133,8 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev) WREG32(mmIH_RB_CNTL, ih_rb_cntl); /* set the writeback address whether it's enabled or not */ - if (adev->irq.ih.use_bus_addr) - wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4); - else - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); + WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF); /* set rptr, wptr to 0 */ WREG32(mmIH_RB_RPTR, 0); @@ -198,10 +191,7 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - if (adev->irq.ih.use_bus_addr) - wptr = le32_to_cpu(ih->ring[ih->wptr_offs]); - else - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -263,10 +253,7 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev, { if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ - if (ih->use_bus_addr) - ih->ring[ih->rptr_offs] = ih->rptr; - else - adev->wb.wb[ih->rptr_offs] = ih->rptr; + *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); } else { WREG32(mmIH_RB_RPTR, ih->rptr); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 28b0e9a6cc42..3e9ebb0de94d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -86,11 +86,11 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) */ static int vega10_ih_irq_init(struct amdgpu_device *adev) { + struct amdgpu_ih_ring *ih = &adev->irq.ih; int ret = 0; int rb_bufsz; u32 ih_rb_cntl, ih_doorbell_rtpr; u32 tmp; - u64 wptr_off; /* disable irqs */ vega10_ih_disable_interrupts(adev); @@ -99,15 +99,11 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ - if (adev->irq.ih.use_bus_addr) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.rb_dma_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, ((u64)adev->irq.ih.rb_dma_addr >> 40) & 0xff); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 1); - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (adev->irq.ih.gpu_addr >> 40) & 0xff); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 4); - } + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, + (adev->irq.ih.gpu_addr >> 40) & 0xff); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, + ih->use_bus_addr ? 1 : 4); rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1); @@ -124,12 +120,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); /* set the writeback address whether it's enabled or not */ - if (adev->irq.ih.use_bus_addr) - wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4); - else - wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, + lower_32_bits(ih->wptr_addr)); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, + upper_32_bits(ih->wptr_addr) & 0xFFFF); /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); @@ -196,10 +190,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, { u32 wptr, tmp; - if (ih->use_bus_addr) - wptr = le32_to_cpu(ih->ring[ih->wptr_offs]); - else - wptr = le32_to_cpu(adev->wb.wb[ih->wptr_offs]); + wptr = le32_to_cpu(*ih->wptr_cpu); if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -275,10 +266,7 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, { if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ - if (ih->use_bus_addr) - ih->ring[ih->rptr_offs] = ih->rptr; - else - adev->wb.wb[ih->rptr_offs] = ih->rptr; + *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); } else { WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); From b82175750131c9ac228cf08116a208c856d850c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 14 Dec 2018 15:31:24 +0100 Subject: [PATCH 107/539] drm/amdgpu: fix IH overflow on Vega10 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an ring buffer overflow happens the appropriate bit is set in the WPTR register which is also written back to memory. But clearing the bit in the WPTR doesn't trigger another memory writeback. So what can happen is that we end up processing the buffer overflow over and over again because the bit is never cleared. Resulting in a random system lockup because of an infinite loop in an interrupt handler. This is 100% reproducible on Vega10, but it's most likely an issue we have in the driver over all generations all the way back to radeon. v2: rebase Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 37 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 3e9ebb0de94d..562701939d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -192,22 +192,31 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 32). Hopefully - * this should allow us to catchup. - */ - tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); - ih->rptr = tmp; + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR)); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; - tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); - } + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catchup. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); + +out: return (wptr & ih->ptr_mask); } From b56e3270e0f310d66567368ee134660147305724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 13:37:24 +0200 Subject: [PATCH 108/539] drm/amdgpu: add the IH to the IV trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To distinct on which IH ring an IV was found. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b7968f426862..b8e543e23166 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -148,6 +148,8 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev, entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); + trace_amdgpu_iv(ih - &adev->irq.ih, &entry); + amdgpu_irq_dispatch(adev, &entry); } @@ -367,8 +369,6 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, bool handled = false; int r; - trace_amdgpu_iv(entry); - if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 626abca770a0..d3ca2424b5fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -76,9 +76,10 @@ TRACE_EVENT(amdgpu_mm_wreg, ); TRACE_EVENT(amdgpu_iv, - TP_PROTO(struct amdgpu_iv_entry *iv), - TP_ARGS(iv), + TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv), + TP_ARGS(ih, iv), TP_STRUCT__entry( + __field(unsigned, ih) __field(unsigned, client_id) __field(unsigned, src_id) __field(unsigned, ring_id) @@ -90,6 +91,7 @@ TRACE_EVENT(amdgpu_iv, __array(unsigned, src_data, 4) ), TP_fast_assign( + __entry->ih = ih; __entry->client_id = iv->client_id; __entry->src_id = iv->src_id; __entry->ring_id = iv->ring_id; @@ -103,8 +105,9 @@ TRACE_EVENT(amdgpu_iv, __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x", - __entry->client_id, __entry->src_id, + TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u " + "timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x", + __entry->ih, __entry->client_id, __entry->src_id, __entry->ring_id, __entry->vmid, __entry->timestamp, __entry->pasid, __entry->src_data[0], __entry->src_data[1], From 7df7e505e82a2b39e80159234f8556e1e46f9341 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 2 Jan 2019 15:15:41 -0500 Subject: [PATCH 109/539] drm/amd/display: Set requested plane state DCC params for GFX9 [Why] Hardware support for Delta Color Compression (DCC) decompression is available in DC for GFX9 but there's no way for userspace to enable the feature. Enabling the feature can provide improved GFX performance and power savings in many situations. [How] The GFX9 DCC parameters are passed to amdgpu_dm from AMDGPU via the amdgpu_bo tiling_flags. The plane capability is queried and the parameters are set accordingly. The DCC address is given via a 256 byte aligned offset on the framebuffer address. The DCC address is updated whenever the buffer address changes. Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 90 ++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 85e4afc6790c..afb2e7994a85 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2284,6 +2284,68 @@ static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, return r; } +static inline uint64_t get_dcc_address(uint64_t address, uint64_t tiling_flags) +{ + uint32_t offset = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B); + + return offset ? (address + offset * 256) : 0; +} + +static bool fill_plane_dcc_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + struct dc_plane_state *plane_state, + uint64_t info) +{ + struct dc *dc = adev->dm.dc; + struct dc_dcc_surface_param input = {0}; + struct dc_surface_dcc_cap output = {0}; + uint32_t offset = AMDGPU_TILING_GET(info, DCC_OFFSET_256B); + uint32_t i64b = AMDGPU_TILING_GET(info, DCC_INDEPENDENT_64B) != 0; + uint64_t dcc_address; + + if (!offset) + return false; + + if (!dc->cap_funcs.get_dcc_compression_cap) + return false; + + input.format = plane_state->format; + input.surface_size.width = + plane_state->plane_size.grph.surface_size.width; + input.surface_size.height = + plane_state->plane_size.grph.surface_size.height; + input.swizzle_mode = plane_state->tiling_info.gfx9.swizzle; + + if (plane_state->rotation == ROTATION_ANGLE_0 || + plane_state->rotation == ROTATION_ANGLE_180) + input.scan = SCAN_DIRECTION_HORIZONTAL; + else if (plane_state->rotation == ROTATION_ANGLE_90 || + plane_state->rotation == ROTATION_ANGLE_270) + input.scan = SCAN_DIRECTION_VERTICAL; + + if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) + return false; + + if (!output.capable) + return false; + + if (i64b == 0 && output.grph.rgb.independent_64b_blks != 0) + return false; + + plane_state->dcc.enable = 1; + plane_state->dcc.grph.meta_pitch = + AMDGPU_TILING_GET(info, DCC_PITCH_MAX) + 1; + plane_state->dcc.grph.independent_64b_blks = i64b; + + dcc_address = get_dcc_address(afb->address, info); + plane_state->address.grph.meta_addr.low_part = + lower_32_bits(dcc_address); + plane_state->address.grph.meta_addr.high_part = + upper_32_bits(dcc_address); + + return true; +} + static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, struct dc_plane_state *plane_state, const struct amdgpu_framebuffer *amdgpu_fb) @@ -2336,6 +2398,10 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, return -EINVAL; } + memset(&plane_state->address, 0, sizeof(plane_state->address)); + memset(&plane_state->tiling_info, 0, sizeof(plane_state->tiling_info)); + memset(&plane_state->dcc, 0, sizeof(plane_state->dcc)); + if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { plane_state->address.type = PLN_ADDR_TYPE_GRAPHICS; plane_state->plane_size.grph.surface_size.x = 0; @@ -2367,8 +2433,6 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, plane_state->color_space = COLOR_SPACE_YCBCR709; } - memset(&plane_state->tiling_info, 0, sizeof(plane_state->tiling_info)); - /* Fill GFX8 params */ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { unsigned int bankw, bankh, mtaspect, tile_split, num_banks; @@ -2417,6 +2481,9 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, plane_state->tiling_info.gfx9.swizzle = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); plane_state->tiling_info.gfx9.shaderEnable = 1; + + fill_plane_dcc_attributes(adev, amdgpu_fb, plane_state, + tiling_flags); } plane_state->visible = true; @@ -3534,6 +3601,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, struct amdgpu_bo *rbo; uint64_t chroma_addr = 0; struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; + uint64_t tiling_flags, dcc_address; unsigned int awidth; uint32_t domain; int r; @@ -3574,6 +3642,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, DRM_ERROR("%p bind failed\n", rbo); return r; } + + amdgpu_bo_get_tiling_flags(rbo, &tiling_flags); + amdgpu_bo_unreserve(rbo); afb->address = amdgpu_bo_gpu_offset(rbo); @@ -3587,6 +3658,13 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { plane_state->address.grph.addr.low_part = lower_32_bits(afb->address); plane_state->address.grph.addr.high_part = upper_32_bits(afb->address); + + dcc_address = + get_dcc_address(afb->address, tiling_flags); + plane_state->address.grph.meta_addr.low_part = + lower_32_bits(dcc_address); + plane_state->address.grph.meta_addr.high_part = + upper_32_bits(dcc_address); } else { awidth = ALIGN(new_state->fb->width, 64); plane_state->address.type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; @@ -4566,6 +4644,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct dc_stream_status *stream_status; struct dc_plane_state *surface; + uint64_t tiling_flags, dcc_address; /* Prepare wait for target vblank early - before the fence-waits */ @@ -4588,6 +4667,8 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false, MAX_SCHEDULE_TIMEOUT) < 0); + amdgpu_bo_get_tiling_flags(abo, &tiling_flags); + amdgpu_bo_unreserve(abo); /* @@ -4613,6 +4694,11 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, addr.address.grph.addr.low_part = lower_32_bits(afb->address); addr.address.grph.addr.high_part = upper_32_bits(afb->address); + + dcc_address = get_dcc_address(afb->address, tiling_flags); + addr.address.grph.meta_addr.low_part = lower_32_bits(dcc_address); + addr.address.grph.meta_addr.high_part = upper_32_bits(dcc_address); + addr.flip_immediate = async_flip; timestamp_ns = ktime_get_ns(); From 0aaeefccb4d717b001d64a75ac1141914e928869 Mon Sep 17 00:00:00 2001 From: wentalou Date: Wed, 2 Jan 2019 15:56:27 +0800 Subject: [PATCH 110/539] drm/amdgpu: distinguish early and late re-init log in sriov distinguish ip_reinit_early_sriov and ip_reinit_late_sriov by different log RE-INIT-early and RE-INIT-late Signed-off-by: Wentao Lou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7ff3a28fc903..03b73c562bc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2133,7 +2133,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) continue; r = block->version->funcs->hw_init(adev); - DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); + DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; } @@ -2167,7 +2167,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) continue; r = block->version->funcs->hw_init(adev); - DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); + DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; } From c468f9e2e56f67410b1659d2eacf147a4558f137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 11 Dec 2018 11:06:59 +0100 Subject: [PATCH 111/539] drm/amdgpu: improve GMC v9 page fault message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note if this is a retry fault or not and cleanup the message a bit. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 521ac8954652..9c082f9aea1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -305,6 +305,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0; u64 addr; @@ -326,8 +327,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] %s page fault (src_id:%u ring:%u vmid:%u " + "pasid:%u, for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", + retry_fault ? "retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); From 66f92bcff0f90405150f685e4ef78fd548275f10 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 Jan 2019 13:48:45 -0600 Subject: [PATCH 112/539] drm/amd/display: Fix boolean expression in get_surf_rq_param Fix boolean expression by using logical AND operator '&&' instead of bitwise operator '&'. This issue was detected with the help of Coccinelle. Fixes: 6d04ee9dc101 ("drm/amd/display: Restructuring and cleaning up DML") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index a8b233ee5f97..ad8571f5a142 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -881,7 +881,7 @@ static void get_surf_rq_param( /* the dpte_group_bytes is reduced for the specific case of vertical * access of a tile surface that has dpte request of 8x1 ptes. */ - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) /*reduced, in this case, will have page fault within a group */ + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) /*reduced, in this case, will have page fault within a group */ rq_sizing_param->dpte_group_bytes = 512; else /*full size */ From 72d3f59205ffae6827b4af525a7963f181322e35 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Fri, 4 Jan 2019 10:24:02 +0800 Subject: [PATCH 113/539] drm/amdgpu/sriov: For finishing routine send rel event after init failed When init fail, send rel init, req_fini and rel_fini to host for the finishing routine. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 03b73c562bc8..39d5d058b2c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1645,7 +1645,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) { DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); - return r; + goto init_failed; } adev->ip_blocks[i].status.sw = true; @@ -1654,17 +1654,17 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) r = amdgpu_device_vram_scratch_init(adev); if (r) { DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); - return r; + goto init_failed; } r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); if (r) { DRM_ERROR("hw_init %d failed %d\n", i, r); - return r; + goto init_failed; } r = amdgpu_device_wb_init(adev); if (r) { DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); - return r; + goto init_failed; } adev->ip_blocks[i].status.hw = true; @@ -1675,7 +1675,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("allocate CSA failed %d\n", r); - return r; + goto init_failed; } } } @@ -1683,30 +1683,32 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ if (r) - return r; + goto init_failed; r = amdgpu_device_ip_hw_init_phase1(adev); if (r) - return r; + goto init_failed; r = amdgpu_device_fw_loading(adev); if (r) - return r; + goto init_failed; r = amdgpu_device_ip_hw_init_phase2(adev); if (r) - return r; + goto init_failed; if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); +init_failed: if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_data_exchange(adev); + if (!r) + amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); } - return 0; + return r; } /** @@ -2612,6 +2614,8 @@ fence_driver_init: } dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); + if (amdgpu_virt_request_full_gpu(adev, false)) + amdgpu_virt_release_full_gpu(adev, false); goto failed; } From 0855c9c9ec28451858aeb1e512d2d25fd8397828 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 Jan 2019 14:01:46 -0600 Subject: [PATCH 114/539] drm/amdgpu_vm: fix boolean expressions Fix boolean expressions by using logical AND operator '&&' instead of bitwise operator '&'. This issue was detected with the help of Coccinelle. Fixes: 9a4b7d4c769e ("drm/amdgpu: Add vm context module param") Reviewed-by: Felix Kuehling Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2ea5ce2cefb..ccffcadfd73b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3003,7 +3003,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; @@ -3133,7 +3133,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns vm->pte_support_ats = pte_support_ats; DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->pasid) { From 0db2a8cd6259deffc46208278958d4c2afec608d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 2 Jan 2019 19:16:06 +0800 Subject: [PATCH 115/539] drm/amdgpu/psp: update the naming of GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL was renamed to GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL in latest psp_gfx_if drop Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 0de00fbe9233..f3a7d207af07 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -191,7 +191,7 @@ enum psp_gfx_fw_type GFX_FW_TYPE_MMSCH = 19, GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, - GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, GFX_FW_TYPE_UVD1 = 23, GFX_FW_TYPE_MAX = 24 }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index d78b4306a36f..f2e937a9bf81 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -73,7 +73,7 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * *type = GFX_FW_TYPE_RLC_G; break; case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: - *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL; break; case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; From be4630d96258781d72201af525a34c2509a6f3a0 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 3 Jan 2019 21:38:41 +0800 Subject: [PATCH 116/539] drm/amdgpu/psp: make get_fw_type and prep_cmd_buf to be common interfaces get_fw_type and prep_cmd_buf should be common interface instead of IP specific ones Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 94 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 3 - drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 90 ----------------------- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 75 -------------------- drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 72 ------------------- 5 files changed, 93 insertions(+), 241 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8189a90637f7..53c2d6069fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -501,6 +501,98 @@ static int psp_hw_start(struct psp_context *psp) return 0; } +static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, + enum psp_gfx_fw_type *type) +{ + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_UVD1: + *type = GFX_FW_TYPE_UVD1; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_VCN: + *type = GFX_FW_TYPE_VCN; + break; + case AMDGPU_UCODE_ID_DMCU_ERAM: + *type = GFX_FW_TYPE_DMCU_ERAM; + break; + case AMDGPU_UCODE_ID_DMCU_INTV: + *type = GFX_FW_TYPE_DMCU_ISR; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; + + ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + static int psp_np_fw_load(struct psp_context *psp) { int i, ret; @@ -522,7 +614,7 @@ static int psp_np_fw_load(struct psp_context *psp) /*skip ucode loading in SRIOV VF */ continue; - ret = psp_prep_cmd_buf(ucode, psp->cmd); + ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3ee573b4016e..2ef98cc755d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -65,8 +65,6 @@ struct psp_funcs int (*init_microcode)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); - int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -176,7 +174,6 @@ struct psp_xgmi_topology_info { struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; }; -#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index f2e937a9bf81..77c2bc344dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -38,75 +38,6 @@ MODULE_FIRMWARE("amdgpu/raven_asd.bin"); MODULE_FIRMWARE("amdgpu/picasso_asd.bin"); MODULE_FIRMWARE("amdgpu/raven2_asd.bin"); -static int -psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) -{ - switch(ucode->ucode_id) { - case AMDGPU_UCODE_ID_SDMA0: - *type = GFX_FW_TYPE_SDMA0; - break; - case AMDGPU_UCODE_ID_SDMA1: - *type = GFX_FW_TYPE_SDMA1; - break; - case AMDGPU_UCODE_ID_CP_CE: - *type = GFX_FW_TYPE_CP_CE; - break; - case AMDGPU_UCODE_ID_CP_PFP: - *type = GFX_FW_TYPE_CP_PFP; - break; - case AMDGPU_UCODE_ID_CP_ME: - *type = GFX_FW_TYPE_CP_ME; - break; - case AMDGPU_UCODE_ID_CP_MEC1: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC1_JT: - *type = GFX_FW_TYPE_CP_MEC_ME1; - break; - case AMDGPU_UCODE_ID_CP_MEC2: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC2_JT: - *type = GFX_FW_TYPE_CP_MEC_ME2; - break; - case AMDGPU_UCODE_ID_RLC_G: - *type = GFX_FW_TYPE_RLC_G; - break; - case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: - *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL; - break; - case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: - *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; - break; - case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: - *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; - break; - case AMDGPU_UCODE_ID_SMC: - *type = GFX_FW_TYPE_SMU; - break; - case AMDGPU_UCODE_ID_UVD: - *type = GFX_FW_TYPE_UVD; - break; - case AMDGPU_UCODE_ID_VCE: - *type = GFX_FW_TYPE_VCE; - break; - case AMDGPU_UCODE_ID_VCN: - *type = GFX_FW_TYPE_VCN; - break; - case AMDGPU_UCODE_ID_DMCU_ERAM: - *type = GFX_FW_TYPE_DMCU_ERAM; - break; - case AMDGPU_UCODE_ID_DMCU_INTV: - *type = GFX_FW_TYPE_DMCU_ISR; - break; - case AMDGPU_UCODE_ID_MAXIMUM: - default: - return -EINVAL; - } - - return 0; -} - static int psp_v10_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -158,26 +89,6 @@ out: return err; } -static int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd) -{ - int ret; - uint64_t fw_mem_mc_addr = ucode->mc_addr; - - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); - - cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; - - ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); - if (ret) - DRM_ERROR("Unknown firmware type\n"); - - return ret; -} - static int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -454,7 +365,6 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp) static const struct psp_funcs psp_v10_0_funcs = { .init_microcode = psp_v10_0_init_microcode, - .prep_cmd_buf = psp_v10_0_prep_cmd_buf, .ring_init = psp_v10_0_ring_init, .ring_create = psp_v10_0_ring_create, .ring_stop = psp_v10_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 0c6e7f9b143f..f71384be1f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -40,60 +40,6 @@ MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 -static int -psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) -{ - switch (ucode->ucode_id) { - case AMDGPU_UCODE_ID_SDMA0: - *type = GFX_FW_TYPE_SDMA0; - break; - case AMDGPU_UCODE_ID_SDMA1: - *type = GFX_FW_TYPE_SDMA1; - break; - case AMDGPU_UCODE_ID_CP_CE: - *type = GFX_FW_TYPE_CP_CE; - break; - case AMDGPU_UCODE_ID_CP_PFP: - *type = GFX_FW_TYPE_CP_PFP; - break; - case AMDGPU_UCODE_ID_CP_ME: - *type = GFX_FW_TYPE_CP_ME; - break; - case AMDGPU_UCODE_ID_CP_MEC1: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC1_JT: - *type = GFX_FW_TYPE_CP_MEC_ME1; - break; - case AMDGPU_UCODE_ID_CP_MEC2: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC2_JT: - *type = GFX_FW_TYPE_CP_MEC_ME2; - break; - case AMDGPU_UCODE_ID_RLC_G: - *type = GFX_FW_TYPE_RLC_G; - break; - case AMDGPU_UCODE_ID_SMC: - *type = GFX_FW_TYPE_SMU; - break; - case AMDGPU_UCODE_ID_UVD: - *type = GFX_FW_TYPE_UVD; - break; - case AMDGPU_UCODE_ID_VCE: - *type = GFX_FW_TYPE_VCE; - break; - case AMDGPU_UCODE_ID_UVD1: - *type = GFX_FW_TYPE_UVD1; - break; - case AMDGPU_UCODE_ID_MAXIMUM: - default: - return -EINVAL; - } - - return 0; -} - static int psp_v11_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -267,26 +213,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v11_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd) -{ - int ret; - uint64_t fw_mem_mc_addr = ucode->mc_addr; - - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); - - cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; - - ret = psp_v11_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); - if (ret) - DRM_ERROR("Unknown firmware type\n"); - - return ret; -} - static int psp_v11_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -753,7 +679,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v11_0_bootloader_load_sos, - .prep_cmd_buf = psp_v11_0_prep_cmd_buf, .ring_init = psp_v11_0_ring_init, .ring_create = psp_v11_0_ring_create, .ring_stop = psp_v11_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 79694ff16969..c63de945c021 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -47,57 +47,6 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; -static int -psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) -{ - switch(ucode->ucode_id) { - case AMDGPU_UCODE_ID_SDMA0: - *type = GFX_FW_TYPE_SDMA0; - break; - case AMDGPU_UCODE_ID_SDMA1: - *type = GFX_FW_TYPE_SDMA1; - break; - case AMDGPU_UCODE_ID_CP_CE: - *type = GFX_FW_TYPE_CP_CE; - break; - case AMDGPU_UCODE_ID_CP_PFP: - *type = GFX_FW_TYPE_CP_PFP; - break; - case AMDGPU_UCODE_ID_CP_ME: - *type = GFX_FW_TYPE_CP_ME; - break; - case AMDGPU_UCODE_ID_CP_MEC1: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC1_JT: - *type = GFX_FW_TYPE_CP_MEC_ME1; - break; - case AMDGPU_UCODE_ID_CP_MEC2: - *type = GFX_FW_TYPE_CP_MEC; - break; - case AMDGPU_UCODE_ID_CP_MEC2_JT: - *type = GFX_FW_TYPE_CP_MEC_ME2; - break; - case AMDGPU_UCODE_ID_RLC_G: - *type = GFX_FW_TYPE_RLC_G; - break; - case AMDGPU_UCODE_ID_SMC: - *type = GFX_FW_TYPE_SMU; - break; - case AMDGPU_UCODE_ID_UVD: - *type = GFX_FW_TYPE_UVD; - break; - case AMDGPU_UCODE_ID_VCE: - *type = GFX_FW_TYPE_VCE; - break; - case AMDGPU_UCODE_ID_MAXIMUM: - default: - return -EINVAL; - } - - return 0; -} - static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -277,26 +226,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) return ret; } -static int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd) -{ - int ret; - uint64_t fw_mem_mc_addr = ucode->mc_addr; - - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); - - cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; - - ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); - if (ret) - DRM_ERROR("Unknown firmware type\n"); - - return ret; -} - static int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -615,7 +544,6 @@ static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, .bootloader_load_sos = psp_v3_1_bootloader_load_sos, - .prep_cmd_buf = psp_v3_1_prep_cmd_buf, .ring_init = psp_v3_1_ring_init, .ring_create = psp_v3_1_ring_create, .ring_stop = psp_v3_1_ring_stop, From 36ca09a02aa6b02cb33b04962ec373a164e636a6 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Fri, 4 Jan 2019 13:23:06 -0500 Subject: [PATCH 117/539] drm/amdgpu: Add message print when unable to get valid hive Add message print out and return -EINVAL when driver can not get valid hive from hive arrary on xgmi configuration Signed-off-by: shaoyunl Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 8a8bc60cb6b4..ac57a8767283 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -113,8 +113,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) mutex_lock(&xgmi_mutex); hive = amdgpu_get_xgmi_hive(adev); - if (!hive) + if (!hive) { + ret = -EINVAL; + dev_err(adev->dev, + "XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n", + adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id); goto exit; + } hive_topology = &hive->topology_info; From 9e869063b0021f29919c560745361999c71c086a Mon Sep 17 00:00:00 2001 From: Leo Li Date: Sun, 11 Nov 2018 11:11:52 -0500 Subject: [PATCH 118/539] drm/amd/display: Move iteration out of dm_update_planes [Why] To reduce indentation of dm_update_planes, and to make it operate on single plane instances. [How] Move iteration of plane states into atomic_check. No functional change is intended. Signed-off-by: Leo Li Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 254 +++++++++--------- 1 file changed, 131 insertions(+), 123 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index afb2e7994a85..790c4cef77bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5680,145 +5680,141 @@ fail: return ret; } -static int dm_update_planes_state(struct dc *dc, - struct drm_atomic_state *state, - bool enable, - bool *lock_and_validation_needed) +static int dm_update_plane_state(struct dc *dc, + struct drm_atomic_state *state, + struct drm_plane *plane, + struct drm_plane_state *old_plane_state, + struct drm_plane_state *new_plane_state, + bool enable, + bool *lock_and_validation_needed) { struct dm_atomic_state *dm_state = NULL; struct drm_crtc *new_plane_crtc, *old_plane_crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; - struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state; struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state; - int i ; /* TODO return page_flip_needed() function */ bool pflip_needed = !state->allow_modeset; int ret = 0; - /* Add new planes, in reverse order as DC expectation */ - for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { - new_plane_crtc = new_plane_state->crtc; - old_plane_crtc = old_plane_state->crtc; - dm_new_plane_state = to_dm_plane_state(new_plane_state); - dm_old_plane_state = to_dm_plane_state(old_plane_state); + new_plane_crtc = new_plane_state->crtc; + old_plane_crtc = old_plane_state->crtc; + dm_new_plane_state = to_dm_plane_state(new_plane_state); + dm_old_plane_state = to_dm_plane_state(old_plane_state); - /*TODO Implement atomic check for cursor plane */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; + /*TODO Implement atomic check for cursor plane */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) + return 0; - /* Remove any changed/removed planes */ - if (!enable) { - if (pflip_needed && - plane->type != DRM_PLANE_TYPE_OVERLAY) - continue; + /* Remove any changed/removed planes */ + if (!enable) { + if (pflip_needed && + plane->type != DRM_PLANE_TYPE_OVERLAY) + return 0; - if (!old_plane_crtc) - continue; + if (!old_plane_crtc) + return 0; - old_crtc_state = drm_atomic_get_old_crtc_state( - state, old_plane_crtc); - dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + old_crtc_state = drm_atomic_get_old_crtc_state( + state, old_plane_crtc); + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - if (!dm_old_crtc_state->stream) - continue; + if (!dm_old_crtc_state->stream) + return 0; - DRM_DEBUG_ATOMIC("Disabling DRM plane: %d on DRM crtc %d\n", - plane->base.id, old_plane_crtc->base.id); + DRM_DEBUG_ATOMIC("Disabling DRM plane: %d on DRM crtc %d\n", + plane->base.id, old_plane_crtc->base.id); - ret = dm_atomic_get_state(state, &dm_state); - if (ret) - return ret; + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + return ret; - if (!dc_remove_plane_from_context( - dc, - dm_old_crtc_state->stream, - dm_old_plane_state->dc_state, - dm_state->context)) { + if (!dc_remove_plane_from_context( + dc, + dm_old_crtc_state->stream, + dm_old_plane_state->dc_state, + dm_state->context)) { - ret = EINVAL; - return ret; - } - - - dc_plane_state_release(dm_old_plane_state->dc_state); - dm_new_plane_state->dc_state = NULL; - - *lock_and_validation_needed = true; - - } else { /* Add new planes */ - struct dc_plane_state *dc_new_plane_state; - - if (drm_atomic_plane_disabling(plane->state, new_plane_state)) - continue; - - if (!new_plane_crtc) - continue; - - new_crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_crtc); - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - - if (!dm_new_crtc_state->stream) - continue; - - if (pflip_needed && - plane->type != DRM_PLANE_TYPE_OVERLAY) - continue; - - WARN_ON(dm_new_plane_state->dc_state); - - dc_new_plane_state = dc_create_plane_state(dc); - if (!dc_new_plane_state) - return -ENOMEM; - - DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n", - plane->base.id, new_plane_crtc->base.id); - - ret = fill_plane_attributes( - new_plane_crtc->dev->dev_private, - dc_new_plane_state, - new_plane_state, - new_crtc_state); - if (ret) { - dc_plane_state_release(dc_new_plane_state); - return ret; - } - - ret = dm_atomic_get_state(state, &dm_state); - if (ret) { - dc_plane_state_release(dc_new_plane_state); - return ret; - } - - /* - * Any atomic check errors that occur after this will - * not need a release. The plane state will be attached - * to the stream, and therefore part of the atomic - * state. It'll be released when the atomic state is - * cleaned. - */ - if (!dc_add_plane_to_context( - dc, - dm_new_crtc_state->stream, - dc_new_plane_state, - dm_state->context)) { - - dc_plane_state_release(dc_new_plane_state); - return -EINVAL; - } - - dm_new_plane_state->dc_state = dc_new_plane_state; - - /* Tell DC to do a full surface update every time there - * is a plane change. Inefficient, but works for now. - */ - dm_new_plane_state->dc_state->update_flags.bits.full_update = 1; - - *lock_and_validation_needed = true; + ret = EINVAL; + return ret; } + + + dc_plane_state_release(dm_old_plane_state->dc_state); + dm_new_plane_state->dc_state = NULL; + + *lock_and_validation_needed = true; + + } else { /* Add new planes */ + struct dc_plane_state *dc_new_plane_state; + + if (drm_atomic_plane_disabling(plane->state, new_plane_state)) + return 0; + + if (!new_plane_crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_crtc); + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + + if (!dm_new_crtc_state->stream) + return 0; + + if (pflip_needed && plane->type != DRM_PLANE_TYPE_OVERLAY) + return 0; + + WARN_ON(dm_new_plane_state->dc_state); + + dc_new_plane_state = dc_create_plane_state(dc); + if (!dc_new_plane_state) + return -ENOMEM; + + DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n", + plane->base.id, new_plane_crtc->base.id); + + ret = fill_plane_attributes( + new_plane_crtc->dev->dev_private, + dc_new_plane_state, + new_plane_state, + new_crtc_state); + if (ret) { + dc_plane_state_release(dc_new_plane_state); + return ret; + } + + ret = dm_atomic_get_state(state, &dm_state); + if (ret) { + dc_plane_state_release(dc_new_plane_state); + return ret; + } + + /* + * Any atomic check errors that occur after this will + * not need a release. The plane state will be attached + * to the stream, and therefore part of the atomic + * state. It'll be released when the atomic state is + * cleaned. + */ + if (!dc_add_plane_to_context( + dc, + dm_new_crtc_state->stream, + dc_new_plane_state, + dm_state->context)) { + + dc_plane_state_release(dc_new_plane_state); + return -EINVAL; + } + + dm_new_plane_state->dc_state = dc_new_plane_state; + + /* Tell DC to do a full surface update every time there + * is a plane change. Inefficient, but works for now. + */ + dm_new_plane_state->dc_state->update_flags.bits.full_update = 1; + + *lock_and_validation_needed = true; } @@ -5976,6 +5972,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_connector_state *old_con_state, *new_con_state; struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_plane *plane; + struct drm_plane_state *old_plane_state, *new_plane_state; enum surface_update_type update_type = UPDATE_TYPE_FAST; enum surface_update_type overall_update_type = UPDATE_TYPE_FAST; @@ -6010,9 +6008,14 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } /* Remove exiting planes if they are modified */ - ret = dm_update_planes_state(dc, state, false, &lock_and_validation_needed); - if (ret) { - goto fail; + for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { + ret = dm_update_plane_state(dc, state, plane, + old_plane_state, + new_plane_state, + false, + &lock_and_validation_needed); + if (ret) + goto fail; } /* Disable all crtcs which require disable */ @@ -6028,9 +6031,14 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } /* Add new/modified planes */ - ret = dm_update_planes_state(dc, state, true, &lock_and_validation_needed); - if (ret) { - goto fail; + for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { + ret = dm_update_plane_state(dc, state, plane, + old_plane_state, + new_plane_state, + true, + &lock_and_validation_needed); + if (ret) + goto fail; } /* Run this here since we want to validate the streams we created */ From 4b9674e509ea4365b68c5e309c402ef6544d567a Mon Sep 17 00:00:00 2001 From: Leo Li Date: Sun, 11 Nov 2018 11:35:13 -0500 Subject: [PATCH 119/539] drm/amd/display: Move iteration out of dm_update_crtcs [Why] To reduce indent in dm_update_crtcs, and to make it operate on single instances. [How] Move iteration of plane states into atomic_check. No functional change is intended. Signed-off-by: Leo Li Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 363 +++++++++--------- 1 file changed, 186 insertions(+), 177 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 790c4cef77bb..a67254010628 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5461,15 +5461,15 @@ static void reset_freesync_config_for_crtc( sizeof(new_crtc_state->vrr_infopacket)); } -static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, - struct drm_atomic_state *state, - bool enable, - bool *lock_and_validation_needed) +static int dm_update_crtc_state(struct amdgpu_display_manager *dm, + struct drm_atomic_state *state, + struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state, + struct drm_crtc_state *new_crtc_state, + bool enable, + bool *lock_and_validation_needed) { struct dm_atomic_state *dm_state = NULL; - struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - int i; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; struct dc_stream_state *new_stream; int ret = 0; @@ -5478,200 +5478,199 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, * TODO Move this code into dm_crtc_atomic_check once we get rid of dc_validation_set * update changed items */ - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct amdgpu_crtc *acrtc = NULL; - struct amdgpu_dm_connector *aconnector = NULL; - struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; - struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; - struct drm_plane_state *new_plane_state = NULL; + struct amdgpu_crtc *acrtc = NULL; + struct amdgpu_dm_connector *aconnector = NULL; + struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; + struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; + struct drm_plane_state *new_plane_state = NULL; - new_stream = NULL; + new_stream = NULL; - dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - acrtc = to_amdgpu_crtc(crtc); + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + acrtc = to_amdgpu_crtc(crtc); - new_plane_state = drm_atomic_get_new_plane_state(state, new_crtc_state->crtc->primary); + new_plane_state = drm_atomic_get_new_plane_state(state, new_crtc_state->crtc->primary); - if (new_crtc_state->enable && new_plane_state && !new_plane_state->fb) { + if (new_crtc_state->enable && new_plane_state && !new_plane_state->fb) { + ret = -EINVAL; + goto fail; + } + + aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + + /* TODO This hack should go away */ + if (aconnector && enable) { + /* Make sure fake sink is created in plug-in scenario */ + drm_new_conn_state = drm_atomic_get_new_connector_state(state, + &aconnector->base); + drm_old_conn_state = drm_atomic_get_old_connector_state(state, + &aconnector->base); + + if (IS_ERR(drm_new_conn_state)) { + ret = PTR_ERR_OR_ZERO(drm_new_conn_state); + goto fail; + } + + dm_new_conn_state = to_dm_connector_state(drm_new_conn_state); + dm_old_conn_state = to_dm_connector_state(drm_old_conn_state); + + new_stream = create_stream_for_sink(aconnector, + &new_crtc_state->mode, + dm_new_conn_state, + dm_old_crtc_state->stream); + + /* + * we can have no stream on ACTION_SET if a display + * was disconnected during S3, in this case it is not an + * error, the OS will be updated after detection, and + * will do the right thing on next atomic commit + */ + + if (!new_stream) { + DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n", + __func__, acrtc->base.base.id); + ret = -ENOMEM; + goto fail; + } + + dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; + + if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && + dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { + new_crtc_state->mode_changed = false; + DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d", + new_crtc_state->mode_changed); + } + } + + if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) + goto skip_modeset; + + DRM_DEBUG_DRIVER( + "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " + "planes_changed:%d, mode_changed:%d,active_changed:%d," + "connectors_changed:%d\n", + acrtc->crtc_id, + new_crtc_state->enable, + new_crtc_state->active, + new_crtc_state->planes_changed, + new_crtc_state->mode_changed, + new_crtc_state->active_changed, + new_crtc_state->connectors_changed); + + /* Remove stream for any changed/disabled CRTC */ + if (!enable) { + + if (!dm_old_crtc_state->stream) + goto skip_modeset; + + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto fail; + + DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n", + crtc->base.id); + + /* i.e. reset mode */ + if (dc_remove_stream_from_ctx( + dm->dc, + dm_state->context, + dm_old_crtc_state->stream) != DC_OK) { ret = -EINVAL; goto fail; } - aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + dc_stream_release(dm_old_crtc_state->stream); + dm_new_crtc_state->stream = NULL; - /* TODO This hack should go away */ - if (aconnector && enable) { - /* Make sure fake sink is created in plug-in scenario */ - drm_new_conn_state = drm_atomic_get_new_connector_state(state, - &aconnector->base); - drm_old_conn_state = drm_atomic_get_old_connector_state(state, - &aconnector->base); + reset_freesync_config_for_crtc(dm_new_crtc_state); - if (IS_ERR(drm_new_conn_state)) { - ret = PTR_ERR_OR_ZERO(drm_new_conn_state); - break; - } + *lock_and_validation_needed = true; - dm_new_conn_state = to_dm_connector_state(drm_new_conn_state); - dm_old_conn_state = to_dm_connector_state(drm_old_conn_state); + } else {/* Add stream for any updated/enabled CRTC */ + /* + * Quick fix to prevent NULL pointer on new_stream when + * added MST connectors not found in existing crtc_state in the chained mode + * TODO: need to dig out the root cause of that + */ + if (!aconnector || (!aconnector->dc_sink && aconnector->mst_port)) + goto skip_modeset; - new_stream = create_stream_for_sink(aconnector, - &new_crtc_state->mode, - dm_new_conn_state, - dm_old_crtc_state->stream); + if (modereset_required(new_crtc_state)) + goto skip_modeset; - /* - * we can have no stream on ACTION_SET if a display - * was disconnected during S3, in this case it is not an - * error, the OS will be updated after detection, and - * will do the right thing on next atomic commit - */ + if (modeset_required(new_crtc_state, new_stream, + dm_old_crtc_state->stream)) { - if (!new_stream) { - DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n", - __func__, acrtc->base.base.id); - break; - } - - dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; - - if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && - dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { - new_crtc_state->mode_changed = false; - DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d", - new_crtc_state->mode_changed); - } - } - - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) - goto next_crtc; - - DRM_DEBUG_DRIVER( - "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " - "planes_changed:%d, mode_changed:%d,active_changed:%d," - "connectors_changed:%d\n", - acrtc->crtc_id, - new_crtc_state->enable, - new_crtc_state->active, - new_crtc_state->planes_changed, - new_crtc_state->mode_changed, - new_crtc_state->active_changed, - new_crtc_state->connectors_changed); - - /* Remove stream for any changed/disabled CRTC */ - if (!enable) { - - if (!dm_old_crtc_state->stream) - goto next_crtc; + WARN_ON(dm_new_crtc_state->stream); ret = dm_atomic_get_state(state, &dm_state); if (ret) goto fail; - DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n", - crtc->base.id); + dm_new_crtc_state->stream = new_stream; - /* i.e. reset mode */ - if (dc_remove_stream_from_ctx( + dc_stream_retain(new_stream); + + DRM_DEBUG_DRIVER("Enabling DRM crtc: %d\n", + crtc->base.id); + + if (dc_add_stream_to_ctx( dm->dc, dm_state->context, - dm_old_crtc_state->stream) != DC_OK) { + dm_new_crtc_state->stream) != DC_OK) { ret = -EINVAL; goto fail; } - dc_stream_release(dm_old_crtc_state->stream); - dm_new_crtc_state->stream = NULL; - - reset_freesync_config_for_crtc(dm_new_crtc_state); - *lock_and_validation_needed = true; - - } else {/* Add stream for any updated/enabled CRTC */ - /* - * Quick fix to prevent NULL pointer on new_stream when - * added MST connectors not found in existing crtc_state in the chained mode - * TODO: need to dig out the root cause of that - */ - if (!aconnector || (!aconnector->dc_sink && aconnector->mst_port)) - goto next_crtc; - - if (modereset_required(new_crtc_state)) - goto next_crtc; - - if (modeset_required(new_crtc_state, new_stream, - dm_old_crtc_state->stream)) { - - WARN_ON(dm_new_crtc_state->stream); - - ret = dm_atomic_get_state(state, &dm_state); - if (ret) - goto fail; - - dm_new_crtc_state->stream = new_stream; - - dc_stream_retain(new_stream); - - DRM_DEBUG_DRIVER("Enabling DRM crtc: %d\n", - crtc->base.id); - - if (dc_add_stream_to_ctx( - dm->dc, - dm_state->context, - dm_new_crtc_state->stream) != DC_OK) { - ret = -EINVAL; - goto fail; - } - - *lock_and_validation_needed = true; - } } - -next_crtc: - /* Release extra reference */ - if (new_stream) - dc_stream_release(new_stream); - - /* - * We want to do dc stream updates that do not require a - * full modeset below. - */ - if (!(enable && aconnector && new_crtc_state->enable && - new_crtc_state->active)) - continue; - /* - * Given above conditions, the dc state cannot be NULL because: - * 1. We're in the process of enabling CRTCs (just been added - * to the dc context, or already is on the context) - * 2. Has a valid connector attached, and - * 3. Is currently active and enabled. - * => The dc stream state currently exists. - */ - BUG_ON(dm_new_crtc_state->stream == NULL); - - /* Scaling or underscan settings */ - if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state)) - update_stream_scaling_settings( - &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream); - - /* - * Color management settings. We also update color properties - * when a modeset is needed, to ensure it gets reprogrammed. - */ - if (dm_new_crtc_state->base.color_mgmt_changed || - drm_atomic_crtc_needs_modeset(new_crtc_state)) { - ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state); - if (ret) - goto fail; - amdgpu_dm_set_ctm(dm_new_crtc_state); - } - - /* Update Freesync settings. */ - get_freesync_config_for_crtc(dm_new_crtc_state, - dm_new_conn_state); } +skip_modeset: + /* Release extra reference */ + if (new_stream) + dc_stream_release(new_stream); + + /* + * We want to do dc stream updates that do not require a + * full modeset below. + */ + if (!(enable && aconnector && new_crtc_state->enable && + new_crtc_state->active)) + return 0; + /* + * Given above conditions, the dc state cannot be NULL because: + * 1. We're in the process of enabling CRTCs (just been added + * to the dc context, or already is on the context) + * 2. Has a valid connector attached, and + * 3. Is currently active and enabled. + * => The dc stream state currently exists. + */ + BUG_ON(dm_new_crtc_state->stream == NULL); + + /* Scaling or underscan settings */ + if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state)) + update_stream_scaling_settings( + &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream); + + /* + * Color management settings. We also update color properties + * when a modeset is needed, to ensure it gets reprogrammed. + */ + if (dm_new_crtc_state->base.color_mgmt_changed || + drm_atomic_crtc_needs_modeset(new_crtc_state)) { + ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state); + if (ret) + goto fail; + amdgpu_dm_set_ctm(dm_new_crtc_state); + } + + /* Update Freesync settings. */ + get_freesync_config_for_crtc(dm_new_crtc_state, + dm_new_conn_state); + return ret; fail: @@ -6019,15 +6018,25 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } /* Disable all crtcs which require disable */ - ret = dm_update_crtcs_state(&adev->dm, state, false, &lock_and_validation_needed); - if (ret) { - goto fail; + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + ret = dm_update_crtc_state(&adev->dm, state, crtc, + old_crtc_state, + new_crtc_state, + false, + &lock_and_validation_needed); + if (ret) + goto fail; } /* Enable all crtcs which require enable */ - ret = dm_update_crtcs_state(&adev->dm, state, true, &lock_and_validation_needed); - if (ret) { - goto fail; + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + ret = dm_update_crtc_state(&adev->dm, state, crtc, + old_crtc_state, + new_crtc_state, + true, + &lock_and_validation_needed); + if (ret) + goto fail; } /* Add new/modified planes */ From 22d6575b8db59097655797c740bf840a616a6816 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Mon, 7 Jan 2019 17:39:10 -0500 Subject: [PATCH 120/539] drm/amd/amdgpu: add missing mutex lock to amdgpu_get_xgmi_hive() (v3) v2: Move locks around in other functions so that this function can stand on its own. Also only hold the hive specific lock for add/remove device instead of the driver global lock so you can't add/remove devices in parallel from one hive. v3: add reset_lock Acked-by: Shaoyun.liu < Shaoyun.liu@amd.com> Signed-off-by: Tom St Denis Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 40 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 5 +-- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 39d5d058b2c7..1a558dc41ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3525,9 +3525,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * by different nodes. No point also since the one node already executing * reset will also reset all the other nodes in the hive. */ - hive = amdgpu_get_xgmi_hive(adev); + hive = amdgpu_get_xgmi_hive(adev, 0); if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && - !mutex_trylock(&hive->hive_lock)) + !mutex_trylock(&hive->reset_lock)) return 0; /* Start with adev pre asic reset first for soft reset check.*/ @@ -3606,7 +3606,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ } if (hive && adev->gmc.xgmi.num_physical_nodes > 1) - mutex_unlock(&hive->hive_lock); + mutex_unlock(&hive->reset_lock); if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index ac57a8767283..dac187454b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,26 +40,40 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) return &hive->device_list; } -struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock) { int i; struct amdgpu_hive_info *tmp; if (!adev->gmc.xgmi.hive_id) return NULL; + + mutex_lock(&xgmi_mutex); + for (i = 0 ; i < hive_count; ++i) { tmp = &xgmi_hives[i]; - if (tmp->hive_id == adev->gmc.xgmi.hive_id) + if (tmp->hive_id == adev->gmc.xgmi.hive_id) { + if (lock) + mutex_lock(&tmp->hive_lock); + mutex_unlock(&xgmi_mutex); return tmp; + } } - if (i >= AMDGPU_MAX_XGMI_HIVE) + if (i >= AMDGPU_MAX_XGMI_HIVE) { + mutex_unlock(&xgmi_mutex); return NULL; + } /* initialize new hive if not exist */ tmp = &xgmi_hives[hive_count++]; tmp->hive_id = adev->gmc.xgmi.hive_id; INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); + mutex_init(&tmp->reset_lock); + if (lock) + mutex_lock(&tmp->hive_lock); + + mutex_unlock(&xgmi_mutex); return tmp; } @@ -111,8 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) return ret; } - mutex_lock(&xgmi_mutex); - hive = amdgpu_get_xgmi_hive(adev); + hive = amdgpu_get_xgmi_hive(adev, 1); if (!hive) { ret = -EINVAL; dev_err(adev->dev, @@ -147,8 +160,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) break; } + mutex_unlock(&hive->hive_lock); exit: - mutex_unlock(&xgmi_mutex); return ret; } @@ -159,15 +172,14 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) if (!adev->gmc.xgmi.supported) return; - mutex_lock(&xgmi_mutex); - - hive = amdgpu_get_xgmi_hive(adev); + hive = amdgpu_get_xgmi_hive(adev, 1); if (!hive) - goto exit; + return; - if (!(hive->number_devices--)) + if (!(hive->number_devices--)) { mutex_destroy(&hive->hive_lock); - -exit: - mutex_unlock(&xgmi_mutex); + mutex_destroy(&hive->reset_lock); + } else { + mutex_unlock(&hive->hive_lock); + } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6151eb9c8ad3..14bc60664159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -29,10 +29,11 @@ struct amdgpu_hive_info { struct list_head device_list; struct psp_xgmi_topology_info topology_info; int number_devices; - struct mutex hive_lock; + struct mutex hive_lock, + reset_lock; }; -struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); From a0bb79e2559c9330c82080d6e4f8c762d72ed0f1 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 7 Jan 2019 06:02:06 -0500 Subject: [PATCH 121/539] drm/amdgpu: Add NBIO SMN headers v2 We need these offsets for PCIE perf counters, so include them as well as the the previously-used defines from the nbio_*.c files v2: Return NBIF definitions back to previous files Signed-off-by: Kent Russell Reviewed-by: Felix Kuehling Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 6 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 5 +- .../amd/include/asic_reg/nbio/nbio_6_1_smn.h | 58 +++++++++++++++++++ .../amd/include/asic_reg/nbio/nbio_7_0_smn.h | 54 +++++++++++++++++ .../include/asic_reg/nbio/nbio_7_4_0_smn.h | 53 +++++++++++++++++ 6 files changed, 168 insertions(+), 12 deletions(-) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index accdedd63c98..1965756348bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -27,13 +27,9 @@ #include "nbio/nbio_6_1_default.h" #include "nbio/nbio_6_1_offset.h" #include "nbio/nbio_6_1_sh_mask.h" +#include "nbio/nbio_6_1_smn.h" #include "vega10_enum.h" -#define smnCPM_CONTROL 0x11180460 -#define smnPCIE_CNTL2 0x11180070 -#define smnPCIE_CONFIG_CNTL 0x11180044 -#define smnPCIE_CI_CNTL 0x11180080 - static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index df34dc79d444..38291c5014be 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -27,13 +27,11 @@ #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" +#include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c -#define smnCPM_CONTROL 0x11180460 -#define smnPCIE_CNTL2 0x11180070 - static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4cd31a276dcd..0a613098337a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -26,13 +26,10 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" +#include "nbio/nbio_7_4_0_smn.h" #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c -#define smnCPM_CONTROL 0x11180460 -#define smnPCIE_CNTL2 0x11180070 -#define smnPCIE_CI_CNTL 0x11180080 - static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h new file mode 100644 index 000000000000..8c75669eb500 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _nbio_6_1_SMN_HEADER +#define _nbio_6_1_SMN_HEADER + + +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_CONFIG_CNTL 0x11180044 +#define smnPCIE_CI_CNTL 0x11180080 + + +#define smnPCIE_PERF_COUNT_CNTL 0x11180200 +#define smnPCIE_PERF_CNTL_TXCLK 0x11180204 +#define smnPCIE_PERF_COUNT0_TXCLK 0x11180208 +#define smnPCIE_PERF_COUNT1_TXCLK 0x1118020c +#define smnPCIE_PERF_CNTL_MST_R_CLK 0x11180210 +#define smnPCIE_PERF_COUNT0_MST_R_CLK 0x11180214 +#define smnPCIE_PERF_COUNT1_MST_R_CLK 0x11180218 +#define smnPCIE_PERF_CNTL_MST_C_CLK 0x1118021c +#define smnPCIE_PERF_COUNT0_MST_C_CLK 0x11180220 +#define smnPCIE_PERF_COUNT1_MST_C_CLK 0x11180224 +#define smnPCIE_PERF_CNTL_SLV_R_CLK 0x11180228 +#define smnPCIE_PERF_COUNT0_SLV_R_CLK 0x1118022c +#define smnPCIE_PERF_COUNT1_SLV_R_CLK 0x11180230 +#define smnPCIE_PERF_CNTL_SLV_S_C_CLK 0x11180234 +#define smnPCIE_PERF_COUNT0_SLV_S_C_CLK 0x11180238 +#define smnPCIE_PERF_COUNT1_SLV_S_C_CLK 0x1118023c +#define smnPCIE_PERF_CNTL_SLV_NS_C_CLK 0x11180240 +#define smnPCIE_PERF_COUNT0_SLV_NS_C_CLK 0x11180244 +#define smnPCIE_PERF_COUNT1_SLV_NS_C_CLK 0x11180248 +#define smnPCIE_PERF_CNTL_EVENT0_PORT_SEL 0x1118024c +#define smnPCIE_PERF_CNTL_EVENT1_PORT_SEL 0x11180250 +#define smnPCIE_PERF_CNTL_TXCLK2 0x11180254 +#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 +#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c + +#endif // _nbio_6_1_SMN_HEADER + diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h new file mode 100644 index 000000000000..5563f0715896 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _nbio_7_0_SMN_HEADER +#define _nbio_7_0_SMN_HEADER + + +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + +#define smnPCIE_PERF_COUNT_CNTL 0x11180200 +#define smnPCIE_PERF_CNTL_TXCLK 0x11180204 +#define smnPCIE_PERF_COUNT0_TXCLK 0x11180208 +#define smnPCIE_PERF_COUNT1_TXCLK 0x1118020c +#define smnPCIE_PERF_CNTL_MST_R_CLK 0x11180210 +#define smnPCIE_PERF_COUNT0_MST_R_CLK 0x11180214 +#define smnPCIE_PERF_COUNT1_MST_R_CLK 0x11180218 +#define smnPCIE_PERF_CNTL_MST_C_CLK 0x1118021c +#define smnPCIE_PERF_COUNT0_MST_C_CLK 0x11180220 +#define smnPCIE_PERF_COUNT1_MST_C_CLK 0x11180224 +#define smnPCIE_PERF_CNTL_SLV_R_CLK 0x11180228 +#define smnPCIE_PERF_COUNT0_SLV_R_CLK 0x1118022c +#define smnPCIE_PERF_COUNT1_SLV_R_CLK 0x11180230 +#define smnPCIE_PERF_CNTL_SLV_S_C_CLK 0x11180234 +#define smnPCIE_PERF_COUNT0_SLV_S_C_CLK 0x11180238 +#define smnPCIE_PERF_COUNT1_SLV_S_C_CLK 0x1118023c +#define smnPCIE_PERF_CNTL_SLV_NS_C_CLK 0x11180240 +#define smnPCIE_PERF_COUNT0_SLV_NS_C_CLK 0x11180244 +#define smnPCIE_PERF_COUNT1_SLV_NS_C_CLK 0x11180248 +#define smnPCIE_PERF_CNTL_EVENT0_PORT_SEL 0x1118024c +#define smnPCIE_PERF_CNTL_EVENT1_PORT_SEL 0x11180250 +#define smnPCIE_PERF_CNTL_TXCLK2 0x11180254 +#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 +#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c + +#endif // _nbio_7_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h new file mode 100644 index 000000000000..c1457d880c4d --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _nbio_7_4_0_SMN_HEADER +#define _nbio_7_4_0_SMN_HEADER + + +#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_CI_CNTL 0x11180080 + +#define smnPCIE_PERF_COUNT_CNTL 0x11180200 +#define smnPCIE_PERF_CNTL_TXCLK1 0x11180204 +#define smnPCIE_PERF_COUNT0_TXCLK1 0x11180208 +#define smnPCIE_PERF_COUNT1_TXCLK1 0x1118020c +#define smnPCIE_PERF_CNTL_TXCLK2 0x11180210 +#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180214 +#define smnPCIE_PERF_COUNT1_TXCLK2 0x11180218 +#define smnPCIE_PERF_CNTL_TXCLK3 0x1118021c +#define smnPCIE_PERF_COUNT0_TXCLK3 0x11180220 +#define smnPCIE_PERF_COUNT1_TXCLK3 0x11180224 +#define smnPCIE_PERF_CNTL_TXCLK4 0x11180228 +#define smnPCIE_PERF_COUNT0_TXCLK4 0x1118022c +#define smnPCIE_PERF_COUNT1_TXCLK4 0x11180230 +#define smnPCIE_PERF_CNTL_SCLK1 0x11180234 +#define smnPCIE_PERF_COUNT0_SCLK1 0x11180238 +#define smnPCIE_PERF_COUNT1_SCLK1 0x1118023c +#define smnPCIE_PERF_CNTL_SCLK2 0x11180240 +#define smnPCIE_PERF_COUNT0_SCLK2 0x11180244 +#define smnPCIE_PERF_COUNT1_SCLK2 0x11180248 +#define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c +#define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250 + +#endif // _nbio_7_4_0_SMN_HEADER From b45e18acd394954c24943762ada5d8dada75f2b9 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 3 Jan 2019 08:12:39 -0500 Subject: [PATCH 122/539] drm/amdgpu: Add sysfs file for PCIe usage v5 Add a sysfs file that reports the number of bytes transmitted and received in the last second. This can be used to approximate the PCIe bandwidth usage over the last second. v2: Clarify use of mps as estimation of bandwidth v3: Don't make the file on APUs v4: Early exit for APUs in the read function, change output to display "packets-received packets-sent mps" v5: fix missing header for si (Alex) Signed-off-by: Kent Russell Reviewed-by: Felix Kuehling Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 36 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/cik.c | 47 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/si.c | 48 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 50 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 47 ++++++++++++++++++++++++ 6 files changed, 232 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bcef6ea4bcf9..3b30bb2cdd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { bool (*need_full_reset)(struct amdgpu_device *adev); /* initialize doorbell layout for specific asic*/ void (*init_doorbell_index)(struct amdgpu_device *adev); + /* PCIe bandwidth usage */ + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1); }; /* @@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) +#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6896dec97fc7..b38c06f0196e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", value); } +/** + * DOC: pcie_bw + * + * The amdgpu driver provides a sysfs API for estimating how much data + * has been received and sent by the GPU in the last second through PCIe. + * The file pcie_bw is used for this. + * The Perf counters count the number of received and sent messages and return + * those values, as well as the maximum payload size of a PCIe packet (mps). + * Note that it is not possible to easily and quickly obtain the size of each + * packet transmitted, so we output the max payload size (mps) to allow for + * quick estimation of the PCIe bandwidth usage + */ +static ssize_t amdgpu_get_pcie_bw(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t count0, count1; + + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); + return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", + count0, count1, pcie_get_mps(adev->pdev)); +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* PCIe Perf counters won't work on APU nodes */ + if (adev->flags & !AMD_IS_APU) { + ret = device_create_file(adev->dev, &dev_attr_pcie_bw); + if (ret) { + DRM_ERROR("failed to create device file pcie_bw\n"); + return ret; + } + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); + if (adev->flags & !AMD_IS_APU) + device_remove_file(adev->dev, &dev_attr_pcie_bw); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 71c50d8900e3..6277de51483f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev) return true; } +static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); + + /* Write to enable desired perf counters */ + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .invalidate_hdp = &cik_invalidate_hdp, .need_full_reset = &cik_need_full_reset, .init_doorbell_index = &legacy_doorbell_index_init, + .get_pcie_usage = &cik_get_pcie_usage, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f8408f88cd37..7d2a48727e76 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -47,6 +47,7 @@ #include "dce/dce_6_0_d.h" #include "uvd/uvd_4_0_d.h" #include "bif/bif_3_0_d.h" +#include "bif/bif_3_0_sh_mask.h" static const u32 tahiti_golden_registers[] = { @@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } +static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); + + /* Write to enable desired perf counters */ + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .flush_hdp = &si_flush_hdp, .invalidate_hdp = &si_invalidate_hdp, .need_full_reset = &si_need_full_reset, + .get_pcie_usage = &si_get_pcie_usage, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8849b74078d6..bb89833ed3e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -43,6 +43,9 @@ #include "hdp/hdp_4_0_sh_mask.h" #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" +#include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_sh_mask.h" +#include "nbio/nbio_7_0_smn.h" #include "soc15.h" #include "soc15_common.h" @@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) /* change this when we implement soft reset */ return true; } +static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); +} static const struct amdgpu_asic_funcs soc15_asic_funcs = { @@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega10_doorbell_index_init, + .get_pcie_usage = &soc15_get_pcie_usage, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, + .get_pcie_usage = &soc15_get_pcie_usage, }; static int soc15_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 03e7be595a0d..cdc8ab8d79d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev) } } +static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); + + /* Write to enable desired perf counters */ + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .invalidate_hdp = &vi_invalidate_hdp, .need_full_reset = &vi_need_full_reset, .init_doorbell_index = &legacy_doorbell_index_init, + .get_pcie_usage = &vi_get_pcie_usage, }; #define CZ_REV_BRISTOL(rev) \ From d0948af7f6aaf05e3228063b26f696dedf2685df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Dec 2018 16:04:15 -0500 Subject: [PATCH 123/539] drm/amdgpu: expose sclk and mclk via hwmon Expose sclk (gfx clock) and mclk (memory clock) via hwmon compatible interface. hwmon does not actually formally specify a frequency type attribute, but these are compatible with the format of the other attributes exposed via hwmon. Units are hertz. freq1_input - GPU gfx/compute clock in hertz freq2_input - GPU memory clock in hertz (dGPU only) Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 93 ++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index b38c06f0196e..51eb2cf42b81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1542,6 +1542,75 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return count; } +static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + uint32_t sclk; + int r, size = sizeof(sclk); + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the sclk */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, + (void *)&sclk, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000); +} + +static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "sclk\n"); +} + +static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + uint32_t mclk; + int r, size = sizeof(mclk); + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the sclk */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, + (void *)&mclk, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000); +} + +static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "mclk\n"); +} /** * DOC: hwmon @@ -1558,6 +1627,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, * * - GPU fan * + * - GPU gfx/compute engine clock + * + * - GPU memory clock (dGPU only) + * * hwmon interfaces for GPU temperature: * * - temp1_input: the on die GPU temperature in millidegrees Celsius @@ -1602,6 +1675,12 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, * * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable * + * hwmon interfaces for GPU clocks: + * + * - freq1_input: the gfx/compute clock in hertz + * + * - freq2_input: the memory clock in hertz + * * You can use hwmon tools like sensors to view this information on your system. * */ @@ -1626,6 +1705,10 @@ static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0); static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0); static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0); +static SENSOR_DEVICE_ATTR(freq1_input, S_IRUGO, amdgpu_hwmon_show_sclk, NULL, 0); +static SENSOR_DEVICE_ATTR(freq1_label, S_IRUGO, amdgpu_hwmon_show_sclk_label, NULL, 0); +static SENSOR_DEVICE_ATTR(freq2_input, S_IRUGO, amdgpu_hwmon_show_mclk, NULL, 0); +static SENSOR_DEVICE_ATTR(freq2_label, S_IRUGO, amdgpu_hwmon_show_mclk_label, NULL, 0); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -1648,6 +1731,10 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_cap_max.dev_attr.attr, &sensor_dev_attr_power1_cap_min.dev_attr.attr, &sensor_dev_attr_power1_cap.dev_attr.attr, + &sensor_dev_attr_freq1_input.dev_attr.attr, + &sensor_dev_attr_freq1_label.dev_attr.attr, + &sensor_dev_attr_freq2_input.dev_attr.attr, + &sensor_dev_attr_freq2_label.dev_attr.attr, NULL }; @@ -1738,6 +1825,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_in1_label.dev_attr.attr)) return 0; + /* no mclk on APUs */ + if ((adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_freq2_input.dev_attr.attr || + attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) + return 0; + return effective_mode; } From 444018893abfde1df86be9d4be3e0c84832397dd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 31 Oct 2018 23:54:27 -0500 Subject: [PATCH 124/539] drm/amdgpu: add need_reset_on_init asic callback (v2) Used to determine if we need to reset the asic on init due to the driver having been previously loaded or not shutdown cleanly. E.g., kexec or VM passthrough. v2: rebase Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3b30bb2cdd21..88431ea88344 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -545,6 +545,8 @@ struct amdgpu_asic_funcs { /* PCIe bandwidth usage */ void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1); + /* do we need to reset the asic at init time (e.g., kexec) */ + bool (*need_reset_on_init)(struct amdgpu_device *adev); }; /* @@ -1046,6 +1048,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) +#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); From 7450bbe7815b8286165c87a07340c80ca2764054 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 31 Oct 2018 23:56:16 -0500 Subject: [PATCH 125/539] drm/amdgpu/si: add need_reset_on_init asic callback for SI (v2) SI chips don't require a reset on reload due to the nature of the SMU on them. v2: rebase Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7d2a48727e76..79c1a9bbcc21 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1259,6 +1259,11 @@ static bool si_need_full_reset(struct amdgpu_device *adev) return true; } +static bool si_need_reset_on_init(struct amdgpu_device *adev) +{ + return false; +} + static int si_get_pcie_lanes(struct amdgpu_device *adev) { u32 link_width_cntl; @@ -1387,6 +1392,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .invalidate_hdp = &si_invalidate_hdp, .need_full_reset = &si_need_full_reset, .get_pcie_usage = &si_get_pcie_usage, + .need_reset_on_init = &si_need_reset_on_init, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) From 3fcc10d73d611e929fec00ed58d8b048df5c25bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 31 Oct 2018 23:58:56 -0500 Subject: [PATCH 126/539] drm/amdgpu/cik: add need_reset_on_init asic callback for CIK (v2) CIK chips require a reset if the driver was previously loaded because the SMU can only be loaded once between each reset. v2: rebase Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 6277de51483f..b1b7abed7dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1787,6 +1787,23 @@ static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static bool cik_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 clock_cntl, pc; + + if (adev->flags & AMD_IS_APU) + return false; + + /* check if the SMC is already running */ + clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); + pc = RREG32_SMC(ixSMC_PC_C); + if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) && + (0x20100 <= pc)) + return true; + + return false; +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1803,6 +1820,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .need_full_reset = &cik_need_full_reset, .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &cik_get_pcie_usage, + .need_reset_on_init = &cik_need_reset_on_init, }; static int cik_common_early_init(void *handle) From 762e6f3f200e0bd538b2d3fd0a4f4ff5a8d45460 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 Nov 2018 00:00:09 -0500 Subject: [PATCH 127/539] drm/amdgpu/vi: add need_reset_on_init asic callback for VI (v2) VI chips require a reset if the driver was previously loaded because the SMU can only be loaded once between each reset. v2: rebase Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index cdc8ab8d79d7..5e5b42a0744a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -987,6 +987,23 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static bool vi_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 clock_cntl, pc; + + if (adev->flags & AMD_IS_APU) + return false; + + /* check if the SMC is already running */ + clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); + pc = RREG32_SMC(ixSMC_PC_C); + if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) && + (0x20100 <= pc)) + return true; + + return false; +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -1003,6 +1020,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .need_full_reset = &vi_need_full_reset, .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &vi_get_pcie_usage, + .need_reset_on_init = &vi_need_reset_on_init, }; #define CZ_REV_BRISTOL(rev) \ From 9281f12cabb09059ae60603e14cc9cd98b02febb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 Nov 2018 00:00:57 -0500 Subject: [PATCH 128/539] drm/amdgpu/soc15: add need_reset_on_init asic callback for SOC15 (v2) SOC15 chips require a reset if the driver was previously loaded because the PSP can only be loaded once between each reset. v2: rebase, handle multiple asic funcs Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index bb89833ed3e7..483867f49154 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -46,6 +46,7 @@ #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" +#include "mp/mp_9_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -650,6 +651,23 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static bool soc15_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return true; + + return false; +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -666,6 +684,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, + .need_reset_on_init = &soc15_need_reset_on_init, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -684,6 +703,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, + .need_reset_on_init = &soc15_need_reset_on_init, }; static int soc15_common_early_init(void *handle) From 95e8e59ec49f7ca5c8d3f531c1d396961b5b961b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 Nov 2018 00:02:01 -0500 Subject: [PATCH 129/539] drm/amdgpu: check if we need to reset at init time (v2) To deal with situations like kexec or GPU VM passthrough where the device may have been used previously without a proper GPU reset between. v2: rebase bug: https://bugs.freedesktop.org/show_bug.cgi?id=108585 bug: https://bugs.freedesktop.org/show_bug.cgi?id=108754 Reviewed-by: Evan Quan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1a558dc41ba6..8a61764e64cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2550,6 +2550,17 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* detect if we are with an SRIOV vbios */ amdgpu_device_detect_sriov_bios(adev); + /* check if we need to reset the asic + * E.g., driver was not cleanly unloaded previously, etc. + */ + if (amdgpu_asic_need_reset_on_init(adev)) { + r = amdgpu_asic_reset(adev); + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; + } + } + /* Post card if necessary */ if (amdgpu_device_need_post(adev)) { if (!adev->bios) { From df1dd4f4a7271eb2744d8593c0da5d7a58dbe3a9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 2 Jan 2019 14:52:13 -0500 Subject: [PATCH 130/539] drm/amdkfd: Allow building KFD on ARM64 (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ifdef x86_64 specific code. Allow enabling CONFIG_HSA_AMD on ARM64. v2: Fixed a compiler warning due to an unused variable CC: Mark Nutter Signed-off-by: Felix Kuehling Tested-by: Mark Nutter Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Kconfig | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 9 +++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index fbf0ee5201c3..c3613604a4f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,8 +4,8 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && X86_64 - imply AMD_IOMMU_V2 + depends on DRM_AMDGPU && (X86_64 || ARM64) + imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index b7bc7d7d048f..5d85ff341385 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,6 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#if CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) @@ -905,6 +906,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#endif /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU * @@ -920,7 +922,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) struct crat_subtype_generic *sub_type_hdr; int avail_size = *size; int numa_node_id; +#ifdef CONFIG_X86_64 uint32_t entries = 0; +#endif int ret = 0; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) @@ -982,6 +986,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr->length); /* Fill in Subtype: IO Link */ +#ifdef CONFIG_X86_64 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, &entries, (struct crat_subtype_iolink *)sub_type_hdr); @@ -992,6 +997,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length * entries); +#else + pr_info("IO link not available for non x86 platforms\n"); +#endif crat_table->num_domains++; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5f5b2acedbac..4734f1a35516 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1392,7 +1392,6 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) { - const struct cpuinfo_x86 *cpuinfo; int first_cpu_of_numa_node; if (!cpumask || cpumask == cpu_none_mask) @@ -1400,9 +1399,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) first_cpu_of_numa_node = cpumask_first(cpumask); if (first_cpu_of_numa_node >= nr_cpu_ids) return -1; - cpuinfo = &cpu_data(first_cpu_of_numa_node); - - return cpuinfo->apicid; +#ifdef CONFIG_X86_64 + return cpu_data(first_cpu_of_numa_node).apicid; +#else + return first_cpu_of_numa_node; +#endif } /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor From b8fe05247d49da9ae6648c4783d8d8f36ca4bc7f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 2 Jan 2019 17:47:39 -0500 Subject: [PATCH 131/539] drm/amdkfd: Don't assign dGPUs to APU topology devices dGPUs need their own topology devices. Don't assign them to APU topology devices with CPU cores. Bug: https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/issues/66 Signed-off-by: Felix Kuehling Tested-by: Elias Konstantinidis Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4734f1a35516..09da91644f9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1093,8 +1093,6 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) * the GPU device is not already present in the topology device * list then return NULL. This means a new topology device has to * be created for this GPU. - * TODO: Rather than assiging @gpu to first topology device withtout - * gpu attached, it will better to have more stringent check. */ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { @@ -1102,12 +1100,20 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_topology_device *out_dev = NULL; down_write(&topology_lock); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry(dev, &topology_device_list, list) { + /* Discrete GPUs need their own topology device list + * entries. Don't assign them to CPU/APU nodes. + */ + if (!gpu->device_info->needs_iommu_device && + dev->node_props.cpu_cores_count) + continue; + if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; } + } up_write(&topology_lock); return out_dev; } From b721056b34c6c045cd5eb0c003a6a2c2d6d077aa Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 9 Jan 2019 14:39:48 +0800 Subject: [PATCH 132/539] drm/amd/powerplay: run acg btc for Vega12 acg btc was added to Vega12 Signed-off-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 54364444ecd1..0c8212902275 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -753,6 +753,22 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +static int vega12_run_acg_btc(struct pp_hwmgr *hwmgr) +{ + uint32_t result; + + PP_ASSERT_WITH_CODE( + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc) == 0, + "[Run_ACG_BTC] Attempt to run ACG BTC failed!", + return -EINVAL); + + result = smum_get_argument(hwmgr); + PP_ASSERT_WITH_CODE(result == 1, + "Failed to run ACG BTC!", return -EINVAL); + + return 0; +} + static int vega12_set_allowed_featuresmask(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = @@ -931,6 +947,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "Failed to initialize SMC table!", result = tmp_result); + tmp_result = vega12_run_acg_btc(hwmgr); + PP_ASSERT_WITH_CODE(!tmp_result, + "Failed to run ACG BTC!", + result = tmp_result); + result = vega12_enable_all_smu_features(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to enable all smu features!", From a93587b31e3418dcc93a209da22a6f4018a73101 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 10 Jan 2019 15:12:48 -0500 Subject: [PATCH 133/539] drm/amd/display: Only get the connector state for VRR when toggled [Why] This fixes a stuttering issue that occurs when moving a hardware cursor when VRR is enabled. Previously when VRR is enabled atomic check will grab the connector state for every atomic update. This has to lock the connector in order to do so. The locking is bad enough by itself for performance, but it gets worse with what we do just below that - add all the planes for the CRTC to the commit. This prevents the cursor fast path from working - there's more than one plane now. With state->allow_modeset = true on top of this, it also adds and removes all the planes from the DC context triggering a full (very slow) update in DC. [How] We need the connector state to get the VRR min/max capbilities, but we only need them when there's a CRTC mode change or when VRR is toggled. The condition has been updated accordingly. Fixes: 3cc22f281318 ("drm/amdgpu: Set FreeSync state using drm VRR properties") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a67254010628..1059189229ea 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5991,7 +5991,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && !new_crtc_state->color_mgmt_changed && - !new_crtc_state->vrr_enabled) + old_crtc_state->vrr_enabled == new_crtc_state->vrr_enabled) continue; if (!new_crtc_state->enable) From 887c549dc0d0351d64fdd28ca217c70d123cc17b Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Mon, 10 Dec 2018 14:26:53 -0500 Subject: [PATCH 134/539] drm/amd/display: 3.2.12 Signed-off-by: Steven Chiu Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ee7054042901..f80f52ef4b01 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.11" +#define DC_VER "3.2.12" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From 4bd0dc68fd00f41ed64fbcfe7b7afe93763b04ad Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Mon, 10 Dec 2018 13:46:34 -0500 Subject: [PATCH 135/539] drm/amd/display: Fix missing hwss function for dce [Why] The driver will crash on dce hardware due to a null function pointer. [How] - bring back "program_csc_matrix" functionality as "program_output_csc" for dce110 - dce110 doesn't use the "opp_id" parameter, so use 0 when there's no hubp Signed-off-by: Joshua Aberback Reviewed-by: Anthony Koo Acked-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- .../display/dc/dce110/dce110_hw_sequencer.c | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index af907d541318..c94bc405d135 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -451,7 +451,7 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream) pipes, stream->output_color_space, stream->csc_color_matrix.matrix, - pipes->plane_res.hubp->opp_id); + pipes->plane_res.hubp ? pipes->plane_res.hubp->opp_id : 0); ret = true; } } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 8e961e699db5..602a41d1fc5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2616,6 +2616,28 @@ static void dce110_wait_for_mpcc_disconnect( /* do nothing*/ } +static void program_output_csc(struct dc *dc, + struct pipe_ctx *pipe_ctx, + enum dc_color_space colorspace, + uint16_t *matrix, + int opp_id) +{ + int i; + struct out_csc_color_matrix tbl_entry; + + if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { + enum dc_color_space color_space = pipe_ctx->stream->output_color_space; + + for (i = 0; i < 12; i++) + tbl_entry.regval[i] = pipe_ctx->stream->csc_color_matrix.matrix[i]; + + tbl_entry.color_space = color_space; + + pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment( + pipe_ctx->plane_res.xfm, &tbl_entry); + } +} + void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) { struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; @@ -2666,6 +2688,7 @@ void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, + .program_output_csc = program_output_csc, .init_hw = init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, From c591d8429df2832cdfe63907bf34c5f150a45b3d Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 11 Dec 2018 15:13:08 -0500 Subject: [PATCH 136/539] drm/amd/display: Pack DMCU iRAM alignment [Why] When the DMCU's iRAM definition was moved to the newly created power_helpers, a #pragma pack was lost, causing the iRAM to be misaligned [How] Restore the #pragma pack Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 00f63b7dd32f..c11a443dcbc8 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -57,6 +57,7 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le #define NUM_POWER_FN_SEGS 8 #define NUM_BL_CURVE_SEGS 16 +#pragma pack(push, 1) /* NOTE: iRAM is 256B in size */ struct iram_table_v_2 { /* flags */ @@ -100,6 +101,7 @@ struct iram_table_v_2 { uint8_t dummy8; /* 0xfe */ uint8_t dummy9; /* 0xff */ }; +#pragma pack(pop) static uint16_t backlight_8_to_16(unsigned int backlight_8bit) { From a6729a5a406a885c749bc05d73c06e6cd9bd6211 Mon Sep 17 00:00:00 2001 From: abdoulaye berthe Date: Fri, 7 Dec 2018 16:12:09 -0500 Subject: [PATCH 137/539] drm/amd/display: fix CTS 4.2.2.8 [Why] 1-Test equipment does not reset test automation flag after completing current test causing the next test to fail. 2.When test request is empty, we shouldn't ack the test. [How] 1-Driver should always clear test equipment automation test request when request is completed. 2-Driver should clear test equipement test automation if driver does not complete the request. Signed-off-by: abdoulaye berthe Reviewed-by: Wenjing Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 568fdc9423e6..cf9362704d12 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2002,11 +2002,7 @@ static void handle_automated_test(struct dc_link *link) dp_test_send_phy_test_pattern(link); test_response.bits.ACK = 1; } - if (!test_request.raw) - /* no requests, revert all test signals - * TODO: revert all test signals - */ - test_response.bits.ACK = 1; + /* send request acknowledgment */ if (test_response.bits.ACK) core_link_write_dpcd( From 29c8f23425e07092f25db3b57c1d8dcefc532290 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Mon, 7 Jan 2019 13:28:54 -0500 Subject: [PATCH 138/539] Revert "drm/amd/display: Signal hw_done() after waiting for flip_done()" This reverts commit 717276b9256f5d97b43e53adca1670cee2c45db2. The reverted patch was a workaround that addressed an issue fixed by: 4364bcb2cd21 ("drm: Get ref on CRTC commit object when waiting for flip_done") Signed-off-by: Leo Li Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1059189229ea..ff2fcb99ac63 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5257,18 +5257,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } spin_unlock_irqrestore(&adev->ddev->event_lock, flags); + /* Signal HW programming completion */ + drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) drm_atomic_helper_wait_for_flip_done(dev, state); - /* - * FIXME: - * Delay hw_done() until flip_done() is signaled. This is to block - * another commit from freeing the CRTC state while we're still - * waiting on flip_done. - */ - drm_atomic_helper_commit_hw_done(state); - drm_atomic_helper_cleanup_planes(dev, state); /* From 44788bbc309ba687ac239a77a2399d879c6d5f18 Mon Sep 17 00:00:00 2001 From: Tony Cheng Date: Wed, 24 Oct 2018 15:09:46 -0400 Subject: [PATCH 139/539] drm/amd/display: refactor reg_update [why] separate register access from logic to allow us abstract register sequences [how] consolidate mask and value first then apply to register. Signed-off-by: Tony Cheng Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_helper.c | 36 +++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 4842d2378bbf..597d38393379 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -29,31 +29,59 @@ #include "dm_services.h" #include +struct dc_reg_value_masks { + uint32_t value; + uint32_t mask; +}; + +struct dc_reg_sequence { + uint32_t addr; + struct dc_reg_value_masks value_masks; +}; + +static inline void set_reg_field_value_masks( + struct dc_reg_value_masks *field_value_mask, + uint32_t value, + uint32_t mask, + uint8_t shift) +{ + ASSERT(mask != 0); + + field_value_mask->value = (field_value_mask->value & ~mask) | (mask & (value << shift)); + field_value_mask->mask = field_value_mask->mask | mask; +} + uint32_t generic_reg_update_ex(const struct dc_context *ctx, uint32_t addr, uint32_t reg_val, int n, uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...) { + struct dc_reg_value_masks field_value_mask = {0}; uint32_t shift, mask, field_value; int i = 1; va_list ap; va_start(ap, field_value1); - reg_val = set_reg_field_value_ex(reg_val, field_value1, mask1, shift1); + /* gather all bits value/mask getting updated in this register */ + set_reg_field_value_masks(&field_value_mask, + field_value1, mask1, shift1); while (i < n) { shift = va_arg(ap, uint32_t); mask = va_arg(ap, uint32_t); field_value = va_arg(ap, uint32_t); - reg_val = set_reg_field_value_ex(reg_val, field_value, mask, shift); + set_reg_field_value_masks(&field_value_mask, + field_value, mask, shift); i++; } - - dm_write_reg(ctx, addr, reg_val); va_end(ap); + + /* mmio write directly */ + reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value; + dm_write_reg(ctx, addr, reg_val); return reg_val; } From 070fe7246f0666d8da2b06e4e829adaf26c75a0f Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 12 Dec 2018 18:09:16 -0500 Subject: [PATCH 140/539] drm/amd/display: fix PME notification not working in RV desktop [Why] PPLIB not receive the PME when unplug. Signed-off-by: Charlene Liu Reviewed-by: Chris Park Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 602a41d1fc5d..144a1c8dca1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -997,7 +997,7 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); - if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + if (num_audio >= 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); /* un-mute audio */ @@ -1014,6 +1014,8 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { + struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; + if (option != KEEP_ACQUIRED_RESOURCE || !dc->debug.az_endpoint_mute_only) { /*only disalbe az_endpoint if power down or free*/ @@ -1033,6 +1035,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false); pipe_ctx->stream_res.audio = NULL; } + if (pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ + pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); /* TODO: notify audio driver for if audio modes list changed * add audio mode list change flag */ From ffb6c1c6c5a12a623205c0d984851ed10dabb75f Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Thu, 13 Dec 2018 12:25:19 +0800 Subject: [PATCH 141/539] drm/amd/display: Redefine DMCU_SCRATCH to identify DMCU state [why] To resume system before entering S0i3 completely will cause PSP not reload DMCU FW since there is not HW power state change. In this case, driver cannot get correct DMCU version from IRAM since driver override it and DMCU didn't reload to update it. It makes driver return false in dcn10_dmcu_init(). [how] 1.To redefine DMCU_SCRATCH to identify different DMCU state. 2.To reserve IRAM 0xF0~0xFF write by DMCU only. 3.To remove dcn10_get_dmcu_state Signed-off-by: Martin Tsai Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c | 123 ++++++------------ drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h | 10 +- .../amd/display/modules/power/power_helpers.c | 3 +- 3 files changed, 52 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index dea40b322191..e927c8934681 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -51,7 +51,6 @@ #define PSR_SET_WAITLOOP 0x31 #define MCP_INIT_DMCU 0x88 #define MCP_INIT_IRAM 0x89 -#define MCP_DMCU_VERSION 0x90 #define MASTER_COMM_CNTL_REG__MASTER_COMM_INTERRUPT_MASK 0x00000001L static bool dce_dmcu_init(struct dmcu *dmcu) @@ -317,38 +316,11 @@ static void dce_get_psr_wait_loop( } #if defined(CONFIG_DRM_AMD_DC_DCN1_0) -static void dcn10_get_dmcu_state(struct dmcu *dmcu) -{ - struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); - uint32_t dmcu_state_offset = 0xf6; - - /* Enable write access to IRAM */ - REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, - IRAM_HOST_ACCESS_EN, 1, - IRAM_RD_ADDR_AUTO_INC, 1); - - REG_WAIT(DMU_MEM_PWR_CNTL, DMCU_IRAM_MEM_PWR_STATE, 0, 2, 10); - - /* Write address to IRAM_RD_ADDR in DMCU_IRAM_RD_CTRL */ - REG_WRITE(DMCU_IRAM_RD_CTRL, dmcu_state_offset); - - /* Read data from IRAM_RD_DATA in DMCU_IRAM_RD_DATA*/ - dmcu->dmcu_state = REG_READ(DMCU_IRAM_RD_DATA); - - /* Disable write access to IRAM to allow dynamic sleep state */ - REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, - IRAM_HOST_ACCESS_EN, 0, - IRAM_RD_ADDR_AUTO_INC, 0); -} - static void dcn10_get_dmcu_version(struct dmcu *dmcu) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); uint32_t dmcu_version_offset = 0xf1; - /* Clear scratch */ - REG_WRITE(DC_DMCU_SCRATCH, 0); - /* Enable write access to IRAM */ REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, IRAM_HOST_ACCESS_EN, 1, @@ -368,76 +340,65 @@ static void dcn10_get_dmcu_version(struct dmcu *dmcu) REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, IRAM_HOST_ACCESS_EN, 0, IRAM_RD_ADDR_AUTO_INC, 0); - - /* Send MCP command message to DMCU to get version reply from FW. - * We expect this version should match the one in IRAM, otherwise - * something is wrong with DMCU and we should fail and disable UC. - */ - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); - - /* Set command to get DMCU version from microcontroller */ - REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, - MCP_DMCU_VERSION); - - /* Notify microcontroller of new command */ - REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); - - /* Ensure command has been executed before continuing */ - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); - - /* Somehow version does not match, so fail and return version 0 */ - if (dmcu->dmcu_version.interface_version != REG_READ(DC_DMCU_SCRATCH)) - dmcu->dmcu_version.interface_version = 0; } static bool dcn10_dmcu_init(struct dmcu *dmcu) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); + bool status = false; - /* DMCU FW should populate the scratch register if running */ - if (REG_READ(DC_DMCU_SCRATCH) == 0) - return false; + /* Definition of DC_DMCU_SCRATCH + * 0 : firmare not loaded + * 1 : PSP load DMCU FW but not initialized + * 2 : Firmware already initialized + */ + dmcu->dmcu_state = REG_READ(DC_DMCU_SCRATCH); - /* Check state is uninitialized */ - dcn10_get_dmcu_state(dmcu); + switch (dmcu->dmcu_state) { + case DMCU_UNLOADED: + status = false; + break; + case DMCU_LOADED_UNINITIALIZED: + /* Wait until microcontroller is ready to process interrupt */ + REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); - /* If microcontroller is already initialized, do nothing */ - if (dmcu->dmcu_state == DMCU_RUNNING) - return true; + /* Set initialized ramping boundary value */ + REG_WRITE(MASTER_COMM_DATA_REG1, 0xFFFF); - /* Retrieve and cache the DMCU firmware version. */ - dcn10_get_dmcu_version(dmcu); - - /* Check interface version to confirm firmware is loaded and running */ - if (dmcu->dmcu_version.interface_version == 0) - return false; - - /* Wait until microcontroller is ready to process interrupt */ - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); - - /* Set initialized ramping boundary value */ - REG_WRITE(MASTER_COMM_DATA_REG1, 0xFFFF); - - /* Set command to initialize microcontroller */ - REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, + /* Set command to initialize microcontroller */ + REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, MCP_INIT_DMCU); - /* Notify microcontroller of new command */ - REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); + /* Notify microcontroller of new command */ + REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); - /* Ensure command has been executed before continuing */ - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); + /* Ensure command has been executed before continuing */ + REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); - // Check state is initialized - dcn10_get_dmcu_state(dmcu); + // Check state is initialized + dmcu->dmcu_state = REG_READ(DC_DMCU_SCRATCH); - // If microcontroller is not in running state, fail - if (dmcu->dmcu_state != DMCU_RUNNING) - return false; + // If microcontroller is not in running state, fail + if (dmcu->dmcu_state == DMCU_RUNNING) { + /* Retrieve and cache the DMCU firmware version. */ + dcn10_get_dmcu_version(dmcu); + status = true; + } else + status = false; - return true; + break; + case DMCU_RUNNING: + status = true; + break; + default: + status = false; + break; + } + + return status; } + static bool dcn10_dmcu_load_iram(struct dmcu *dmcu, unsigned int start_offset, const char *src, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index cb85eaa9857f..ed32a7503684 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -27,9 +27,15 @@ #include "dm_services_types.h" +/* If HW itself ever powered down it will be 0. + * fwDmcuInit will write to 1. + * Driver will only call MCP init if current state is 1, + * and the MCP command will transition this to 2. + */ enum dmcu_state { - DMCU_NOT_INITIALIZED = 0, - DMCU_RUNNING = 1 + DMCU_UNLOADED = 0, + DMCU_LOADED_UNINITIALIZED = 1, + DMCU_RUNNING = 2, }; struct dmcu_version { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index c11a443dcbc8..89b082b5d107 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -56,6 +56,7 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le #define NUM_AGGR_LEVEL 4 #define NUM_POWER_FN_SEGS 8 #define NUM_BL_CURVE_SEGS 16 +#define IRAM_RESERVE_AREA_START 0xF0 // reserve 0xF0~0xFF are write by DMCU only #pragma pack(push, 1) /* NOTE: iRAM is 256B in size */ @@ -324,5 +325,5 @@ bool dmcu_load_iram(struct dmcu *dmcu, params, &ram_table); return dmcu->funcs->load_iram( - dmcu, 0, (char *)(&ram_table), sizeof(ram_table)); + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START); } From 7eb9097541706a71dfda01e456cae2930f24b10a Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Wed, 12 Dec 2018 19:53:02 -0500 Subject: [PATCH 142/539] drm/amd/display: Fix for NULL ramp pointer crashing driver [Why] In certain scenarios the ramp parameter come in as NULL, which crashes because this function doesn't guard properly in the early return. [How] - parameter mapUserRamp should be the guard (false means no ramp) - remove checking ramp in early return Signed-off-by: Joshua Aberback Reviewed-by: Sivapiriyan Kumarasamy Acked-by: Eryk Brol Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index beb7bda1478a..4cee084a8e2d 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1772,8 +1772,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, /* we can use hardcoded curve for plain SRGB TF */ if (input_tf->type == TF_TYPE_PREDEFINED && input_tf->tf == TRANSFER_FUNCTION_SRGB && - (!mapUserRamp && - (ramp->type == GAMMA_RGB_256 || ramp->num_entries == 0))) + !mapUserRamp) return true; input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; From b03a599b3e1f520e79d3dbf487179bd0d70d4681 Mon Sep 17 00:00:00 2001 From: Derek Lai Date: Tue, 11 Dec 2018 16:27:09 +0800 Subject: [PATCH 143/539] drm/amd/display: Set link rate set if eDP ver >= 1.4. [Why] If eDP ver >= 1.4, the Source device must use LINK_RATE_SET. [How] Get LINK_RATE_SET by reading DPCD 10h-1fh, then write DPCD 00115h before link training. Signed-off-by: Derek Lai Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 103 +++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 13 ++- 3 files changed, 110 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cf9362704d12..431805c566cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -117,6 +117,13 @@ static void dpcd_set_link_settings( core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, &downspread.raw, sizeof(downspread)); + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && + (link->dpcd_caps.link_rate_set >= 1 && + link->dpcd_caps.link_rate_set <= 8)) { + core_link_write_dpcd(link, DP_LINK_RATE_SET, + &link->dpcd_caps.link_rate_set, 1); + } + DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x\n %x spread = %x\n", __func__, DP_LINK_BW_SET, @@ -2489,13 +2496,105 @@ bool detect_dp_sink_caps(struct dc_link *link) /* TODO save sink caps in link->sink */ } +enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in_khz) +{ + enum dc_link_rate link_rate; + // LinkRate is normally stored as a multiplier of 0.27 Gbps per lane. Do the translation. + switch (link_rate_in_khz) { + case 1620000: + link_rate = LINK_RATE_LOW; // Rate_1 (RBR) - 1.62 Gbps/Lane + break; + case 2160000: + link_rate = LINK_RATE_RATE_2; // Rate_2 - 2.16 Gbps/Lane + break; + case 2430000: + link_rate = LINK_RATE_RATE_3; // Rate_3 - 2.43 Gbps/Lane + break; + case 2700000: + link_rate = LINK_RATE_HIGH; // Rate_4 (HBR) - 2.70 Gbps/Lane + break; + case 3240000: + link_rate = LINK_RATE_RBR2; // Rate_5 (RBR2) - 3.24 Gbps/Lane + break; + case 4320000: + link_rate = LINK_RATE_RATE_6; // Rate_6 - 4.32 Gbps/Lane + break; + case 5400000: + link_rate = LINK_RATE_HIGH2; // Rate_7 (HBR2) - 5.40 Gbps/Lane + break; + case 8100000: + link_rate = LINK_RATE_HIGH3; // Rate_8 (HBR3) - 8.10 Gbps/Lane + break; + default: + link_rate = LINK_RATE_UNKNOWN; + break; + } + return link_rate; +} + void detect_edp_sink_caps(struct dc_link *link) { + uint8_t supported_link_rates[16] = {0}; + uint32_t entry; + uint32_t link_rate_in_khz; + enum dc_link_rate link_rate = LINK_RATE_UNKNOWN; + uint8_t link_rate_set = 0; + retrieve_link_cap(link); - if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) - link->reported_link_cap.link_rate = LINK_RATE_HIGH2; + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14) { + // Read DPCD 00010h - 0001Fh 16 bytes at one shot + core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES, + supported_link_rates, sizeof(supported_link_rates)); + link->dpcd_caps.link_rate_set = 0; + for (entry = 0; entry < 16; entry += 2) { + // DPCD register reports per-lane link rate = 16-bit link rate capability + // value X 200 kHz. Need multipler to find link rate in kHz. + link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 + + supported_link_rates[entry]) * 200; + + if (link_rate_in_khz != 0) { + link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz); + if (link->reported_link_cap.link_rate < link_rate) { + link->reported_link_cap.link_rate = link_rate; + + switch (link_rate) { + case LINK_RATE_LOW: + link_rate_set = 1; + break; + case LINK_RATE_RATE_2: + link_rate_set = 2; + break; + case LINK_RATE_RATE_3: + link_rate_set = 3; + break; + case LINK_RATE_HIGH: + link_rate_set = 4; + break; + case LINK_RATE_RBR2: + link_rate_set = 5; + break; + case LINK_RATE_RATE_6: + link_rate_set = 6; + break; + case LINK_RATE_HIGH2: + link_rate_set = 7; + break; + case LINK_RATE_HIGH3: + link_rate_set = 8; + break; + default: + link_rate_set = 0; + break; + } + + if (link->dpcd_caps.link_rate_set < link_rate_set) + link->dpcd_caps.link_rate_set = link_rate_set; + } + } + } + } link->verified_link_cap = link->reported_link_cap; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f80f52ef4b01..1c46249c8240 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -659,6 +659,7 @@ struct dpcd_caps { int8_t branch_dev_name[6]; int8_t branch_hw_revision; int8_t branch_fw_revision[2]; + uint8_t link_rate_set; bool allow_invalid_MSA_timing_param; bool panel_mode_edp; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index da93ab43f2d8..d4eab33c453b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -46,11 +46,14 @@ enum dc_lane_count { */ enum dc_link_rate { LINK_RATE_UNKNOWN = 0, - LINK_RATE_LOW = 0x06, - LINK_RATE_HIGH = 0x0A, - LINK_RATE_RBR2 = 0x0C, - LINK_RATE_HIGH2 = 0x14, - LINK_RATE_HIGH3 = 0x1E + LINK_RATE_LOW = 0x06, // Rate_1 (RBR) - 1.62 Gbps/Lane + LINK_RATE_RATE_2 = 0x08, // Rate_2 - 2.16 Gbps/Lane + LINK_RATE_RATE_3 = 0x09, // Rate_3 - 2.43 Gbps/Lane + LINK_RATE_HIGH = 0x0A, // Rate_4 (HBR) - 2.70 Gbps/Lane + LINK_RATE_RBR2 = 0x0C, // Rate_5 (RBR2)- 3.24 Gbps/Lane + LINK_RATE_RATE_6 = 0x10, // Rate_6 - 4.32 Gbps/Lane + LINK_RATE_HIGH2 = 0x14, // Rate_7 (HBR2)- 5.40 Gbps/Lane + LINK_RATE_HIGH3 = 0x1E // Rate_8 (HBR3)- 8.10 Gbps/Lane }; enum dc_link_spread { From 55a806d37f2e03988063036954a0959f0e4006ed Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 11 Dec 2018 12:35:56 -0500 Subject: [PATCH 144/539] drm/amd/display: Fix disabled cursor on top screen edge [Why] The cursor vanishes when touching the top of edge of the screen for Raven on Linux. This occurs because the cursor height is not taken into account when deciding to disable the cursor. [How] Factor in the cursor height into the cursor calculations - and mimic the existing x position calculations. Fixes: 94a4ffd1d40b ("drm/amd/display: fix PIP bugs on Dal3") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index dcb3c5530236..cd1ebe57ed59 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -463,7 +463,7 @@ void dpp1_set_cursor_position( if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset < 0) + if (src_y_offset + (int)height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index e24ea6146a29..fae62474a4d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1140,7 +1140,7 @@ void hubp1_cursor_set_position( if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset < 0) //+ (int)hubp->curs_attr.height + if (src_y_offset + (int)hubp->curs_attr.height <= 0) cur_en = 0; /* not visible beyond top edge*/ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) From bbba983103842c31ae3013364d90b624df08087f Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 14 Dec 2018 09:33:30 -0500 Subject: [PATCH 145/539] drm/amd/display: 3.2.13 Signed-off-by: Steven Chiu Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1c46249c8240..98f716be548a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.12" +#define DC_VER "3.2.13" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From ad6756b4d773aae68430a7d8b3c35c6717de5a15 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 29 Nov 2018 13:40:03 -0500 Subject: [PATCH 146/539] drm/amd/display: Shift dc link aux to aux_payload [Why] aux_payload should be the struct used inside dc to start aux transactions. This will allow the old aux interface to be seamlessly replaced. [How] Add three fields to aux_payload: reply, mot, defer_delay This will mean that aux_payload has all data required to submit a request. Shift dc_link to use this struct Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 80 +++---------------- .../gpu/drm/amd/display/dc/core/dc_link_ddc.c | 57 ++++++++----- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 3 +- .../gpu/drm/amd/display/dc/inc/dc_link_ddc.h | 11 +-- .../amd/display/include/i2caux_interface.h | 10 +++ 5 files changed, 68 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5e7ca1f3a8d1..60da1222afaa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -35,6 +35,8 @@ #include "dc_link_ddc.h" +#include "i2caux_interface.h" + /* #define TRACE_DPCD */ #ifdef TRACE_DPCD @@ -81,80 +83,24 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { ssize_t result = 0; - enum i2caux_transaction_action action; - enum aux_transaction_type type; + struct aux_payload payload; if (WARN_ON(msg->size > 16)) return -E2BIG; - switch (msg->request & ~DP_AUX_I2C_MOT) { - case DP_AUX_NATIVE_READ: - type = AUX_TRANSACTION_TYPE_DP; - action = I2CAUX_TRANSACTION_ACTION_DP_READ; + payload.address = msg->address; + payload.data = msg->buffer; + payload.length = msg->size; + payload.reply = &msg->reply; + payload.i2c_over_aux = (msg->request & DP_AUX_NATIVE_WRITE) == 0; + payload.write = (msg->request & DP_AUX_I2C_READ) == 0; + payload.mot = (msg->request & DP_AUX_I2C_MOT) != 0; + payload.defer_delay = 0; - result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, - msg->address, - &msg->reply, - msg->buffer, - msg->size, - type, - action); - break; - case DP_AUX_NATIVE_WRITE: - type = AUX_TRANSACTION_TYPE_DP; - action = I2CAUX_TRANSACTION_ACTION_DP_WRITE; + result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, &payload); - dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, - msg->address, - &msg->reply, - msg->buffer, - msg->size, - type, - action); + if (payload.write) result = msg->size; - break; - case DP_AUX_I2C_READ: - type = AUX_TRANSACTION_TYPE_I2C; - if (msg->request & DP_AUX_I2C_MOT) - action = I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT; - else - action = I2CAUX_TRANSACTION_ACTION_I2C_READ; - - result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, - msg->address, - &msg->reply, - msg->buffer, - msg->size, - type, - action); - break; - case DP_AUX_I2C_WRITE: - type = AUX_TRANSACTION_TYPE_I2C; - if (msg->request & DP_AUX_I2C_MOT) - action = I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT; - else - action = I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - - dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, - msg->address, - &msg->reply, - msg->buffer, - msg->size, - type, - action); - result = msg->size; - break; - default: - return -EINVAL; - } - -#ifdef TRACE_DPCD - log_dpcd(msg->request, - msg->address, - msg->buffer, - msg->size, - r == DDC_RESULT_SUCESSFULL); -#endif if (result < 0) /* DC doesn't know about kernel error codes */ result = -EIO; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 99a314b79850..5ac65737a1e8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -229,7 +229,9 @@ void dal_ddc_aux_payloads_add( uint32_t address, uint32_t len, uint8_t *data, - bool write) + bool write, + bool mot, + uint32_t defer_delay) { uint32_t payload_size = DEFAULT_AUX_MAX_DATA_SIZE; uint32_t pos; @@ -240,7 +242,10 @@ void dal_ddc_aux_payloads_add( .write = write, .address = address, .length = DDC_MIN(payload_size, len - pos), - .data = data + pos }; + .data = data + pos, + .reply = NULL, + .mot = mot, + .defer_delay = defer_delay}; dal_vector_append(&payloads->payloads, &payload); } } @@ -584,10 +589,10 @@ bool dal_ddc_service_query_ddc_data( .max_defer_write_retry = 0 }; dal_ddc_aux_payloads_add( - payloads, address, write_size, write_buf, true); + payloads, address, write_size, write_buf, true, true, get_defer_delay(ddc)); dal_ddc_aux_payloads_add( - payloads, address, read_size, read_buf, false); + payloads, address, read_size, read_buf, false, false, get_defer_delay(ddc)); command.number_of_payloads = dal_ddc_aux_payloads_get_count(payloads); @@ -629,13 +634,25 @@ bool dal_ddc_service_query_ddc_data( return ret; } +static enum i2caux_transaction_action i2caux_action_from_payload(struct aux_payload *payload) +{ + if (payload->i2c_over_aux) { + if (payload->write) { + if (payload->mot) + return I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT; + return I2CAUX_TRANSACTION_ACTION_I2C_WRITE; + } + if (payload->mot) + return I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT; + return I2CAUX_TRANSACTION_ACTION_I2C_READ; + } + if (payload->write) + return I2CAUX_TRANSACTION_ACTION_DP_WRITE; + return I2CAUX_TRANSACTION_ACTION_DP_READ; +} + int dc_link_aux_transfer(struct ddc_service *ddc, - unsigned int address, - uint8_t *reply, - void *buffer, - unsigned int size, - enum aux_transaction_type type, - enum i2caux_transaction_action action) + struct aux_payload *payload) { struct ddc *ddc_pin = ddc->ddc_pin; struct aux_engine *aux_engine; @@ -652,21 +669,25 @@ int dc_link_aux_transfer(struct ddc_service *ddc, aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; aux_engine->funcs->acquire(aux_engine, ddc_pin); - aux_req.type = type; - aux_req.action = action; + if (payload->i2c_over_aux) + aux_req.type = AUX_TRANSACTION_TYPE_I2C; + else + aux_req.type = AUX_TRANSACTION_TYPE_DP; - aux_req.address = address; - aux_req.delay = 0; - aux_req.length = size; - aux_req.data = buffer; + aux_req.action = i2caux_action_from_payload(payload); + + aux_req.address = payload->address; + aux_req.delay = payload->defer_delay * 10; + aux_req.length = payload->length; + aux_req.data = payload->data; aux_engine->funcs->submit_channel_request(aux_engine, &aux_req); operation_result = aux_engine->funcs->get_channel_status(aux_engine, &returned_bytes); switch (operation_result) { case AUX_CHANNEL_OPERATION_SUCCEEDED: - res = aux_engine->funcs->read_channel_reply(aux_engine, size, - buffer, reply, + res = aux_engine->funcs->read_channel_reply(aux_engine, payload->length, + payload->data, payload->reply, &status); break; case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index aaeb7faac0c4..760bc5cc329d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -273,7 +273,8 @@ static int read_channel_reply(struct aux_engine *engine, uint32_t size, REG_GET(AUX_SW_DATA, AUX_SW_DATA, &reply_result_32); reply_result_32 = reply_result_32 >> 4; - *reply_result = (uint8_t)reply_result_32; + if (reply_result != NULL) + *reply_result = (uint8_t)reply_result_32; if (reply_result_32 == 0) { /* ACK */ uint32_t i = 0; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index 538b83303b86..b609cd886455 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -69,7 +69,9 @@ void dal_ddc_aux_payloads_add( uint32_t address, uint32_t len, uint8_t *data, - bool write); + bool write, + bool mot, + uint32_t defer_delay); struct ddc_service_init_data { struct graphics_object_id id; @@ -103,12 +105,7 @@ bool dal_ddc_service_query_ddc_data( uint32_t read_size); int dc_link_aux_transfer(struct ddc_service *ddc, - unsigned int address, - uint8_t *reply, - void *buffer, - unsigned int size, - enum aux_transaction_type type, - enum i2caux_transaction_action action); + struct aux_payload *payload); void dal_ddc_service_write_scdc_data( struct ddc_service *ddc_service, diff --git a/drivers/gpu/drm/amd/display/include/i2caux_interface.h b/drivers/gpu/drm/amd/display/include/i2caux_interface.h index 13a3c82d118f..1b648fe041da 100644 --- a/drivers/gpu/drm/amd/display/include/i2caux_interface.h +++ b/drivers/gpu/drm/amd/display/include/i2caux_interface.h @@ -40,9 +40,19 @@ struct aux_payload { /* set following flag to write data, * reset it to read data */ bool write; + bool mot; uint32_t address; uint8_t length; uint8_t *data; + /* + * used to return the reply type of the transaction + * ignored if NULL + */ + uint8_t *reply; + /* expressed in milliseconds + * zero means "use default value" + */ + uint32_t defer_delay; }; struct aux_command { From eae5ffa9bd7b898486e467633ef2b8aaf086ad05 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 29 Nov 2018 14:36:25 -0500 Subject: [PATCH 147/539] drm/amd/display: Switch ddc to new aux interface [Why] The old aux interface goes through i2caux and the aux_engine and engine function pointers. The multiple layers of indirection make it hard to tell waht is happening. The aux algorithm does not need to be this complicated: attempt to submit the request. If you get an ack (reply = 0), stop. Otherwise, retry, up to 7 times. [How] Add a new helper function in dce_aux that performs aux retries Move the plumbing of the aux calling code into dce_aux Add functions in ddc that redirect directly to dce_aux Make all aux calls use these functions Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_ddc.c | 177 +++--------------- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 99 ++++++++++ drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 6 + .../gpu/drm/amd/display/dc/inc/dc_link_ddc.h | 12 +- 4 files changed, 138 insertions(+), 156 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 5ac65737a1e8..a343b8b6f7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -34,6 +34,7 @@ #include "core_types.h" #include "dc_link_ddc.h" #include "aux_engine.h" +#include "dce/dce_aux.h" #define AUX_POWER_UP_WA_DELAY 500 #define I2C_OVER_AUX_DEFER_WA_DELAY 70 @@ -164,43 +165,6 @@ static void dal_ddc_i2c_payloads_destroy(struct i2c_payloads **p) } -static struct aux_payloads *dal_ddc_aux_payloads_create(struct dc_context *ctx, uint32_t count) -{ - struct aux_payloads *payloads; - - payloads = kzalloc(sizeof(struct aux_payloads), GFP_KERNEL); - - if (!payloads) - return NULL; - - if (dal_vector_construct( - &payloads->payloads, ctx, count, sizeof(struct aux_payload))) - return payloads; - - kfree(payloads); - return NULL; -} - -static struct aux_payload *dal_ddc_aux_payloads_get(struct aux_payloads *p) -{ - return (struct aux_payload *)p->payloads.container; -} - -static uint32_t dal_ddc_aux_payloads_get_count(struct aux_payloads *p) -{ - return p->payloads.count; -} - -static void dal_ddc_aux_payloads_destroy(struct aux_payloads **p) -{ - if (!p || !*p) - return; - - dal_vector_destruct(&(*p)->payloads); - kfree(*p); - *p = NULL; -} - #define DDC_MIN(a, b) (((a) < (b)) ? (a) : (b)) void dal_ddc_i2c_payloads_add( @@ -224,32 +188,6 @@ void dal_ddc_i2c_payloads_add( } -void dal_ddc_aux_payloads_add( - struct aux_payloads *payloads, - uint32_t address, - uint32_t len, - uint8_t *data, - bool write, - bool mot, - uint32_t defer_delay) -{ - uint32_t payload_size = DEFAULT_AUX_MAX_DATA_SIZE; - uint32_t pos; - - for (pos = 0; pos < len; pos += payload_size) { - struct aux_payload payload = { - .i2c_over_aux = true, - .write = write, - .address = address, - .length = DDC_MIN(payload_size, len - pos), - .data = data + pos, - .reply = NULL, - .mot = mot, - .defer_delay = defer_delay}; - dal_vector_append(&payloads->payloads, &payload); - } -} - static void construct( struct ddc_service *ddc_service, struct ddc_service_init_data *init_data) @@ -578,32 +516,34 @@ bool dal_ddc_service_query_ddc_data( /*TODO: len of payload data for i2c and aux is uint8!!!!, * but we want to read 256 over i2c!!!!*/ if (dal_ddc_service_is_in_aux_transaction_mode(ddc)) { - - struct aux_payloads *payloads = - dal_ddc_aux_payloads_create(ddc->ctx, payloads_num); - - struct aux_command command = { - .payloads = dal_ddc_aux_payloads_get(payloads), - .number_of_payloads = 0, + struct aux_payload write_payload = { + .i2c_over_aux = true, + .write = true, + .mot = true, + .address = address, + .length = write_size, + .data = write_buf, + .reply = NULL, .defer_delay = get_defer_delay(ddc), - .max_defer_write_retry = 0 }; + }; - dal_ddc_aux_payloads_add( - payloads, address, write_size, write_buf, true, true, get_defer_delay(ddc)); + struct aux_payload read_payload = { + .i2c_over_aux = true, + .write = false, + .mot = false, + .address = address, + .length = read_size, + .data = read_buf, + .reply = NULL, + .defer_delay = get_defer_delay(ddc), + }; - dal_ddc_aux_payloads_add( - payloads, address, read_size, read_buf, false, false, get_defer_delay(ddc)); + ret = dc_link_aux_transfer_with_retries(ddc, &write_payload); - command.number_of_payloads = - dal_ddc_aux_payloads_get_count(payloads); - - ret = dal_i2caux_submit_aux_command( - ddc->ctx->i2caux, - ddc->ddc_pin, - &command); - - dal_ddc_aux_payloads_destroy(&payloads); + if (!ret) + return false; + ret = dc_link_aux_transfer_with_retries(ddc, &read_payload); } else { struct i2c_payloads *payloads = dal_ddc_i2c_payloads_create(ddc->ctx, payloads_num); @@ -634,73 +574,16 @@ bool dal_ddc_service_query_ddc_data( return ret; } -static enum i2caux_transaction_action i2caux_action_from_payload(struct aux_payload *payload) -{ - if (payload->i2c_over_aux) { - if (payload->write) { - if (payload->mot) - return I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT; - return I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - } - if (payload->mot) - return I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT; - return I2CAUX_TRANSACTION_ACTION_I2C_READ; - } - if (payload->write) - return I2CAUX_TRANSACTION_ACTION_DP_WRITE; - return I2CAUX_TRANSACTION_ACTION_DP_READ; -} - int dc_link_aux_transfer(struct ddc_service *ddc, struct aux_payload *payload) { - struct ddc *ddc_pin = ddc->ddc_pin; - struct aux_engine *aux_engine; - enum aux_channel_operation_result operation_result; - struct aux_request_transaction_data aux_req; - struct aux_reply_transaction_data aux_rep; - uint8_t returned_bytes = 0; - int res = -1; - uint32_t status; + return dce_aux_transfer(ddc, payload); +} - memset(&aux_req, 0, sizeof(aux_req)); - memset(&aux_rep, 0, sizeof(aux_rep)); - - aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - aux_engine->funcs->acquire(aux_engine, ddc_pin); - - if (payload->i2c_over_aux) - aux_req.type = AUX_TRANSACTION_TYPE_I2C; - else - aux_req.type = AUX_TRANSACTION_TYPE_DP; - - aux_req.action = i2caux_action_from_payload(payload); - - aux_req.address = payload->address; - aux_req.delay = payload->defer_delay * 10; - aux_req.length = payload->length; - aux_req.data = payload->data; - - aux_engine->funcs->submit_channel_request(aux_engine, &aux_req); - operation_result = aux_engine->funcs->get_channel_status(aux_engine, &returned_bytes); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - res = aux_engine->funcs->read_channel_reply(aux_engine, payload->length, - payload->data, payload->reply, - &status); - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - res = 0; - break; - case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - res = -1; - break; - } - aux_engine->funcs->release_engine(aux_engine); - return res; +bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, + struct aux_payload *payload) +{ + return dce_aux_transfer_with_retries(ddc, payload); } /*test only function*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 760bc5cc329d..566061529c58 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -24,6 +24,7 @@ */ #include "dm_services.h" +#include "core_types.h" #include "dce_aux.h" #include "dce/dce_11_0_sh_mask.h" @@ -936,3 +937,101 @@ struct aux_engine *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_eng return &aux_engine110->base; } +static enum i2caux_transaction_action i2caux_action_from_payload(struct aux_payload *payload) +{ + if (payload->i2c_over_aux) { + if (payload->write) { + if (payload->mot) + return I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT; + return I2CAUX_TRANSACTION_ACTION_I2C_WRITE; + } + if (payload->mot) + return I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT; + return I2CAUX_TRANSACTION_ACTION_I2C_READ; + } + if (payload->write) + return I2CAUX_TRANSACTION_ACTION_DP_WRITE; + return I2CAUX_TRANSACTION_ACTION_DP_READ; +} + +int dce_aux_transfer(struct ddc_service *ddc, + struct aux_payload *payload) +{ + struct ddc *ddc_pin = ddc->ddc_pin; + struct aux_engine *aux_engine; + enum aux_channel_operation_result operation_result; + struct aux_request_transaction_data aux_req; + struct aux_reply_transaction_data aux_rep; + uint8_t returned_bytes = 0; + int res = -1; + uint32_t status; + + memset(&aux_req, 0, sizeof(aux_req)); + memset(&aux_rep, 0, sizeof(aux_rep)); + + aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; + aux_engine->funcs->acquire(aux_engine, ddc_pin); + + if (payload->i2c_over_aux) + aux_req.type = AUX_TRANSACTION_TYPE_I2C; + else + aux_req.type = AUX_TRANSACTION_TYPE_DP; + + aux_req.action = i2caux_action_from_payload(payload); + + aux_req.address = payload->address; + aux_req.delay = payload->defer_delay * 10; + aux_req.length = payload->length; + aux_req.data = payload->data; + + aux_engine->funcs->submit_channel_request(aux_engine, &aux_req); + operation_result = aux_engine->funcs->get_channel_status(aux_engine, &returned_bytes); + + switch (operation_result) { + case AUX_CHANNEL_OPERATION_SUCCEEDED: + res = aux_engine->funcs->read_channel_reply(aux_engine, payload->length, + payload->data, payload->reply, + &status); + break; + case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: + res = 0; + break; + case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: + case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: + case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: + res = -1; + break; + } + aux_engine->funcs->release_engine(aux_engine); + return res; +} + +#define AUX_RETRY_MAX 7 + +bool dce_aux_transfer_with_retries(struct ddc_service *ddc, + struct aux_payload *payload) +{ + int i, ret = 0; + uint8_t reply; + bool payload_reply = true; + + if (!payload->reply) { + payload_reply = false; + payload->reply = &reply; + } + + for (i = 0; i < AUX_RETRY_MAX; i++) { + ret = dce_aux_transfer(ddc, payload); + + if (ret >= 0) { + if (*payload->reply == 0) { + if (!payload_reply) + payload->reply = NULL; + return true; + } + } + + msleep(1); + } + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index f7caab85dc80..990e7c7e191e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -108,4 +108,10 @@ void dce110_engine_destroy(struct aux_engine **engine); bool dce110_aux_engine_acquire( struct aux_engine *aux_engine, struct ddc *ddc); + +int dce_aux_transfer(struct ddc_service *ddc, + struct aux_payload *cmd); + +bool dce_aux_transfer_with_retries(struct ddc_service *ddc, + struct aux_payload *cmd); #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index b609cd886455..16fd4dc6c4dd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -64,15 +64,6 @@ void dal_ddc_i2c_payloads_add( uint8_t *data, bool write); -void dal_ddc_aux_payloads_add( - struct aux_payloads *payloads, - uint32_t address, - uint32_t len, - uint8_t *data, - bool write, - bool mot, - uint32_t defer_delay); - struct ddc_service_init_data { struct graphics_object_id id; struct dc_context *ctx; @@ -107,6 +98,9 @@ bool dal_ddc_service_query_ddc_data( int dc_link_aux_transfer(struct ddc_service *ddc, struct aux_payload *payload); +bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, + struct aux_payload *payload); + void dal_ddc_service_write_scdc_data( struct ddc_service *ddc_service, uint32_t pix_clk, From 1877ccf6eea42993c4ca0d5e6876ced8b5b4ee8a Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 30 Nov 2018 10:32:01 -0500 Subject: [PATCH 148/539] drm/amd/display: Change from aux_engine to dce_aux [Why] The aux_engine struct is needlessly complex and is defined multiple times. It contains function pointers that each have only one version and are called only from inside dce_aux. [How] Replace aux_engine with a new struct called dce_aux. Remove all function pointers and call functions directly. Remove unused functions Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 557 +----------------- drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 20 +- .../amd/display/dc/dce100/dce100_resource.c | 2 +- .../amd/display/dc/dce110/dce110_resource.c | 2 +- .../amd/display/dc/dce112/dce112_resource.c | 2 +- .../amd/display/dc/dce120/dce120_resource.c | 2 +- .../drm/amd/display/dc/dce80/dce80_resource.c | 2 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 4 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 2 +- 9 files changed, 45 insertions(+), 548 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 566061529c58..adbb22224e1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -42,17 +42,17 @@ container_of((ptr), struct aux_engine_dce110, base) #define FROM_ENGINE(ptr) \ - FROM_AUX_ENGINE(container_of((ptr), struct aux_engine, base)) + FROM_AUX_ENGINE(container_of((ptr), struct dce_aux, base)) #define FROM_AUX_ENGINE_ENGINE(ptr) \ - container_of((ptr), struct aux_engine, base) + container_of((ptr), struct dce_aux, base) enum { AUX_INVALID_REPLY_RETRY_COUNTER = 1, AUX_TIMED_OUT_RETRY_COUNTER = 2, AUX_DEFER_RETRY_COUNTER = 6 }; static void release_engine( - struct aux_engine *engine) + struct dce_aux *engine) { struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); @@ -67,7 +67,7 @@ static void release_engine( #define DMCU_CAN_ACCESS_AUX 2 static bool is_engine_available( - struct aux_engine *engine) + struct dce_aux *engine) { struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); @@ -80,7 +80,7 @@ static bool is_engine_available( return (field != DMCU_CAN_ACCESS_AUX); } static bool acquire_engine( - struct aux_engine *engine) + struct dce_aux *engine) { struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); @@ -156,7 +156,7 @@ static bool acquire_engine( (0xFF & (address)) static void submit_channel_request( - struct aux_engine *engine, + struct dce_aux *engine, struct aux_request_transaction_data *request) { struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); @@ -248,7 +248,7 @@ static void submit_channel_request( REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); } -static int read_channel_reply(struct aux_engine *engine, uint32_t size, +static int read_channel_reply(struct dce_aux *engine, uint32_t size, uint8_t *buffer, uint8_t *reply_result, uint32_t *sw_status) { @@ -301,61 +301,8 @@ static int read_channel_reply(struct aux_engine *engine, uint32_t size, return 0; } -static void process_channel_reply( - struct aux_engine *engine, - struct aux_reply_transaction_data *reply) -{ - int bytes_replied; - uint8_t reply_result; - uint32_t sw_status; - - bytes_replied = read_channel_reply(engine, reply->length, reply->data, - &reply_result, &sw_status); - - /* in case HPD is LOW, exit AUX transaction */ - if ((sw_status & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { - reply->status = AUX_TRANSACTION_REPLY_HPD_DISCON; - return; - } - - if (bytes_replied < 0) { - /* Need to handle an error case... - * Hopefully, upper layer function won't call this function if - * the number of bytes in the reply was 0, because there was - * surely an error that was asserted that should have been - * handled for hot plug case, this could happens - */ - if (!(sw_status & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { - reply->status = AUX_TRANSACTION_REPLY_INVALID; - ASSERT_CRITICAL(false); - return; - } - } else { - - switch (reply_result) { - case 0: /* ACK */ - reply->status = AUX_TRANSACTION_REPLY_AUX_ACK; - break; - case 1: /* NACK */ - reply->status = AUX_TRANSACTION_REPLY_AUX_NACK; - break; - case 2: /* DEFER */ - reply->status = AUX_TRANSACTION_REPLY_AUX_DEFER; - break; - case 4: /* AUX ACK / I2C NACK */ - reply->status = AUX_TRANSACTION_REPLY_I2C_NACK; - break; - case 8: /* AUX ACK / I2C DEFER */ - reply->status = AUX_TRANSACTION_REPLY_I2C_DEFER; - break; - default: - reply->status = AUX_TRANSACTION_REPLY_INVALID; - } - } -} - static enum aux_channel_operation_result get_channel_status( - struct aux_engine *engine, + struct dce_aux *engine, uint8_t *returned_bytes) { struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); @@ -416,469 +363,22 @@ static enum aux_channel_operation_result get_channel_status( return AUX_CHANNEL_OPERATION_FAILED_TIMEOUT; } } -static void process_read_reply( - struct aux_engine *engine, - struct read_command_context *ctx) -{ - engine->funcs->process_channel_reply(engine, &ctx->reply); - switch (ctx->reply.status) { - case AUX_TRANSACTION_REPLY_AUX_ACK: - ctx->defer_retry_aux = 0; - if (ctx->returned_byte > ctx->current_read_length) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else if (ctx->returned_byte < ctx->current_read_length) { - ctx->current_read_length -= ctx->returned_byte; - - ctx->offset += ctx->returned_byte; - - ++ctx->invalid_reply_retry_aux_on_ack; - - if (ctx->invalid_reply_retry_aux_on_ack > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } - } else { - ctx->status = I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - ctx->transaction_complete = true; - ctx->operation_succeeded = true; - } - break; - case AUX_TRANSACTION_REPLY_AUX_NACK: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - ctx->operation_succeeded = false; - break; - case AUX_TRANSACTION_REPLY_AUX_DEFER: - ++ctx->defer_retry_aux; - - if (ctx->defer_retry_aux > AUX_DEFER_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_I2C_DEFER: - ctx->defer_retry_aux = 0; - - ++ctx->defer_retry_i2c; - - if (ctx->defer_retry_i2c > AUX_DEFER_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} -static void process_read_request( - struct aux_engine *engine, - struct read_command_context *ctx) -{ - enum aux_channel_operation_result operation_result; - - engine->funcs->submit_channel_request(engine, &ctx->request); - - operation_result = engine->funcs->get_channel_status( - engine, &ctx->returned_byte); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - if (ctx->returned_byte > ctx->current_read_length) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else { - ctx->timed_out_retry_aux = 0; - ctx->invalid_reply_retry_aux = 0; - - ctx->reply.length = ctx->returned_byte; - ctx->reply.data = ctx->buffer; - - process_read_reply(engine, ctx); - } - break; - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - ++ctx->invalid_reply_retry_aux; - - if (ctx->invalid_reply_retry_aux > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else - udelay(400); - break; - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - ++ctx->timed_out_retry_aux; - - if (ctx->timed_out_retry_aux > AUX_TIMED_OUT_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else { - /* DP 1.2a, table 2-58: - * "S3: AUX Request CMD PENDING: - * retry 3 times, with 400usec wait on each" - * The HW timeout is set to 550usec, - * so we should not wait here - */ - } - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} -static bool read_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - struct read_command_context ctx; - - ctx.buffer = request->payload.data; - ctx.current_read_length = request->payload.length; - ctx.offset = 0; - ctx.timed_out_retry_aux = 0; - ctx.invalid_reply_retry_aux = 0; - ctx.defer_retry_aux = 0; - ctx.defer_retry_i2c = 0; - ctx.invalid_reply_retry_aux_on_ack = 0; - ctx.transaction_complete = false; - ctx.operation_succeeded = true; - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - ctx.request.type = AUX_TRANSACTION_TYPE_DP; - ctx.request.action = I2CAUX_TRANSACTION_ACTION_DP_READ; - ctx.request.address = request->payload.address; - } else if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) { - ctx.request.type = AUX_TRANSACTION_TYPE_I2C; - ctx.request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_READ; - ctx.request.address = request->payload.address >> 1; - } else { - /* in DAL2, there was no return in such case */ - BREAK_TO_DEBUGGER(); - return false; - } - - ctx.request.delay = 0; - - do { - memset(ctx.buffer + ctx.offset, 0, ctx.current_read_length); - - ctx.request.data = ctx.buffer + ctx.offset; - ctx.request.length = ctx.current_read_length; - - process_read_request(engine, &ctx); - - request->status = ctx.status; - - if (ctx.operation_succeeded && !ctx.transaction_complete) - if (ctx.request.type == AUX_TRANSACTION_TYPE_I2C) - msleep(engine->delay); - } while (ctx.operation_succeeded && !ctx.transaction_complete); - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - DC_LOG_I2C_AUX("READ: addr:0x%x value:0x%x Result:%d", - request->payload.address, - request->payload.data[0], - ctx.operation_succeeded); - } - - return ctx.operation_succeeded; -} - -static void process_write_reply( - struct aux_engine *engine, - struct write_command_context *ctx) -{ - engine->funcs->process_channel_reply(engine, &ctx->reply); - - switch (ctx->reply.status) { - case AUX_TRANSACTION_REPLY_AUX_ACK: - ctx->operation_succeeded = true; - - if (ctx->returned_byte) { - ctx->request.action = ctx->mot ? - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST; - - ctx->current_write_length = 0; - - ++ctx->ack_m_retry; - - if (ctx->ack_m_retry > AUX_DEFER_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else - udelay(300); - } else { - ctx->status = I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - ctx->defer_retry_aux = 0; - ctx->ack_m_retry = 0; - ctx->transaction_complete = true; - } - break; - case AUX_TRANSACTION_REPLY_AUX_NACK: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - ctx->operation_succeeded = false; - break; - case AUX_TRANSACTION_REPLY_AUX_DEFER: - ++ctx->defer_retry_aux; - - if (ctx->defer_retry_aux > ctx->max_defer_retry) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_I2C_DEFER: - ctx->defer_retry_aux = 0; - ctx->current_write_length = 0; - - ctx->request.action = ctx->mot ? - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST; - - ++ctx->defer_retry_i2c; - - if (ctx->defer_retry_i2c > ctx->max_defer_retry) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} -static void process_write_request( - struct aux_engine *engine, - struct write_command_context *ctx) -{ - enum aux_channel_operation_result operation_result; - - engine->funcs->submit_channel_request(engine, &ctx->request); - - operation_result = engine->funcs->get_channel_status( - engine, &ctx->returned_byte); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - ctx->timed_out_retry_aux = 0; - ctx->invalid_reply_retry_aux = 0; - - ctx->reply.length = ctx->returned_byte; - ctx->reply.data = ctx->reply_data; - - process_write_reply(engine, ctx); - break; - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - ++ctx->invalid_reply_retry_aux; - - if (ctx->invalid_reply_retry_aux > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else - udelay(400); - break; - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - ++ctx->timed_out_retry_aux; - - if (ctx->timed_out_retry_aux > AUX_TIMED_OUT_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else { - /* DP 1.2a, table 2-58: - * "S3: AUX Request CMD PENDING: - * retry 3 times, with 400usec wait on each" - * The HW timeout is set to 550usec, - * so we should not wait here - */ - } - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} -static bool write_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - struct write_command_context ctx; - - ctx.mot = middle_of_transaction; - ctx.buffer = request->payload.data; - ctx.current_write_length = request->payload.length; - ctx.timed_out_retry_aux = 0; - ctx.invalid_reply_retry_aux = 0; - ctx.defer_retry_aux = 0; - ctx.defer_retry_i2c = 0; - ctx.ack_m_retry = 0; - ctx.transaction_complete = false; - ctx.operation_succeeded = true; - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - ctx.request.type = AUX_TRANSACTION_TYPE_DP; - ctx.request.action = I2CAUX_TRANSACTION_ACTION_DP_WRITE; - ctx.request.address = request->payload.address; - } else if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) { - ctx.request.type = AUX_TRANSACTION_TYPE_I2C; - ctx.request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - ctx.request.address = request->payload.address >> 1; - } else { - /* in DAL2, there was no return in such case */ - BREAK_TO_DEBUGGER(); - return false; - } - - ctx.request.delay = 0; - - ctx.max_defer_retry = - (engine->max_defer_write_retry > AUX_DEFER_RETRY_COUNTER) ? - engine->max_defer_write_retry : AUX_DEFER_RETRY_COUNTER; - - do { - ctx.request.data = ctx.buffer; - ctx.request.length = ctx.current_write_length; - - process_write_request(engine, &ctx); - - request->status = ctx.status; - - if (ctx.operation_succeeded && !ctx.transaction_complete) - if (ctx.request.type == AUX_TRANSACTION_TYPE_I2C) - msleep(engine->delay); - } while (ctx.operation_succeeded && !ctx.transaction_complete); - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - DC_LOG_I2C_AUX("WRITE: addr:0x%x value:0x%x Result:%d", - request->payload.address, - request->payload.data[0], - ctx.operation_succeeded); - } - - return ctx.operation_succeeded; -} -static bool end_of_transaction_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request) -{ - struct i2caux_transaction_request dummy_request; - uint8_t dummy_data; - - /* [tcheng] We only need to send the stop (read with MOT = 0) - * for I2C-over-Aux, not native AUX - */ - - if (request->payload.address_space != - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) - return false; - - dummy_request.operation = request->operation; - dummy_request.payload.address_space = request->payload.address_space; - dummy_request.payload.address = request->payload.address; - - /* - * Add a dummy byte due to some receiver quirk - * where one byte is sent along with MOT = 0. - * Ideally this should be 0. - */ - - dummy_request.payload.length = 0; - dummy_request.payload.data = &dummy_data; - - if (request->operation == I2CAUX_TRANSACTION_READ) - return read_command(engine, &dummy_request, false); - else - return write_command(engine, &dummy_request, false); - - /* according Syed, it does not need now DoDummyMOT */ -} -static bool submit_request( - struct aux_engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - - bool result; - bool mot_used = true; - - switch (request->operation) { - case I2CAUX_TRANSACTION_READ: - result = read_command(engine, request, mot_used); - break; - case I2CAUX_TRANSACTION_WRITE: - result = write_command(engine, request, mot_used); - break; - default: - result = false; - } - - /* [tcheng] - * need to send stop for the last transaction to free up the AUX - * if the above command fails, this would be the last transaction - */ - - if (!middle_of_transaction || !result) - end_of_transaction_command(engine, request); - - /* mask AUX interrupt */ - - return result; -} enum i2caux_engine_type get_engine_type( - const struct aux_engine *engine) + const struct dce_aux *engine) { return I2CAUX_ENGINE_TYPE_AUX; } static bool acquire( - struct aux_engine *engine, + struct dce_aux *engine, struct ddc *ddc) { enum gpio_result result; - if (engine->funcs->is_engine_available) { - /*check whether SW could use the engine*/ - if (!engine->funcs->is_engine_available(engine)) - return false; - } + if (!is_engine_available(engine)) + return false; result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE, GPIO_DDC_CONFIG_TYPE_MODE_AUX); @@ -886,7 +386,7 @@ static bool acquire( if (result != GPIO_RESULT_OK) return false; - if (!engine->funcs->acquire_engine(engine)) { + if (!acquire_engine(engine)) { dal_ddc_close(ddc); return false; } @@ -896,21 +396,7 @@ static bool acquire( return true; } -static const struct aux_engine_funcs aux_engine_funcs = { - .acquire_engine = acquire_engine, - .submit_channel_request = submit_channel_request, - .process_channel_reply = process_channel_reply, - .read_channel_reply = read_channel_reply, - .get_channel_status = get_channel_status, - .is_engine_available = is_engine_available, - .release_engine = release_engine, - .destroy_engine = dce110_engine_destroy, - .submit_request = submit_request, - .get_engine_type = get_engine_type, - .acquire = acquire, -}; - -void dce110_engine_destroy(struct aux_engine **engine) +void dce110_engine_destroy(struct dce_aux **engine) { struct aux_engine_dce110 *engine110 = FROM_AUX_ENGINE(*engine); @@ -919,7 +405,7 @@ void dce110_engine_destroy(struct aux_engine **engine) *engine = NULL; } -struct aux_engine *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110, +struct dce_aux *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110, struct dc_context *ctx, uint32_t inst, uint32_t timeout_period, @@ -929,7 +415,6 @@ struct aux_engine *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_eng aux_engine110->base.ctx = ctx; aux_engine110->base.delay = 0; aux_engine110->base.max_defer_write_retry = 0; - aux_engine110->base.funcs = &aux_engine_funcs; aux_engine110->base.inst = inst; aux_engine110->timeout_period = timeout_period; aux_engine110->regs = regs; @@ -958,7 +443,7 @@ int dce_aux_transfer(struct ddc_service *ddc, struct aux_payload *payload) { struct ddc *ddc_pin = ddc->ddc_pin; - struct aux_engine *aux_engine; + struct dce_aux *aux_engine; enum aux_channel_operation_result operation_result; struct aux_request_transaction_data aux_req; struct aux_reply_transaction_data aux_rep; @@ -970,7 +455,7 @@ int dce_aux_transfer(struct ddc_service *ddc, memset(&aux_rep, 0, sizeof(aux_rep)); aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - aux_engine->funcs->acquire(aux_engine, ddc_pin); + acquire(aux_engine, ddc_pin); if (payload->i2c_over_aux) aux_req.type = AUX_TRANSACTION_TYPE_I2C; @@ -984,12 +469,12 @@ int dce_aux_transfer(struct ddc_service *ddc, aux_req.length = payload->length; aux_req.data = payload->data; - aux_engine->funcs->submit_channel_request(aux_engine, &aux_req); - operation_result = aux_engine->funcs->get_channel_status(aux_engine, &returned_bytes); + submit_channel_request(aux_engine, &aux_req); + operation_result = get_channel_status(aux_engine, &returned_bytes); switch (operation_result) { case AUX_CHANNEL_OPERATION_SUCCEEDED: - res = aux_engine->funcs->read_channel_reply(aux_engine, payload->length, + res = read_channel_reply(aux_engine, payload->length, payload->data, payload->reply, &status); break; @@ -1002,7 +487,7 @@ int dce_aux_transfer(struct ddc_service *ddc, res = -1; break; } - aux_engine->funcs->release_engine(aux_engine); + release_engine(aux_engine); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index 990e7c7e191e..13e707e2e391 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -75,8 +75,20 @@ enum { /* This is the timeout as defined in DP 1.2a, */ SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 }; + +struct dce_aux { + uint32_t inst; + struct ddc *ddc; + struct dc_context *ctx; + /* following values are expressed in milliseconds */ + uint32_t delay; + uint32_t max_defer_write_retry; + + bool acquire_reset; +}; + struct aux_engine_dce110 { - struct aux_engine base; + struct dce_aux base; const struct dce110_aux_registers *regs; struct { uint32_t aux_control; @@ -96,17 +108,17 @@ struct aux_engine_dce110_init_data { const struct dce110_aux_registers *regs; }; -struct aux_engine *dce110_aux_engine_construct( +struct dce_aux *dce110_aux_engine_construct( struct aux_engine_dce110 *aux_engine110, struct dc_context *ctx, uint32_t inst, uint32_t timeout_period, const struct dce110_aux_registers *regs); -void dce110_engine_destroy(struct aux_engine **engine); +void dce110_engine_destroy(struct dce_aux **engine); bool dce110_aux_engine_acquire( - struct aux_engine *aux_engine, + struct dce_aux *aux_engine, struct ddc *ddc); int dce_aux_transfer(struct ddc_service *ddc, diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 6ae51a5dfc04..c3f616a739d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -587,7 +587,7 @@ struct output_pixel_processor *dce100_opp_create( return &opp->base; } -struct aux_engine *dce100_aux_engine_create( +struct dce_aux *dce100_aux_engine_create( struct dc_context *ctx, uint32_t inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 3c27c31eaf7c..7d46eb7a2ace 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -606,7 +606,7 @@ static struct output_pixel_processor *dce110_opp_create( return &opp->base; } -struct aux_engine *dce110_aux_engine_create( +struct dce_aux *dce110_aux_engine_create( struct dc_context *ctx, uint32_t inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index c2a713399726..d930e09ccfb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -607,7 +607,7 @@ struct output_pixel_processor *dce112_opp_create( return &opp->base; } -struct aux_engine *dce112_aux_engine_create( +struct dce_aux *dce112_aux_engine_create( struct dc_context *ctx, uint32_t inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 48a210ec975b..23d7d4d85207 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -391,7 +391,7 @@ struct output_pixel_processor *dce120_opp_create( ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); return &opp->base; } -struct aux_engine *dce120_aux_engine_create( +struct dce_aux *dce120_aux_engine_create( struct dc_context *ctx, uint32_t inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index cdd1d6b7b9f2..e0bba0bc996b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -467,7 +467,7 @@ static struct output_pixel_processor *dce80_opp_create( return &opp->base; } -struct aux_engine *dce80_aux_engine_create( +struct dce_aux *dce80_aux_engine_create( struct dc_context *ctx, uint32_t inst) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index add84f9df90c..3e6a6025419a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -609,7 +609,7 @@ static struct output_pixel_processor *dcn10_opp_create( return &opp->base; } -struct aux_engine *dcn10_aux_engine_create( +struct dce_aux *dcn10_aux_engine_create( struct dc_context *ctx, uint32_t inst) { @@ -911,7 +911,7 @@ static void destruct(struct dcn10_resource_pool *pool) for (i = 0; i < pool->base.res_cap->num_ddc; i++) { if (pool->base.engines[i] != NULL) - pool->base.engines[i]->funcs->destroy_engine(&pool->base.engines[i]); + dce110_engine_destroy(&pool->base.engines[i]); if (pool->base.hw_i2cs[i] != NULL) { kfree(pool->base.hw_i2cs[i]); pool->base.hw_i2cs[i] = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index b019a5e0e018..986ed1728644 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -146,7 +146,7 @@ struct resource_pool { struct mpc *mpc; struct pp_smu_funcs_rv *pp_smu; struct pp_smu_display_requirement_rv pp_smu_req; - struct aux_engine *engines[MAX_PIPES]; + struct dce_aux *engines[MAX_PIPES]; struct dce_i2c_hw *hw_i2cs[MAX_PIPES]; struct dce_i2c_sw *sw_i2cs[MAX_PIPES]; bool i2c_hw_buffer_in_use; From 077d0b6ba2114b86d353fce4eaca6cfb2e344eb6 Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 30 Nov 2018 11:02:59 -0500 Subject: [PATCH 149/539] drm/amd/display: Remove i2caux folder [Why] It is huge, unmaintainable, needlessly layered, and obsolete [How] Remove it. All of it. Also remove the i2caux struct in dc_context and the code that created and destructed it Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 - .../gpu/drm/amd/display/dc/core/dc_link_ddc.c | 1 - drivers/gpu/drm/amd/display/dc/dc_types.h | 1 - drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 4 +- .../drm/amd/display/dc/i2caux/aux_engine.c | 606 ------------ .../drm/amd/display/dc/i2caux/aux_engine.h | 86 -- .../display/dc/i2caux/dce100/i2caux_dce100.c | 106 --- .../display/dc/i2caux/dce100/i2caux_dce100.h | 32 - .../dc/i2caux/dce110/aux_engine_dce110.c | 505 ---------- .../dc/i2caux/dce110/aux_engine_dce110.h | 78 -- .../dc/i2caux/dce110/i2c_hw_engine_dce110.h | 218 ----- .../dc/i2caux/dce110/i2c_sw_engine_dce110.c | 160 ---- .../dc/i2caux/dce110/i2c_sw_engine_dce110.h | 43 - .../display/dc/i2caux/dce110/i2caux_dce110.c | 329 ------- .../display/dc/i2caux/dce110/i2caux_dce110.h | 54 -- .../display/dc/i2caux/dce120/i2caux_dce120.c | 120 --- .../display/dc/i2caux/dce120/i2caux_dce120.h | 32 - .../dc/i2caux/dce80/i2c_hw_engine_dce80.c | 875 ------------------ .../dc/i2caux/dce80/i2c_hw_engine_dce80.h | 54 -- .../dc/i2caux/dce80/i2c_sw_engine_dce80.h | 43 - .../display/dc/i2caux/dce80/i2caux_dce80.c | 284 ------ .../display/dc/i2caux/dce80/i2caux_dce80.h | 38 - .../display/dc/i2caux/dcn10/i2caux_dcn10.c | 120 --- .../display/dc/i2caux/dcn10/i2caux_dcn10.h | 32 - .../dc/i2caux/diagnostics/i2caux_diag.c | 97 -- .../dc/i2caux/diagnostics/i2caux_diag.h | 32 - .../gpu/drm/amd/display/dc/i2caux/engine.h | 111 --- .../drm/amd/display/dc/i2caux/engine_base.c | 52 -- .../drm/amd/display/dc/i2caux/i2c_engine.c | 118 --- .../drm/amd/display/dc/i2caux/i2c_engine.h | 115 --- .../display/dc/i2caux/i2c_generic_hw_engine.c | 284 ------ .../display/dc/i2caux/i2c_generic_hw_engine.h | 77 -- .../drm/amd/display/dc/i2caux/i2c_hw_engine.c | 251 ----- .../drm/amd/display/dc/i2caux/i2c_hw_engine.h | 80 -- .../drm/amd/display/dc/i2caux/i2c_sw_engine.c | 601 ------------ .../drm/amd/display/dc/i2caux/i2c_sw_engine.h | 81 -- .../gpu/drm/amd/display/dc/i2caux/i2caux.h | 122 --- .../amd/display/include/i2caux_interface.h | 23 - 39 files changed, 4 insertions(+), 5874 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/engine_base.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2caux.h diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index aed538a4d1ba..fa24e4c38794 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -23,7 +23,7 @@ # Makefile for Display Core (dc) component. # -DC_LIBS = basics bios calcs dce gpio i2caux irq virtual +DC_LIBS = basics bios calcs dce gpio irq virtual ifdef CONFIG_DRM_AMD_DC_DCN1_0 DC_LIBS += dcn10 dml diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c94bc405d135..56702840710b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -585,9 +585,6 @@ static void destruct(struct dc *dc) if (dc->ctx->gpio_service) dal_gpio_service_destroy(&dc->ctx->gpio_service); - if (dc->ctx->i2caux) - dal_i2caux_destroy(&dc->ctx->i2caux); - if (dc->ctx->created_bios) dal_bios_parser_destroy(&dc->ctx->dc_bios); @@ -709,14 +706,6 @@ static bool construct(struct dc *dc, dc_ctx->created_bios = true; } - /* Create I2C AUX */ - dc_ctx->i2caux = dal_i2caux_create(dc_ctx); - - if (!dc_ctx->i2caux) { - ASSERT_CRITICAL(false); - goto fail; - } - dc_ctx->perf_trace = dc_perf_trace_create(); if (!dc_ctx->perf_trace) { ASSERT_CRITICAL(false); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index a343b8b6f7fb..b7ee63cd8dc7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -33,7 +33,6 @@ #include "include/vector.h" #include "core_types.h" #include "dc_link_ddc.h" -#include "aux_engine.h" #include "dce/dce_aux.h" #define AUX_POWER_UP_WA_DELAY 500 diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 07cd165c389c..56e7f3dab15a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -97,7 +97,6 @@ struct dc_context { struct dc_bios *dc_bios; bool created_bios; struct gpio_service *gpio_service; - struct i2caux *i2caux; uint32_t dc_sink_id_count; uint32_t dc_stream_id_count; uint64_t fbc_gpu_addr; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index 13e707e2e391..d27f22c05e4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -25,7 +25,9 @@ #ifndef __DAL_AUX_ENGINE_DCE110_H__ #define __DAL_AUX_ENGINE_DCE110_H__ -#include "aux_engine.h" + +#include "i2caux_interface.h" +#include "inc/hw/aux_engine.h" #define AUX_COMMON_REG_LIST(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c deleted file mode 100644 index 8cbf38b2470d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dm_event_log.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "engine.h" - -/* - * Header of this unit - */ - -#include "aux_engine.h" - -/* - * Post-requisites: headers required by this unit - */ - -#include "include/link_service_types.h" - -/* - * This unit - */ - -enum { - AUX_INVALID_REPLY_RETRY_COUNTER = 1, - AUX_TIMED_OUT_RETRY_COUNTER = 2, - AUX_DEFER_RETRY_COUNTER = 6 -}; - -#define FROM_ENGINE(ptr) \ - container_of((ptr), struct aux_engine, base) -#define DC_LOGGER \ - engine->base.ctx->logger - -enum i2caux_engine_type dal_aux_engine_get_engine_type( - const struct engine *engine) -{ - return I2CAUX_ENGINE_TYPE_AUX; -} - -bool dal_aux_engine_acquire( - struct engine *engine, - struct ddc *ddc) -{ - struct aux_engine *aux_engine = FROM_ENGINE(engine); - - enum gpio_result result; - if (aux_engine->funcs->is_engine_available) { - /*check whether SW could use the engine*/ - if (!aux_engine->funcs->is_engine_available(aux_engine)) { - return false; - } - } - - result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE, - GPIO_DDC_CONFIG_TYPE_MODE_AUX); - - if (result != GPIO_RESULT_OK) - return false; - - if (!aux_engine->funcs->acquire_engine(aux_engine)) { - dal_ddc_close(ddc); - return false; - } - - engine->ddc = ddc; - - return true; -} - -struct read_command_context { - uint8_t *buffer; - uint32_t current_read_length; - uint32_t offset; - enum i2caux_transaction_status status; - - struct aux_request_transaction_data request; - struct aux_reply_transaction_data reply; - - uint8_t returned_byte; - - uint32_t timed_out_retry_aux; - uint32_t invalid_reply_retry_aux; - uint32_t defer_retry_aux; - uint32_t defer_retry_i2c; - uint32_t invalid_reply_retry_aux_on_ack; - - bool transaction_complete; - bool operation_succeeded; -}; - -static void process_read_reply( - struct aux_engine *engine, - struct read_command_context *ctx) -{ - engine->funcs->process_channel_reply(engine, &ctx->reply); - - switch (ctx->reply.status) { - case AUX_TRANSACTION_REPLY_AUX_ACK: - ctx->defer_retry_aux = 0; - if (ctx->returned_byte > ctx->current_read_length) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else if (ctx->returned_byte < ctx->current_read_length) { - ctx->current_read_length -= ctx->returned_byte; - - ctx->offset += ctx->returned_byte; - - ++ctx->invalid_reply_retry_aux_on_ack; - - if (ctx->invalid_reply_retry_aux_on_ack > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } - } else { - ctx->status = I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - ctx->transaction_complete = true; - ctx->operation_succeeded = true; - } - break; - case AUX_TRANSACTION_REPLY_AUX_NACK: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - ctx->operation_succeeded = false; - break; - case AUX_TRANSACTION_REPLY_AUX_DEFER: - ++ctx->defer_retry_aux; - - if (ctx->defer_retry_aux > AUX_DEFER_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_I2C_DEFER: - ctx->defer_retry_aux = 0; - - ++ctx->defer_retry_i2c; - - if (ctx->defer_retry_i2c > AUX_DEFER_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} - -static void process_read_request( - struct aux_engine *engine, - struct read_command_context *ctx) -{ - enum aux_channel_operation_result operation_result; - - engine->funcs->submit_channel_request(engine, &ctx->request); - - operation_result = engine->funcs->get_channel_status( - engine, &ctx->returned_byte); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - if (ctx->returned_byte > ctx->current_read_length) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else { - ctx->timed_out_retry_aux = 0; - ctx->invalid_reply_retry_aux = 0; - - ctx->reply.length = ctx->returned_byte; - ctx->reply.data = ctx->buffer; - - process_read_reply(engine, ctx); - } - break; - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - ++ctx->invalid_reply_retry_aux; - - if (ctx->invalid_reply_retry_aux > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else - udelay(400); - break; - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - ++ctx->timed_out_retry_aux; - - if (ctx->timed_out_retry_aux > AUX_TIMED_OUT_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else { - /* DP 1.2a, table 2-58: - * "S3: AUX Request CMD PENDING: - * retry 3 times, with 400usec wait on each" - * The HW timeout is set to 550usec, - * so we should not wait here */ - } - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} - -static bool read_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - struct read_command_context ctx; - - ctx.buffer = request->payload.data; - ctx.current_read_length = request->payload.length; - ctx.offset = 0; - ctx.timed_out_retry_aux = 0; - ctx.invalid_reply_retry_aux = 0; - ctx.defer_retry_aux = 0; - ctx.defer_retry_i2c = 0; - ctx.invalid_reply_retry_aux_on_ack = 0; - ctx.transaction_complete = false; - ctx.operation_succeeded = true; - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - ctx.request.type = AUX_TRANSACTION_TYPE_DP; - ctx.request.action = I2CAUX_TRANSACTION_ACTION_DP_READ; - ctx.request.address = request->payload.address; - } else if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) { - ctx.request.type = AUX_TRANSACTION_TYPE_I2C; - ctx.request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_READ; - ctx.request.address = request->payload.address >> 1; - } else { - /* in DAL2, there was no return in such case */ - BREAK_TO_DEBUGGER(); - return false; - } - - ctx.request.delay = 0; - - do { - memset(ctx.buffer + ctx.offset, 0, ctx.current_read_length); - - ctx.request.data = ctx.buffer + ctx.offset; - ctx.request.length = ctx.current_read_length; - - process_read_request(engine, &ctx); - - request->status = ctx.status; - - if (ctx.operation_succeeded && !ctx.transaction_complete) - if (ctx.request.type == AUX_TRANSACTION_TYPE_I2C) - msleep(engine->delay); - } while (ctx.operation_succeeded && !ctx.transaction_complete); - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - DC_LOG_I2C_AUX("READ: addr:0x%x value:0x%x Result:%d", - request->payload.address, - request->payload.data[0], - ctx.operation_succeeded); - } - - return ctx.operation_succeeded; -} - -struct write_command_context { - bool mot; - - uint8_t *buffer; - uint32_t current_write_length; - enum i2caux_transaction_status status; - - struct aux_request_transaction_data request; - struct aux_reply_transaction_data reply; - - uint8_t returned_byte; - - uint32_t timed_out_retry_aux; - uint32_t invalid_reply_retry_aux; - uint32_t defer_retry_aux; - uint32_t defer_retry_i2c; - uint32_t max_defer_retry; - uint32_t ack_m_retry; - - uint8_t reply_data[DEFAULT_AUX_MAX_DATA_SIZE]; - - bool transaction_complete; - bool operation_succeeded; -}; - -static void process_write_reply( - struct aux_engine *engine, - struct write_command_context *ctx) -{ - engine->funcs->process_channel_reply(engine, &ctx->reply); - - switch (ctx->reply.status) { - case AUX_TRANSACTION_REPLY_AUX_ACK: - ctx->operation_succeeded = true; - - if (ctx->returned_byte) { - ctx->request.action = ctx->mot ? - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST; - - ctx->current_write_length = 0; - - ++ctx->ack_m_retry; - - if (ctx->ack_m_retry > AUX_DEFER_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else - udelay(300); - } else { - ctx->status = I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - ctx->defer_retry_aux = 0; - ctx->ack_m_retry = 0; - ctx->transaction_complete = true; - } - break; - case AUX_TRANSACTION_REPLY_AUX_NACK: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - ctx->operation_succeeded = false; - break; - case AUX_TRANSACTION_REPLY_AUX_DEFER: - ++ctx->defer_retry_aux; - - if (ctx->defer_retry_aux > ctx->max_defer_retry) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_I2C_DEFER: - ctx->defer_retry_aux = 0; - ctx->current_write_length = 0; - - ctx->request.action = ctx->mot ? - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST; - - ++ctx->defer_retry_i2c; - - if (ctx->defer_retry_i2c > ctx->max_defer_retry) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } - break; - case AUX_TRANSACTION_REPLY_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} - -static void process_write_request( - struct aux_engine *engine, - struct write_command_context *ctx) -{ - enum aux_channel_operation_result operation_result; - - engine->funcs->submit_channel_request(engine, &ctx->request); - - operation_result = engine->funcs->get_channel_status( - engine, &ctx->returned_byte); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - ctx->timed_out_retry_aux = 0; - ctx->invalid_reply_retry_aux = 0; - - ctx->reply.length = ctx->returned_byte; - ctx->reply.data = ctx->reply_data; - - process_write_reply(engine, ctx); - break; - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - ++ctx->invalid_reply_retry_aux; - - if (ctx->invalid_reply_retry_aux > - AUX_INVALID_REPLY_RETRY_COUNTER) { - ctx->status = - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR; - ctx->operation_succeeded = false; - } else - udelay(400); - break; - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: - ++ctx->timed_out_retry_aux; - - if (ctx->timed_out_retry_aux > AUX_TIMED_OUT_RETRY_COUNTER) { - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - ctx->operation_succeeded = false; - } else { - /* DP 1.2a, table 2-58: - * "S3: AUX Request CMD PENDING: - * retry 3 times, with 400usec wait on each" - * The HW timeout is set to 550usec, - * so we should not wait here */ - } - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; - ctx->operation_succeeded = false; - break; - default: - ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; - ctx->operation_succeeded = false; - } -} - -static bool write_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - struct write_command_context ctx; - - ctx.mot = middle_of_transaction; - ctx.buffer = request->payload.data; - ctx.current_write_length = request->payload.length; - ctx.timed_out_retry_aux = 0; - ctx.invalid_reply_retry_aux = 0; - ctx.defer_retry_aux = 0; - ctx.defer_retry_i2c = 0; - ctx.ack_m_retry = 0; - ctx.transaction_complete = false; - ctx.operation_succeeded = true; - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - ctx.request.type = AUX_TRANSACTION_TYPE_DP; - ctx.request.action = I2CAUX_TRANSACTION_ACTION_DP_WRITE; - ctx.request.address = request->payload.address; - } else if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) { - ctx.request.type = AUX_TRANSACTION_TYPE_I2C; - ctx.request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - ctx.request.address = request->payload.address >> 1; - } else { - /* in DAL2, there was no return in such case */ - BREAK_TO_DEBUGGER(); - return false; - } - - ctx.request.delay = 0; - - ctx.max_defer_retry = - (engine->max_defer_write_retry > AUX_DEFER_RETRY_COUNTER) ? - engine->max_defer_write_retry : AUX_DEFER_RETRY_COUNTER; - - do { - ctx.request.data = ctx.buffer; - ctx.request.length = ctx.current_write_length; - - process_write_request(engine, &ctx); - - request->status = ctx.status; - - if (ctx.operation_succeeded && !ctx.transaction_complete) - if (ctx.request.type == AUX_TRANSACTION_TYPE_I2C) - msleep(engine->delay); - } while (ctx.operation_succeeded && !ctx.transaction_complete); - - if (request->payload.address_space == - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD) { - DC_LOG_I2C_AUX("WRITE: addr:0x%x value:0x%x Result:%d", - request->payload.address, - request->payload.data[0], - ctx.operation_succeeded); - } - - return ctx.operation_succeeded; -} - -static bool end_of_transaction_command( - struct aux_engine *engine, - struct i2caux_transaction_request *request) -{ - struct i2caux_transaction_request dummy_request; - uint8_t dummy_data; - - /* [tcheng] We only need to send the stop (read with MOT = 0) - * for I2C-over-Aux, not native AUX */ - - if (request->payload.address_space != - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C) - return false; - - dummy_request.operation = request->operation; - dummy_request.payload.address_space = request->payload.address_space; - dummy_request.payload.address = request->payload.address; - - /* - * Add a dummy byte due to some receiver quirk - * where one byte is sent along with MOT = 0. - * Ideally this should be 0. - */ - - dummy_request.payload.length = 0; - dummy_request.payload.data = &dummy_data; - - if (request->operation == I2CAUX_TRANSACTION_READ) - return read_command(engine, &dummy_request, false); - else - return write_command(engine, &dummy_request, false); - - /* according Syed, it does not need now DoDummyMOT */ -} - -bool dal_aux_engine_submit_request( - struct engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction) -{ - struct aux_engine *aux_engine = FROM_ENGINE(engine); - - bool result; - bool mot_used = true; - - switch (request->operation) { - case I2CAUX_TRANSACTION_READ: - result = read_command(aux_engine, request, mot_used); - break; - case I2CAUX_TRANSACTION_WRITE: - result = write_command(aux_engine, request, mot_used); - break; - default: - result = false; - } - - /* [tcheng] - * need to send stop for the last transaction to free up the AUX - * if the above command fails, this would be the last transaction */ - - if (!middle_of_transaction || !result) - end_of_transaction_command(aux_engine, request); - - /* mask AUX interrupt */ - - return result; -} - -void dal_aux_engine_construct( - struct aux_engine *engine, - struct dc_context *ctx) -{ - dal_i2caux_construct_engine(&engine->base, ctx); - engine->delay = 0; - engine->max_defer_write_retry = 0; -} - -void dal_aux_engine_destruct( - struct aux_engine *engine) -{ - dal_i2caux_destruct_engine(&engine->base); -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h deleted file mode 100644 index c33a2898d967..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_AUX_ENGINE_H__ -#define __DAL_AUX_ENGINE_H__ - -#include "dc_ddc_types.h" - -struct aux_engine; - -struct aux_engine_funcs { - void (*destroy)( - struct aux_engine **ptr); - bool (*acquire_engine)( - struct aux_engine *engine); - void (*configure)( - struct aux_engine *engine, - union aux_config cfg); - void (*submit_channel_request)( - struct aux_engine *engine, - struct aux_request_transaction_data *request); - void (*process_channel_reply)( - struct aux_engine *engine, - struct aux_reply_transaction_data *reply); - int (*read_channel_reply)( - struct aux_engine *engine, - uint32_t size, - uint8_t *buffer, - uint8_t *reply_result, - uint32_t *sw_status); - enum aux_channel_operation_result (*get_channel_status)( - struct aux_engine *engine, - uint8_t *returned_bytes); - bool (*is_engine_available) ( - struct aux_engine *engine); -}; - -struct aux_engine { - struct engine base; - const struct aux_engine_funcs *funcs; - /* following values are expressed in milliseconds */ - uint32_t delay; - uint32_t max_defer_write_retry; - - bool acquire_reset; -}; - -void dal_aux_engine_construct( - struct aux_engine *engine, - struct dc_context *ctx); - -void dal_aux_engine_destruct( - struct aux_engine *engine); -bool dal_aux_engine_submit_request( - struct engine *ptr, - struct i2caux_transaction_request *request, - bool middle_of_transaction); -bool dal_aux_engine_acquire( - struct engine *ptr, - struct ddc *ddc); -enum i2caux_engine_type dal_aux_engine_get_engine_type( - const struct engine *engine); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.c deleted file mode 100644 index 8b704ab0471c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -#include "../dce110/aux_engine_dce110.h" -#include "../dce110/i2c_hw_engine_dce110.h" -#include "../dce110/i2caux_dce110.h" - -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -#define hw_engine_regs(id)\ -{\ - I2C_HW_ENGINE_COMMON_REG_LIST(id) \ -} - -static const struct dce110_aux_registers dce100_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5), -}; - -static const struct dce110_i2c_hw_engine_registers dce100_hw_engine_regs[] = { - hw_engine_regs(1), - hw_engine_regs(2), - hw_engine_regs(3), - hw_engine_regs(4), - hw_engine_regs(5), - hw_engine_regs(6) -}; - -static const struct dce110_i2c_hw_engine_shift i2c_shift = { - I2C_COMMON_MASK_SH_LIST_DCE100(__SHIFT) -}; - -static const struct dce110_i2c_hw_engine_mask i2c_mask = { - I2C_COMMON_MASK_SH_LIST_DCE100(_MASK) -}; - -struct i2caux *dal_i2caux_dce100_create( - struct dc_context *ctx) -{ - struct i2caux_dce110 *i2caux_dce110 = - kzalloc(sizeof(struct i2caux_dce110), GFP_KERNEL); - - if (!i2caux_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - dal_i2caux_dce110_construct(i2caux_dce110, - ctx, - ARRAY_SIZE(dce100_aux_regs), - dce100_aux_regs, - dce100_hw_engine_regs, - &i2c_shift, - &i2c_mask); - return &i2caux_dce110->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.h deleted file mode 100644 index 2b508d3e0ef4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce100/i2caux_dce100.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCE100_H__ -#define __DAL_I2C_AUX_DCE100_H__ - -struct i2caux *dal_i2caux_dce100_create( - struct dc_context *ctx); - -#endif /* __DAL_I2C_AUX_DCE100_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c deleted file mode 100644 index 59c3ed43d609..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dm_event_log.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../engine.h" -#include "../aux_engine.h" - -/* - * Header of this unit - */ - -#include "aux_engine_dce110.h" - -/* - * Post-requisites: headers required by this unit - */ -#include "dce/dce_11_0_sh_mask.h" - -#define CTX \ - aux110->base.base.ctx -#define REG(reg_name)\ - (aux110->regs->reg_name) -#include "reg_helper.h" - -/* - * This unit - */ - -/* - * @brief - * Cast 'struct aux_engine *' - * to 'struct aux_engine_dce110 *' - */ -#define FROM_AUX_ENGINE(ptr) \ - container_of((ptr), struct aux_engine_dce110, base) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct aux_engine_dce110 *' - */ -#define FROM_ENGINE(ptr) \ - FROM_AUX_ENGINE(container_of((ptr), struct aux_engine, base)) - -static void release_engine( - struct engine *engine) -{ - struct aux_engine_dce110 *aux110 = FROM_ENGINE(engine); - - REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, 1); -} - -static void destruct( - struct aux_engine_dce110 *engine); - -static void destroy( - struct aux_engine **aux_engine) -{ - struct aux_engine_dce110 *engine = FROM_AUX_ENGINE(*aux_engine); - - destruct(engine); - - kfree(engine); - - *aux_engine = NULL; -} - -#define SW_CAN_ACCESS_AUX 1 -#define DMCU_CAN_ACCESS_AUX 2 - -static bool is_engine_available( - struct aux_engine *engine) -{ - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); - - uint32_t value = REG_READ(AUX_ARB_CONTROL); - uint32_t field = get_reg_field_value( - value, - AUX_ARB_CONTROL, - AUX_REG_RW_CNTL_STATUS); - - return (field != DMCU_CAN_ACCESS_AUX); -} -static bool acquire_engine( - struct aux_engine *engine) -{ - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); - - uint32_t value = REG_READ(AUX_ARB_CONTROL); - uint32_t field = get_reg_field_value( - value, - AUX_ARB_CONTROL, - AUX_REG_RW_CNTL_STATUS); - if (field == DMCU_CAN_ACCESS_AUX) - return false; - /* enable AUX before request SW to access AUX */ - value = REG_READ(AUX_CONTROL); - field = get_reg_field_value(value, - AUX_CONTROL, - AUX_EN); - - if (field == 0) { - set_reg_field_value( - value, - 1, - AUX_CONTROL, - AUX_EN); - - if (REG(AUX_RESET_MASK)) { - /*DP_AUX block as part of the enable sequence*/ - set_reg_field_value( - value, - 1, - AUX_CONTROL, - AUX_RESET); - } - - REG_WRITE(AUX_CONTROL, value); - - if (REG(AUX_RESET_MASK)) { - /*poll HW to make sure reset it done*/ - - REG_WAIT(AUX_CONTROL, AUX_RESET_DONE, 1, - 1, 11); - - set_reg_field_value( - value, - 0, - AUX_CONTROL, - AUX_RESET); - - REG_WRITE(AUX_CONTROL, value); - - REG_WAIT(AUX_CONTROL, AUX_RESET_DONE, 0, - 1, 11); - } - } /*if (field)*/ - - /* request SW to access AUX */ - REG_UPDATE(AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, 1); - - value = REG_READ(AUX_ARB_CONTROL); - field = get_reg_field_value( - value, - AUX_ARB_CONTROL, - AUX_REG_RW_CNTL_STATUS); - - return (field == SW_CAN_ACCESS_AUX); -} - -#define COMPOSE_AUX_SW_DATA_16_20(command, address) \ - ((command) | ((0xF0000 & (address)) >> 16)) - -#define COMPOSE_AUX_SW_DATA_8_15(address) \ - ((0xFF00 & (address)) >> 8) - -#define COMPOSE_AUX_SW_DATA_0_7(address) \ - (0xFF & (address)) - -static void submit_channel_request( - struct aux_engine *engine, - struct aux_request_transaction_data *request) -{ - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); - uint32_t value; - uint32_t length; - - bool is_write = - ((request->type == AUX_TRANSACTION_TYPE_DP) && - (request->action == I2CAUX_TRANSACTION_ACTION_DP_WRITE)) || - ((request->type == AUX_TRANSACTION_TYPE_I2C) && - ((request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE) || - (request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT))); - if (REG(AUXN_IMPCAL)) { - /* clear_aux_error */ - REG_UPDATE_SEQ(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, - 1, - 0); - - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, - 1, - 0); - - /* force_default_calibrate */ - REG_UPDATE_1BY1_2(AUXN_IMPCAL, - AUXN_IMPCAL_ENABLE, 1, - AUXN_IMPCAL_OVERRIDE_ENABLE, 0); - - /* bug? why AUXN update EN and OVERRIDE_EN 1 by 1 while AUX P toggles OVERRIDE? */ - - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, - 1, - 0); - } - /* set the delay and the number of bytes to write */ - - /* The length include - * the 4 bit header and the 20 bit address - * (that is 3 byte). - * If the requested length is non zero this means - * an addition byte specifying the length is required. */ - - length = request->length ? 4 : 3; - if (is_write) - length += request->length; - - REG_UPDATE_2(AUX_SW_CONTROL, - AUX_SW_START_DELAY, request->delay, - AUX_SW_WR_BYTES, length); - - /* program action and address and payload data (if 'is_write') */ - value = REG_UPDATE_4(AUX_SW_DATA, - AUX_SW_INDEX, 0, - AUX_SW_DATA_RW, 0, - AUX_SW_AUTOINCREMENT_DISABLE, 1, - AUX_SW_DATA, COMPOSE_AUX_SW_DATA_16_20(request->action, request->address)); - - value = REG_SET_2(AUX_SW_DATA, value, - AUX_SW_AUTOINCREMENT_DISABLE, 0, - AUX_SW_DATA, COMPOSE_AUX_SW_DATA_8_15(request->address)); - - value = REG_SET(AUX_SW_DATA, value, - AUX_SW_DATA, COMPOSE_AUX_SW_DATA_0_7(request->address)); - - if (request->length) { - value = REG_SET(AUX_SW_DATA, value, - AUX_SW_DATA, request->length - 1); - } - - if (is_write) { - /* Load the HW buffer with the Data to be sent. - * This is relevant for write operation. - * For read, the data recived data will be - * processed in process_channel_reply(). */ - uint32_t i = 0; - - while (i < request->length) { - value = REG_SET(AUX_SW_DATA, value, - AUX_SW_DATA, request->data[i]); - - ++i; - } - } - - REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); - REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->timeout_period/10); - REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); - EVENT_LOG_AUX_REQ(engine->base.ddc->pin_data->en, EVENT_LOG_AUX_ORIGIN_NATIVE, - request->action, request->address, request->length, request->data); -} - -static int read_channel_reply(struct aux_engine *engine, uint32_t size, - uint8_t *buffer, uint8_t *reply_result, - uint32_t *sw_status) -{ - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); - uint32_t bytes_replied; - uint32_t reply_result_32; - - *sw_status = REG_GET(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, - &bytes_replied); - - /* In case HPD is LOW, exit AUX transaction */ - if ((*sw_status & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) - return -1; - - /* Need at least the status byte */ - if (!bytes_replied) - return -1; - - REG_UPDATE_1BY1_3(AUX_SW_DATA, - AUX_SW_INDEX, 0, - AUX_SW_AUTOINCREMENT_DISABLE, 1, - AUX_SW_DATA_RW, 1); - - REG_GET(AUX_SW_DATA, AUX_SW_DATA, &reply_result_32); - reply_result_32 = reply_result_32 >> 4; - *reply_result = (uint8_t)reply_result_32; - - if (reply_result_32 == 0) { /* ACK */ - uint32_t i = 0; - - /* First byte was already used to get the command status */ - --bytes_replied; - - /* Do not overflow buffer */ - if (bytes_replied > size) - return -1; - - while (i < bytes_replied) { - uint32_t aux_sw_data_val; - - REG_GET(AUX_SW_DATA, AUX_SW_DATA, &aux_sw_data_val); - buffer[i] = aux_sw_data_val; - ++i; - } - - return i; - } - - return 0; -} - -static void process_channel_reply( - struct aux_engine *engine, - struct aux_reply_transaction_data *reply) -{ - int bytes_replied; - uint8_t reply_result; - uint32_t sw_status; - - bytes_replied = read_channel_reply(engine, reply->length, reply->data, - &reply_result, &sw_status); - EVENT_LOG_AUX_REP(engine->base.ddc->pin_data->en, - EVENT_LOG_AUX_ORIGIN_NATIVE, reply_result, - bytes_replied, reply->data); - - /* in case HPD is LOW, exit AUX transaction */ - if ((sw_status & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { - reply->status = AUX_TRANSACTION_REPLY_HPD_DISCON; - return; - } - - if (bytes_replied < 0) { - /* Need to handle an error case... - * Hopefully, upper layer function won't call this function if - * the number of bytes in the reply was 0, because there was - * surely an error that was asserted that should have been - * handled for hot plug case, this could happens - */ - if (!(sw_status & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { - reply->status = AUX_TRANSACTION_REPLY_INVALID; - ASSERT_CRITICAL(false); - return; - } - } else { - - switch (reply_result) { - case 0: /* ACK */ - reply->status = AUX_TRANSACTION_REPLY_AUX_ACK; - break; - case 1: /* NACK */ - reply->status = AUX_TRANSACTION_REPLY_AUX_NACK; - break; - case 2: /* DEFER */ - reply->status = AUX_TRANSACTION_REPLY_AUX_DEFER; - break; - case 4: /* AUX ACK / I2C NACK */ - reply->status = AUX_TRANSACTION_REPLY_I2C_NACK; - break; - case 8: /* AUX ACK / I2C DEFER */ - reply->status = AUX_TRANSACTION_REPLY_I2C_DEFER; - break; - default: - reply->status = AUX_TRANSACTION_REPLY_INVALID; - } - } -} - -static enum aux_channel_operation_result get_channel_status( - struct aux_engine *engine, - uint8_t *returned_bytes) -{ - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(engine); - - uint32_t value; - - if (returned_bytes == NULL) { - /*caller pass NULL pointer*/ - ASSERT_CRITICAL(false); - return AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN; - } - *returned_bytes = 0; - - /* poll to make sure that SW_DONE is asserted */ - value = REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1, - 10, aux110->timeout_period/10); - - /* in case HPD is LOW, exit AUX transaction */ - if ((value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) - return AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON; - - /* Note that the following bits are set in 'status.bits' - * during CTS 4.2.1.2 (FW 3.3.1): - * AUX_SW_RX_MIN_COUNT_VIOL, AUX_SW_RX_INVALID_STOP, - * AUX_SW_RX_RECV_NO_DET, AUX_SW_RX_RECV_INVALID_H. - * - * AUX_SW_RX_MIN_COUNT_VIOL is an internal, - * HW debugging bit and should be ignored. */ - if (value & AUX_SW_STATUS__AUX_SW_DONE_MASK) { - if ((value & AUX_SW_STATUS__AUX_SW_RX_TIMEOUT_STATE_MASK) || - (value & AUX_SW_STATUS__AUX_SW_RX_TIMEOUT_MASK)) - return AUX_CHANNEL_OPERATION_FAILED_TIMEOUT; - - else if ((value & AUX_SW_STATUS__AUX_SW_RX_INVALID_STOP_MASK) || - (value & AUX_SW_STATUS__AUX_SW_RX_RECV_NO_DET_MASK) || - (value & - AUX_SW_STATUS__AUX_SW_RX_RECV_INVALID_H_MASK) || - (value & AUX_SW_STATUS__AUX_SW_RX_RECV_INVALID_L_MASK)) - return AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY; - - *returned_bytes = get_reg_field_value(value, - AUX_SW_STATUS, - AUX_SW_REPLY_BYTE_COUNT); - - if (*returned_bytes == 0) - return - AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY; - else { - *returned_bytes -= 1; - return AUX_CHANNEL_OPERATION_SUCCEEDED; - } - } else { - /*time_elapsed >= aux_engine->timeout_period - * AUX_SW_STATUS__AUX_SW_HPD_DISCON = at this point - */ - ASSERT_CRITICAL(false); - return AUX_CHANNEL_OPERATION_FAILED_TIMEOUT; - } -} - -static const struct aux_engine_funcs aux_engine_funcs = { - .destroy = destroy, - .acquire_engine = acquire_engine, - .submit_channel_request = submit_channel_request, - .process_channel_reply = process_channel_reply, - .read_channel_reply = read_channel_reply, - .get_channel_status = get_channel_status, - .is_engine_available = is_engine_available, -}; - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .submit_request = dal_aux_engine_submit_request, - .get_engine_type = dal_aux_engine_get_engine_type, - .acquire = dal_aux_engine_acquire, -}; - -static void construct( - struct aux_engine_dce110 *engine, - const struct aux_engine_dce110_init_data *aux_init_data) -{ - dal_aux_engine_construct(&engine->base, aux_init_data->ctx); - engine->base.base.funcs = &engine_funcs; - engine->base.funcs = &aux_engine_funcs; - - engine->timeout_period = aux_init_data->timeout_period; - engine->regs = aux_init_data->regs; -} - -static void destruct( - struct aux_engine_dce110 *engine) -{ - dal_aux_engine_destruct(&engine->base); -} - -struct aux_engine *dal_aux_engine_dce110_create( - const struct aux_engine_dce110_init_data *aux_init_data) -{ - struct aux_engine_dce110 *engine; - - if (!aux_init_data) { - ASSERT_CRITICAL(false); - return NULL; - } - - engine = kzalloc(sizeof(*engine), GFP_KERNEL); - - if (!engine) { - ASSERT_CRITICAL(false); - return NULL; - } - - construct(engine, aux_init_data); - return &engine->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.h deleted file mode 100644 index 85ee82162590..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_AUX_ENGINE_DCE110_H__ -#define __DAL_AUX_ENGINE_DCE110_H__ - -#include "../aux_engine.h" - -#define AUX_COMMON_REG_LIST(id)\ - SRI(AUX_CONTROL, DP_AUX, id), \ - SRI(AUX_ARB_CONTROL, DP_AUX, id), \ - SRI(AUX_SW_DATA, DP_AUX, id), \ - SRI(AUX_SW_CONTROL, DP_AUX, id), \ - SRI(AUX_INTERRUPT_CONTROL, DP_AUX, id), \ - SRI(AUX_SW_STATUS, DP_AUX, id), \ - SR(AUXN_IMPCAL), \ - SR(AUXP_IMPCAL) - -struct dce110_aux_registers { - uint32_t AUX_CONTROL; - uint32_t AUX_ARB_CONTROL; - uint32_t AUX_SW_DATA; - uint32_t AUX_SW_CONTROL; - uint32_t AUX_INTERRUPT_CONTROL; - uint32_t AUX_SW_STATUS; - uint32_t AUXN_IMPCAL; - uint32_t AUXP_IMPCAL; - - uint32_t AUX_RESET_MASK; -}; - -struct aux_engine_dce110 { - struct aux_engine base; - const struct dce110_aux_registers *regs; - struct { - uint32_t aux_control; - uint32_t aux_arb_control; - uint32_t aux_sw_data; - uint32_t aux_sw_control; - uint32_t aux_interrupt_control; - uint32_t aux_sw_status; - } addr; - uint32_t timeout_period; -}; - -struct aux_engine_dce110_init_data { - uint32_t engine_id; - uint32_t timeout_period; - struct dc_context *ctx; - const struct dce110_aux_registers *regs; -}; - -struct aux_engine *dal_aux_engine_dce110_create( - const struct aux_engine_dce110_init_data *aux_init_data); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.h deleted file mode 100644 index fea2946906ed..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_HW_ENGINE_DCE110_H__ -#define __DAL_I2C_HW_ENGINE_DCE110_H__ - -#define I2C_HW_ENGINE_COMMON_REG_LIST(id)\ - SRI(SETUP, DC_I2C_DDC, id),\ - SRI(SPEED, DC_I2C_DDC, id),\ - SR(DC_I2C_ARBITRATION),\ - SR(DC_I2C_CONTROL),\ - SR(DC_I2C_SW_STATUS),\ - SR(DC_I2C_TRANSACTION0),\ - SR(DC_I2C_TRANSACTION1),\ - SR(DC_I2C_TRANSACTION2),\ - SR(DC_I2C_TRANSACTION3),\ - SR(DC_I2C_DATA),\ - SR(MICROSECOND_TIME_BASE_DIV) - -#define I2C_SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh)\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_TIME_LIMIT, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_EN, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_DRIVE_EN, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\ - I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\ - I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\ - I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_SOFT_RESET, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_SW_STATUS_RESET, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_GO, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_SEND_RESET, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_TRANSACTION_COUNT, mask_sh),\ - I2C_SF(DC_I2C_CONTROL, DC_I2C_DDC_SELECT, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE, mask_sh),\ - I2C_SF(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD, mask_sh),\ - I2C_SF(DC_I2C_SW_STATUS, DC_I2C_SW_STOPPED_ON_NACK, mask_sh),\ - I2C_SF(DC_I2C_SW_STATUS, DC_I2C_SW_TIMEOUT, mask_sh),\ - I2C_SF(DC_I2C_SW_STATUS, DC_I2C_SW_ABORTED, mask_sh),\ - I2C_SF(DC_I2C_SW_STATUS, DC_I2C_SW_DONE, mask_sh),\ - I2C_SF(DC_I2C_SW_STATUS, DC_I2C_SW_STATUS, mask_sh),\ - I2C_SF(DC_I2C_TRANSACTION0, DC_I2C_STOP_ON_NACK0, mask_sh),\ - I2C_SF(DC_I2C_TRANSACTION0, DC_I2C_START0, mask_sh),\ - I2C_SF(DC_I2C_TRANSACTION0, DC_I2C_RW0, mask_sh),\ - I2C_SF(DC_I2C_TRANSACTION0, DC_I2C_STOP0, mask_sh),\ - I2C_SF(DC_I2C_TRANSACTION0, DC_I2C_COUNT0, mask_sh),\ - I2C_SF(DC_I2C_DATA, DC_I2C_DATA_RW, mask_sh),\ - I2C_SF(DC_I2C_DATA, DC_I2C_DATA, mask_sh),\ - I2C_SF(DC_I2C_DATA, DC_I2C_INDEX, mask_sh),\ - I2C_SF(DC_I2C_DATA, DC_I2C_INDEX_WRITE, mask_sh),\ - I2C_SF(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, mask_sh) - -#define I2C_COMMON_MASK_SH_LIST_DCE100(mask_sh)\ - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh) - -#define I2C_COMMON_MASK_SH_LIST_DCE110(mask_sh)\ - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh),\ - I2C_SF(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_START_STOP_TIMING_CNTL, mask_sh) - -struct dce110_i2c_hw_engine_shift { - uint8_t DC_I2C_DDC1_ENABLE; - uint8_t DC_I2C_DDC1_TIME_LIMIT; - uint8_t DC_I2C_DDC1_DATA_DRIVE_EN; - uint8_t DC_I2C_DDC1_CLK_DRIVE_EN; - uint8_t DC_I2C_DDC1_DATA_DRIVE_SEL; - uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; - uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY; - uint8_t DC_I2C_SW_DONE_USING_I2C_REG; - uint8_t DC_I2C_NO_QUEUED_SW_GO; - uint8_t DC_I2C_SW_PRIORITY; - uint8_t DC_I2C_SOFT_RESET; - uint8_t DC_I2C_SW_STATUS_RESET; - uint8_t DC_I2C_GO; - uint8_t DC_I2C_SEND_RESET; - uint8_t DC_I2C_TRANSACTION_COUNT; - uint8_t DC_I2C_DDC_SELECT; - uint8_t DC_I2C_DDC1_PRESCALE; - uint8_t DC_I2C_DDC1_THRESHOLD; - uint8_t DC_I2C_DDC1_START_STOP_TIMING_CNTL; - uint8_t DC_I2C_SW_STOPPED_ON_NACK; - uint8_t DC_I2C_SW_TIMEOUT; - uint8_t DC_I2C_SW_ABORTED; - uint8_t DC_I2C_SW_DONE; - uint8_t DC_I2C_SW_STATUS; - uint8_t DC_I2C_STOP_ON_NACK0; - uint8_t DC_I2C_START0; - uint8_t DC_I2C_RW0; - uint8_t DC_I2C_STOP0; - uint8_t DC_I2C_COUNT0; - uint8_t DC_I2C_DATA_RW; - uint8_t DC_I2C_DATA; - uint8_t DC_I2C_INDEX; - uint8_t DC_I2C_INDEX_WRITE; - uint8_t XTAL_REF_DIV; -}; - -struct dce110_i2c_hw_engine_mask { - uint32_t DC_I2C_DDC1_ENABLE; - uint32_t DC_I2C_DDC1_TIME_LIMIT; - uint32_t DC_I2C_DDC1_DATA_DRIVE_EN; - uint32_t DC_I2C_DDC1_CLK_DRIVE_EN; - uint32_t DC_I2C_DDC1_DATA_DRIVE_SEL; - uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; - uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY; - uint32_t DC_I2C_SW_DONE_USING_I2C_REG; - uint32_t DC_I2C_NO_QUEUED_SW_GO; - uint32_t DC_I2C_SW_PRIORITY; - uint32_t DC_I2C_SOFT_RESET; - uint32_t DC_I2C_SW_STATUS_RESET; - uint32_t DC_I2C_GO; - uint32_t DC_I2C_SEND_RESET; - uint32_t DC_I2C_TRANSACTION_COUNT; - uint32_t DC_I2C_DDC_SELECT; - uint32_t DC_I2C_DDC1_PRESCALE; - uint32_t DC_I2C_DDC1_THRESHOLD; - uint32_t DC_I2C_DDC1_START_STOP_TIMING_CNTL; - uint32_t DC_I2C_SW_STOPPED_ON_NACK; - uint32_t DC_I2C_SW_TIMEOUT; - uint32_t DC_I2C_SW_ABORTED; - uint32_t DC_I2C_SW_DONE; - uint32_t DC_I2C_SW_STATUS; - uint32_t DC_I2C_STOP_ON_NACK0; - uint32_t DC_I2C_START0; - uint32_t DC_I2C_RW0; - uint32_t DC_I2C_STOP0; - uint32_t DC_I2C_COUNT0; - uint32_t DC_I2C_DATA_RW; - uint32_t DC_I2C_DATA; - uint32_t DC_I2C_INDEX; - uint32_t DC_I2C_INDEX_WRITE; - uint32_t XTAL_REF_DIV; -}; - -struct dce110_i2c_hw_engine_registers { - uint32_t SETUP; - uint32_t SPEED; - uint32_t DC_I2C_ARBITRATION; - uint32_t DC_I2C_CONTROL; - uint32_t DC_I2C_SW_STATUS; - uint32_t DC_I2C_TRANSACTION0; - uint32_t DC_I2C_TRANSACTION1; - uint32_t DC_I2C_TRANSACTION2; - uint32_t DC_I2C_TRANSACTION3; - uint32_t DC_I2C_DATA; - uint32_t MICROSECOND_TIME_BASE_DIV; -}; - -struct i2c_hw_engine_dce110 { - struct i2c_hw_engine base; - const struct dce110_i2c_hw_engine_registers *regs; - const struct dce110_i2c_hw_engine_shift *i2c_shift; - const struct dce110_i2c_hw_engine_mask *i2c_mask; - struct { - uint32_t DC_I2C_DDCX_SETUP; - uint32_t DC_I2C_DDCX_SPEED; - } addr; - uint32_t engine_id; - /* expressed in kilohertz */ - uint32_t reference_frequency; - /* number of bytes currently used in HW buffer */ - uint32_t buffer_used_bytes; - /* number of bytes used for write transaction in HW buffer - * - this will be used as the index to read from*/ - uint32_t buffer_used_write; - /* number of pending transactions (before GO) */ - uint32_t transaction_count; - uint32_t engine_keep_power_up_count; - uint32_t i2_setup_time_limit; -}; - -struct i2c_hw_engine_dce110_create_arg { - uint32_t engine_id; - uint32_t reference_frequency; - uint32_t default_speed; - struct dc_context *ctx; - const struct dce110_i2c_hw_engine_registers *regs; - const struct dce110_i2c_hw_engine_shift *i2c_shift; - const struct dce110_i2c_hw_engine_mask *i2c_mask; -}; - -struct i2c_engine *dal_i2c_hw_engine_dce110_create( - const struct i2c_hw_engine_dce110_create_arg *arg); - -enum { - I2C_SETUP_TIME_LIMIT_DCE = 255, - I2C_SETUP_TIME_LIMIT_DCN = 3, - I2C_HW_BUFFER_SIZE = 538, - I2C_SEND_RESET_LENGTH_9 = 9, - I2C_SEND_RESET_LENGTH_10 = 10, -}; -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.c deleted file mode 100644 index 3aa7f791e523..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" - -/* - * Header of this unit - */ - -#include "i2c_sw_engine_dce110.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -/* - * @brief - * Cast 'struct i2c_sw_engine *' - * to 'struct i2c_sw_engine_dce110 *' - */ -#define FROM_I2C_SW_ENGINE(ptr) \ - container_of((ptr), struct i2c_sw_engine_dce110, base) -/* - * @brief - * Cast 'struct i2c_engine *' - * to 'struct i2c_sw_engine_dce80 *' - */ -#define FROM_I2C_ENGINE(ptr) \ - FROM_I2C_SW_ENGINE(container_of((ptr), struct i2c_sw_engine, base)) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct i2c_sw_engine_dce80 *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -static void release_engine( - struct engine *engine) -{ -} - -static void destruct( - struct i2c_sw_engine_dce110 *engine) -{ - dal_i2c_sw_engine_destruct(&engine->base); -} - -static void destroy( - struct i2c_engine **engine) -{ - struct i2c_sw_engine_dce110 *sw_engine = FROM_I2C_ENGINE(*engine); - - destruct(sw_engine); - - kfree(sw_engine); - - *engine = NULL; -} - -static bool acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc_handle) -{ - return dal_i2caux_i2c_sw_engine_acquire_engine(engine, ddc_handle); -} - -static const struct i2c_engine_funcs i2c_engine_funcs = { - .acquire_engine = acquire_engine, - .destroy = destroy, - .get_speed = dal_i2c_sw_engine_get_speed, - .set_speed = dal_i2c_sw_engine_set_speed, - .setup_engine = dal_i2c_engine_setup_i2c_engine, - .submit_channel_request = dal_i2c_sw_engine_submit_channel_request, - .process_channel_reply = dal_i2c_engine_process_channel_reply, - .get_channel_status = dal_i2c_sw_engine_get_channel_status, -}; - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .get_engine_type = dal_i2c_sw_engine_get_engine_type, - .acquire = dal_i2c_engine_acquire, - .submit_request = dal_i2c_sw_engine_submit_request, -}; - -static void construct( - struct i2c_sw_engine_dce110 *engine_dce110, - const struct i2c_sw_engine_dce110_create_arg *arg_dce110) -{ - struct i2c_sw_engine_create_arg arg_base; - - arg_base.ctx = arg_dce110->ctx; - arg_base.default_speed = arg_dce110->default_speed; - - dal_i2c_sw_engine_construct(&engine_dce110->base, &arg_base); - - /*struct engine struct engine_funcs*/ - engine_dce110->base.base.base.funcs = &engine_funcs; - /*struct i2c_engine struct i2c_engine_funcs*/ - engine_dce110->base.base.funcs = &i2c_engine_funcs; - engine_dce110->base.default_speed = arg_dce110->default_speed; - engine_dce110->engine_id = arg_dce110->engine_id; -} - -struct i2c_engine *dal_i2c_sw_engine_dce110_create( - const struct i2c_sw_engine_dce110_create_arg *arg) -{ - struct i2c_sw_engine_dce110 *engine_dce110; - - if (!arg) { - ASSERT_CRITICAL(false); - return NULL; - } - - engine_dce110 = kzalloc(sizeof(struct i2c_sw_engine_dce110), - GFP_KERNEL); - - if (!engine_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - construct(engine_dce110, arg); - return &engine_dce110->base.base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.h deleted file mode 100644 index c48c61f540a8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_sw_engine_dce110.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_SW_ENGINE_DCE110_H__ -#define __DAL_I2C_SW_ENGINE_DCE110_H__ - -struct i2c_sw_engine_dce110 { - struct i2c_sw_engine base; - uint32_t engine_id; -}; - -struct i2c_sw_engine_dce110_create_arg { - uint32_t engine_id; - uint32_t default_speed; - struct dc_context *ctx; -}; - -struct i2c_engine *dal_i2c_sw_engine_dce110_create( - const struct i2c_sw_engine_dce110_create_arg *arg); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.c deleted file mode 100644 index 1d748ac1d6d6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -/* - * Header of this unit - */ -#include "i2caux_dce110.h" - -#include "i2c_sw_engine_dce110.h" -#include "i2c_hw_engine_dce110.h" -#include "aux_engine_dce110.h" -#include "../../dc.h" -#include "dc_types.h" - - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ -/*cast pointer to struct i2caux TO pointer to struct i2caux_dce110*/ -#define FROM_I2C_AUX(ptr) \ - container_of((ptr), struct i2caux_dce110, base) - -static void destruct( - struct i2caux_dce110 *i2caux_dce110) -{ - dal_i2caux_destruct(&i2caux_dce110->base); -} - -static void destroy( - struct i2caux **i2c_engine) -{ - struct i2caux_dce110 *i2caux_dce110 = FROM_I2C_AUX(*i2c_engine); - - destruct(i2caux_dce110); - - kfree(i2caux_dce110); - - *i2c_engine = NULL; -} - -static struct i2c_engine *acquire_i2c_hw_engine( - struct i2caux *i2caux, - struct ddc *ddc) -{ - struct i2caux_dce110 *i2caux_dce110 = FROM_I2C_AUX(i2caux); - - struct i2c_engine *engine = NULL; - /* generic hw engine is not used for EDID read - * It may be needed for external i2c device, like thermal chip, - * TODO will be implemented when needed. - * check dce80 bool non_generic for generic hw engine; - */ - - if (!ddc) - return NULL; - - if (ddc->hw_info.hw_supported) { - enum gpio_ddc_line line = dal_ddc_get_line(ddc); - - if (line < GPIO_DDC_LINE_COUNT) - engine = i2caux->i2c_hw_engines[line]; - } - - if (!engine) - return NULL; - - if (!i2caux_dce110->i2c_hw_buffer_in_use && - engine->base.funcs->acquire(&engine->base, ddc)) { - i2caux_dce110->i2c_hw_buffer_in_use = true; - return engine; - } - - return NULL; -} - -static void release_engine( - struct i2caux *i2caux, - struct engine *engine) -{ - struct i2caux_dce110 *i2caux_dce110 = FROM_I2C_AUX(i2caux); - - if (engine->funcs->get_engine_type(engine) == - I2CAUX_ENGINE_TYPE_I2C_DDC_HW) - i2caux_dce110->i2c_hw_buffer_in_use = false; - - dal_i2caux_release_engine(i2caux, engine); -} - -static const enum gpio_ddc_line hw_ddc_lines[] = { - GPIO_DDC_LINE_DDC1, - GPIO_DDC_LINE_DDC2, - GPIO_DDC_LINE_DDC3, - GPIO_DDC_LINE_DDC4, - GPIO_DDC_LINE_DDC5, - GPIO_DDC_LINE_DDC6, -}; - -static const enum gpio_ddc_line hw_aux_lines[] = { - GPIO_DDC_LINE_DDC1, - GPIO_DDC_LINE_DDC2, - GPIO_DDC_LINE_DDC3, - GPIO_DDC_LINE_DDC4, - GPIO_DDC_LINE_DDC5, - GPIO_DDC_LINE_DDC6, -}; - -/* function table */ -static const struct i2caux_funcs i2caux_funcs = { - .destroy = destroy, - .acquire_i2c_hw_engine = acquire_i2c_hw_engine, - .release_engine = release_engine, - .acquire_i2c_sw_engine = dal_i2caux_acquire_i2c_sw_engine, - .acquire_aux_engine = dal_i2caux_acquire_aux_engine, -}; - -#include "dce/dce_11_0_d.h" -#include "dce/dce_11_0_sh_mask.h" - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = AUX_CONTROL__AUX_RESET_MASK \ -} - -#define hw_engine_regs(id)\ -{\ - I2C_HW_ENGINE_COMMON_REG_LIST(id) \ -} - -static const struct dce110_aux_registers dce110_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -static const struct dce110_i2c_hw_engine_registers i2c_hw_engine_regs[] = { - hw_engine_regs(1), - hw_engine_regs(2), - hw_engine_regs(3), - hw_engine_regs(4), - hw_engine_regs(5), - hw_engine_regs(6) -}; - -static const struct dce110_i2c_hw_engine_shift i2c_shift = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce110_i2c_hw_engine_mask i2c_mask = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -void dal_i2caux_dce110_construct( - struct i2caux_dce110 *i2caux_dce110, - struct dc_context *ctx, - unsigned int num_i2caux_inst, - const struct dce110_aux_registers aux_regs[], - const struct dce110_i2c_hw_engine_registers i2c_hw_engine_regs[], - const struct dce110_i2c_hw_engine_shift *i2c_shift, - const struct dce110_i2c_hw_engine_mask *i2c_mask) -{ - uint32_t i = 0; - uint32_t reference_frequency = 0; - bool use_i2c_sw_engine = false; - struct i2caux *base = NULL; - /*TODO: For CZ bring up, if dal_i2caux_get_reference_clock - * does not return 48KHz, we need hard coded for 48Khz. - * Some BIOS setting incorrect cause this - * For production, we always get value from BIOS*/ - reference_frequency = - dal_i2caux_get_reference_clock(ctx->dc_bios) >> 1; - - base = &i2caux_dce110->base; - - dal_i2caux_construct(base, ctx); - - i2caux_dce110->base.funcs = &i2caux_funcs; - i2caux_dce110->i2c_hw_buffer_in_use = false; - /* Create I2C engines (DDC lines per connector) - * different I2C/AUX usage cases, DDC, Generic GPIO, AUX. - */ - do { - enum gpio_ddc_line line_id = hw_ddc_lines[i]; - - struct i2c_hw_engine_dce110_create_arg hw_arg_dce110; - - if (use_i2c_sw_engine) { - struct i2c_sw_engine_dce110_create_arg sw_arg; - - sw_arg.engine_id = i; - sw_arg.default_speed = base->default_i2c_sw_speed; - sw_arg.ctx = ctx; - base->i2c_sw_engines[line_id] = - dal_i2c_sw_engine_dce110_create(&sw_arg); - } - - hw_arg_dce110.engine_id = i; - hw_arg_dce110.reference_frequency = reference_frequency; - hw_arg_dce110.default_speed = base->default_i2c_hw_speed; - hw_arg_dce110.ctx = ctx; - hw_arg_dce110.regs = &i2c_hw_engine_regs[i]; - hw_arg_dce110.i2c_shift = i2c_shift; - hw_arg_dce110.i2c_mask = i2c_mask; - - base->i2c_hw_engines[line_id] = - dal_i2c_hw_engine_dce110_create(&hw_arg_dce110); - if (base->i2c_hw_engines[line_id] != NULL) { - switch (ctx->dce_version) { - case DCN_VERSION_1_0: - base->i2c_hw_engines[line_id]->setup_limit = - I2C_SETUP_TIME_LIMIT_DCN; - base->i2c_hw_engines[line_id]->send_reset_length = 0; - break; - default: - base->i2c_hw_engines[line_id]->setup_limit = - I2C_SETUP_TIME_LIMIT_DCE; - base->i2c_hw_engines[line_id]->send_reset_length = 0; - break; - } - } - ++i; - } while (i < num_i2caux_inst); - - /* Create AUX engines for all lines which has assisted HW AUX - * 'i' (loop counter) used as DDC/AUX engine_id */ - - i = 0; - - do { - enum gpio_ddc_line line_id = hw_aux_lines[i]; - - struct aux_engine_dce110_init_data aux_init_data; - - aux_init_data.engine_id = i; - aux_init_data.timeout_period = base->aux_timeout_period; - aux_init_data.ctx = ctx; - aux_init_data.regs = &aux_regs[i]; - - base->aux_engines[line_id] = - dal_aux_engine_dce110_create(&aux_init_data); - - ++i; - } while (i < num_i2caux_inst); - - /*TODO Generic I2C SW and HW*/ -} - -/* - * dal_i2caux_dce110_create - * - * @brief - * public interface to allocate memory for DCE11 I2CAUX - * - * @param - * struct adapter_service *as - [in] - * struct dc_context *ctx - [in] - * - * @return - * pointer to the base struct of DCE11 I2CAUX - */ -struct i2caux *dal_i2caux_dce110_create( - struct dc_context *ctx) -{ - struct i2caux_dce110 *i2caux_dce110 = - kzalloc(sizeof(struct i2caux_dce110), GFP_KERNEL); - - if (!i2caux_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - dal_i2caux_dce110_construct(i2caux_dce110, - ctx, - ARRAY_SIZE(dce110_aux_regs), - dce110_aux_regs, - i2c_hw_engine_regs, - &i2c_shift, - &i2c_mask); - return &i2caux_dce110->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.h deleted file mode 100644 index d3d8cc58666a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2caux_dce110.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCE110_H__ -#define __DAL_I2C_AUX_DCE110_H__ - -#include "../i2caux.h" - -struct i2caux_dce110 { - struct i2caux base; - /* indicate the I2C HW circular buffer is in use */ - bool i2c_hw_buffer_in_use; -}; - -struct dce110_aux_registers; -struct dce110_i2c_hw_engine_registers; -struct dce110_i2c_hw_engine_shift; -struct dce110_i2c_hw_engine_mask; - -struct i2caux *dal_i2caux_dce110_create( - struct dc_context *ctx); - -void dal_i2caux_dce110_construct( - struct i2caux_dce110 *i2caux_dce110, - struct dc_context *ctx, - unsigned int num_i2caux_inst, - const struct dce110_aux_registers *aux_regs, - const struct dce110_i2c_hw_engine_registers *i2c_hw_engine_regs, - const struct dce110_i2c_hw_engine_shift *i2c_shift, - const struct dce110_i2c_hw_engine_mask *i2c_mask); - -#endif /* __DAL_I2C_AUX_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c deleted file mode 100644 index 6a4f344c1db4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -#include "../dce110/i2c_hw_engine_dce110.h" -#include "../dce110/aux_engine_dce110.h" -#include "../dce110/i2caux_dce110.h" - -#include "dce/dce_12_0_offset.h" -#include "dce/dce_12_0_sh_mask.h" -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -/* compile time expand base address. */ -#define BASE(seg) \ - BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name -/* macros to expend register list macro defined in HW object header file - * end *********************/ - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK \ -} - -static const struct dce110_aux_registers dce120_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5), -}; - -#define hw_engine_regs(id)\ -{\ - I2C_HW_ENGINE_COMMON_REG_LIST(id) \ -} - -static const struct dce110_i2c_hw_engine_registers dce120_hw_engine_regs[] = { - hw_engine_regs(1), - hw_engine_regs(2), - hw_engine_regs(3), - hw_engine_regs(4), - hw_engine_regs(5), - hw_engine_regs(6) -}; - -static const struct dce110_i2c_hw_engine_shift i2c_shift = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce110_i2c_hw_engine_mask i2c_mask = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -struct i2caux *dal_i2caux_dce120_create( - struct dc_context *ctx) -{ - struct i2caux_dce110 *i2caux_dce110 = - kzalloc(sizeof(struct i2caux_dce110), GFP_KERNEL); - - if (!i2caux_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - dal_i2caux_dce110_construct(i2caux_dce110, - ctx, - ARRAY_SIZE(dce120_aux_regs), - dce120_aux_regs, - dce120_hw_engine_regs, - &i2c_shift, - &i2c_mask); - return &i2caux_dce110->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.h deleted file mode 100644 index b6ac47617c70..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce120/i2caux_dce120.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCE120_H__ -#define __DAL_I2C_AUX_DCE120_H__ - -struct i2caux *dal_i2caux_dce120_create( - struct dc_context *ctx); - -#endif /* __DAL_I2C_AUX_DCE120_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.c deleted file mode 100644 index fd0832dd2c75..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.c +++ /dev/null @@ -1,875 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_hw_engine.h" -#include "../i2c_generic_hw_engine.h" -/* - * Header of this unit - */ - -#include "i2c_hw_engine_dce80.h" - -/* - * Post-requisites: headers required by this unit - */ - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" -/* - * This unit - */ - -enum dc_i2c_status { - DC_I2C_STATUS__DC_I2C_STATUS_IDLE, - DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW, - DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW -}; - -enum dc_i2c_arbitration { - DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_NORMAL, - DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_HIGH -}; - -enum { - /* No timeout in HW - * (timeout implemented in SW by querying status) */ - I2C_SETUP_TIME_LIMIT = 255, - I2C_HW_BUFFER_SIZE = 144 -}; - -/* - * @brief - * Cast 'struct i2c_hw_engine *' - * to 'struct i2c_hw_engine_dce80 *' - */ -#define FROM_I2C_HW_ENGINE(ptr) \ - container_of((ptr), struct i2c_hw_engine_dce80, base) - -/* - * @brief - * Cast pointer to 'struct i2c_engine *' - * to pointer to 'struct i2c_hw_engine_dce80 *' - */ -#define FROM_I2C_ENGINE(ptr) \ - FROM_I2C_HW_ENGINE(container_of((ptr), struct i2c_hw_engine, base)) - -/* - * @brief - * Cast pointer to 'struct engine *' - * to 'pointer to struct i2c_hw_engine_dce80 *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -static void disable_i2c_hw_engine( - struct i2c_hw_engine_dce80 *engine) -{ - const uint32_t addr = engine->addr.DC_I2C_DDCX_SETUP; - uint32_t value = 0; - - struct dc_context *ctx = NULL; - - ctx = engine->base.base.base.ctx; - - value = dm_read_reg(ctx, addr); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_ENABLE); - - dm_write_reg(ctx, addr, value); -} - -static void release_engine( - struct engine *engine) -{ - struct i2c_hw_engine_dce80 *hw_engine = FROM_ENGINE(engine); - - struct i2c_engine *base = NULL; - bool safe_to_reset; - uint32_t value = 0; - - base = &hw_engine->base.base; - - /* Restore original HW engine speed */ - - base->funcs->set_speed(base, hw_engine->base.original_speed); - - /* Release I2C */ - { - value = dm_read_reg(engine->ctx, mmDC_I2C_ARBITRATION); - - set_reg_field_value( - value, - 1, - DC_I2C_ARBITRATION, - DC_I2C_SW_DONE_USING_I2C_REG); - - dm_write_reg(engine->ctx, mmDC_I2C_ARBITRATION, value); - } - - /* Reset HW engine */ - { - uint32_t i2c_sw_status = 0; - - value = dm_read_reg(engine->ctx, mmDC_I2C_SW_STATUS); - - i2c_sw_status = get_reg_field_value( - value, - DC_I2C_SW_STATUS, - DC_I2C_SW_STATUS); - /* if used by SW, safe to reset */ - safe_to_reset = (i2c_sw_status == 1); - } - { - value = dm_read_reg(engine->ctx, mmDC_I2C_CONTROL); - - if (safe_to_reset) - set_reg_field_value( - value, - 1, - DC_I2C_CONTROL, - DC_I2C_SOFT_RESET); - - set_reg_field_value( - value, - 1, - DC_I2C_CONTROL, - DC_I2C_SW_STATUS_RESET); - - dm_write_reg(engine->ctx, mmDC_I2C_CONTROL, value); - } - - /* HW I2c engine - clock gating feature */ - if (!hw_engine->engine_keep_power_up_count) - disable_i2c_hw_engine(hw_engine); -} - -static void destruct( - struct i2c_hw_engine_dce80 *engine) -{ - dal_i2c_hw_engine_destruct(&engine->base); -} - -static void destroy( - struct i2c_engine **i2c_engine) -{ - struct i2c_hw_engine_dce80 *engine = FROM_I2C_ENGINE(*i2c_engine); - - destruct(engine); - - kfree(engine); - - *i2c_engine = NULL; -} - -static bool setup_engine( - struct i2c_engine *i2c_engine) -{ - uint32_t value = 0; - struct i2c_hw_engine_dce80 *engine = FROM_I2C_ENGINE(i2c_engine); - - /* Program pin select */ - { - const uint32_t addr = mmDC_I2C_CONTROL; - - value = dm_read_reg(i2c_engine->base.ctx, addr); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_GO); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SOFT_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SEND_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SW_STATUS_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_TRANSACTION_COUNT); - - set_reg_field_value( - value, - engine->engine_id, - DC_I2C_CONTROL, - DC_I2C_DDC_SELECT); - - dm_write_reg(i2c_engine->base.ctx, addr, value); - } - - /* Program time limit */ - { - const uint32_t addr = engine->addr.DC_I2C_DDCX_SETUP; - - value = dm_read_reg(i2c_engine->base.ctx, addr); - - set_reg_field_value( - value, - I2C_SETUP_TIME_LIMIT, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_TIME_LIMIT); - - set_reg_field_value( - value, - 1, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_ENABLE); - - dm_write_reg(i2c_engine->base.ctx, addr, value); - } - - /* Program HW priority - * set to High - interrupt software I2C at any time - * Enable restart of SW I2C that was interrupted by HW - * disable queuing of software while I2C is in use by HW */ - { - value = dm_read_reg(i2c_engine->base.ctx, - mmDC_I2C_ARBITRATION); - - set_reg_field_value( - value, - 0, - DC_I2C_ARBITRATION, - DC_I2C_NO_QUEUED_SW_GO); - - set_reg_field_value( - value, - DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_NORMAL, - DC_I2C_ARBITRATION, - DC_I2C_SW_PRIORITY); - - dm_write_reg(i2c_engine->base.ctx, - mmDC_I2C_ARBITRATION, value); - } - - return true; -} - -static uint32_t get_speed( - const struct i2c_engine *i2c_engine) -{ - const struct i2c_hw_engine_dce80 *engine = FROM_I2C_ENGINE(i2c_engine); - - const uint32_t addr = engine->addr.DC_I2C_DDCX_SPEED; - - uint32_t pre_scale = 0; - - uint32_t value = dm_read_reg(i2c_engine->base.ctx, addr); - - pre_scale = get_reg_field_value( - value, - DC_I2C_DDC1_SPEED, - DC_I2C_DDC1_PRESCALE); - - /* [anaumov] it seems following is unnecessary */ - /*ASSERT(value.bits.DC_I2C_DDC1_PRESCALE);*/ - - return pre_scale ? - engine->reference_frequency / pre_scale : - engine->base.default_speed; -} - -static void set_speed( - struct i2c_engine *i2c_engine, - uint32_t speed) -{ - struct i2c_hw_engine_dce80 *engine = FROM_I2C_ENGINE(i2c_engine); - - if (speed) { - const uint32_t addr = engine->addr.DC_I2C_DDCX_SPEED; - - uint32_t value = dm_read_reg(i2c_engine->base.ctx, addr); - - set_reg_field_value( - value, - engine->reference_frequency / speed, - DC_I2C_DDC1_SPEED, - DC_I2C_DDC1_PRESCALE); - - set_reg_field_value( - value, - 2, - DC_I2C_DDC1_SPEED, - DC_I2C_DDC1_THRESHOLD); - - dm_write_reg(i2c_engine->base.ctx, addr, value); - } -} - -static inline void reset_hw_engine(struct engine *engine) -{ - uint32_t value = dm_read_reg(engine->ctx, mmDC_I2C_CONTROL); - - set_reg_field_value( - value, - 1, - DC_I2C_CONTROL, - DC_I2C_SOFT_RESET); - - set_reg_field_value( - value, - 1, - DC_I2C_CONTROL, - DC_I2C_SW_STATUS_RESET); - - dm_write_reg(engine->ctx, mmDC_I2C_CONTROL, value); -} - -static bool is_hw_busy(struct engine *engine) -{ - uint32_t i2c_sw_status = 0; - - uint32_t value = dm_read_reg(engine->ctx, mmDC_I2C_SW_STATUS); - - i2c_sw_status = get_reg_field_value( - value, - DC_I2C_SW_STATUS, - DC_I2C_SW_STATUS); - - if (i2c_sw_status == DC_I2C_STATUS__DC_I2C_STATUS_IDLE) - return false; - - reset_hw_engine(engine); - - value = dm_read_reg(engine->ctx, mmDC_I2C_SW_STATUS); - - i2c_sw_status = get_reg_field_value( - value, - DC_I2C_SW_STATUS, - DC_I2C_SW_STATUS); - - return i2c_sw_status != DC_I2C_STATUS__DC_I2C_STATUS_IDLE; -} - -/* - * @brief - * DC_GPIO_DDC MM register offsets - */ -static const uint32_t transaction_addr[] = { - mmDC_I2C_TRANSACTION0, - mmDC_I2C_TRANSACTION1, - mmDC_I2C_TRANSACTION2, - mmDC_I2C_TRANSACTION3 -}; - -static bool process_transaction( - struct i2c_hw_engine_dce80 *engine, - struct i2c_request_transaction_data *request) -{ - uint32_t length = request->length; - uint8_t *buffer = request->data; - - bool last_transaction = false; - uint32_t value = 0; - - struct dc_context *ctx = NULL; - - ctx = engine->base.base.base.ctx; - - { - const uint32_t addr = - transaction_addr[engine->transaction_count]; - - value = dm_read_reg(ctx, addr); - - set_reg_field_value( - value, - 1, - DC_I2C_TRANSACTION0, - DC_I2C_STOP_ON_NACK0); - - set_reg_field_value( - value, - 1, - DC_I2C_TRANSACTION0, - DC_I2C_START0); - - if ((engine->transaction_count == 3) || - (request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE) || - (request->action & I2CAUX_TRANSACTION_ACTION_I2C_READ)) { - - set_reg_field_value( - value, - 1, - DC_I2C_TRANSACTION0, - DC_I2C_STOP0); - - last_transaction = true; - } else - set_reg_field_value( - value, - 0, - DC_I2C_TRANSACTION0, - DC_I2C_STOP0); - - set_reg_field_value( - value, - (0 != (request->action & - I2CAUX_TRANSACTION_ACTION_I2C_READ)), - DC_I2C_TRANSACTION0, - DC_I2C_RW0); - - set_reg_field_value( - value, - length, - DC_I2C_TRANSACTION0, - DC_I2C_COUNT0); - - dm_write_reg(ctx, addr, value); - } - - /* Write the I2C address and I2C data - * into the hardware circular buffer, one byte per entry. - * As an example, the 7-bit I2C slave address for CRT monitor - * for reading DDC/EDID information is 0b1010001. - * For an I2C send operation, the LSB must be programmed to 0; - * for I2C receive operation, the LSB must be programmed to 1. */ - - { - value = 0; - - set_reg_field_value( - value, - false, - DC_I2C_DATA, - DC_I2C_DATA_RW); - - set_reg_field_value( - value, - request->address, - DC_I2C_DATA, - DC_I2C_DATA); - - if (engine->transaction_count == 0) { - set_reg_field_value( - value, - 0, - DC_I2C_DATA, - DC_I2C_INDEX); - - /*enable index write*/ - set_reg_field_value( - value, - 1, - DC_I2C_DATA, - DC_I2C_INDEX_WRITE); - } - - dm_write_reg(ctx, mmDC_I2C_DATA, value); - - if (!(request->action & I2CAUX_TRANSACTION_ACTION_I2C_READ)) { - - set_reg_field_value( - value, - 0, - DC_I2C_DATA, - DC_I2C_INDEX_WRITE); - - while (length) { - - set_reg_field_value( - value, - *buffer++, - DC_I2C_DATA, - DC_I2C_DATA); - - dm_write_reg(ctx, mmDC_I2C_DATA, value); - --length; - } - } - } - - ++engine->transaction_count; - engine->buffer_used_bytes += length + 1; - - return last_transaction; -} - -static void execute_transaction( - struct i2c_hw_engine_dce80 *engine) -{ - uint32_t value = 0; - struct dc_context *ctx = NULL; - - ctx = engine->base.base.base.ctx; - - { - const uint32_t addr = engine->addr.DC_I2C_DDCX_SETUP; - - value = dm_read_reg(ctx, addr); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_DATA_DRIVE_EN); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_CLK_DRIVE_EN); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_DATA_DRIVE_SEL); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_INTRA_TRANSACTION_DELAY); - - set_reg_field_value( - value, - 0, - DC_I2C_DDC1_SETUP, - DC_I2C_DDC1_INTRA_BYTE_DELAY); - - dm_write_reg(ctx, addr, value); - } - - { - const uint32_t addr = mmDC_I2C_CONTROL; - - value = dm_read_reg(ctx, addr); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SOFT_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SW_STATUS_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_SEND_RESET); - - set_reg_field_value( - value, - 0, - DC_I2C_CONTROL, - DC_I2C_GO); - - set_reg_field_value( - value, - engine->transaction_count - 1, - DC_I2C_CONTROL, - DC_I2C_TRANSACTION_COUNT); - - dm_write_reg(ctx, addr, value); - } - - /* start I2C transfer */ - { - const uint32_t addr = mmDC_I2C_CONTROL; - - value = dm_read_reg(ctx, addr); - - set_reg_field_value( - value, - 1, - DC_I2C_CONTROL, - DC_I2C_GO); - - dm_write_reg(ctx, addr, value); - } - - /* all transactions were executed and HW buffer became empty - * (even though it actually happens when status becomes DONE) */ - engine->transaction_count = 0; - engine->buffer_used_bytes = 0; -} - -static void submit_channel_request( - struct i2c_engine *engine, - struct i2c_request_transaction_data *request) -{ - request->status = I2C_CHANNEL_OPERATION_SUCCEEDED; - - if (!process_transaction(FROM_I2C_ENGINE(engine), request)) - return; - - if (is_hw_busy(&engine->base)) { - request->status = I2C_CHANNEL_OPERATION_ENGINE_BUSY; - return; - } - - execute_transaction(FROM_I2C_ENGINE(engine)); -} - -static void process_channel_reply( - struct i2c_engine *engine, - struct i2c_reply_transaction_data *reply) -{ - uint32_t length = reply->length; - uint8_t *buffer = reply->data; - - uint32_t value = 0; - - /*set index*/ - set_reg_field_value( - value, - length - 1, - DC_I2C_DATA, - DC_I2C_INDEX); - - set_reg_field_value( - value, - 1, - DC_I2C_DATA, - DC_I2C_DATA_RW); - - set_reg_field_value( - value, - 1, - DC_I2C_DATA, - DC_I2C_INDEX_WRITE); - - dm_write_reg(engine->base.ctx, mmDC_I2C_DATA, value); - - while (length) { - /* after reading the status, - * if the I2C operation executed successfully - * (i.e. DC_I2C_STATUS_DONE = 1) then the I2C controller - * should read data bytes from I2C circular data buffer */ - - value = dm_read_reg(engine->base.ctx, mmDC_I2C_DATA); - - *buffer++ = get_reg_field_value( - value, - DC_I2C_DATA, - DC_I2C_DATA); - - --length; - } -} - -static enum i2c_channel_operation_result get_channel_status( - struct i2c_engine *engine, - uint8_t *returned_bytes) -{ - uint32_t i2c_sw_status = 0; - uint32_t value = dm_read_reg(engine->base.ctx, mmDC_I2C_SW_STATUS); - - i2c_sw_status = get_reg_field_value( - value, - DC_I2C_SW_STATUS, - DC_I2C_SW_STATUS); - - if (i2c_sw_status == DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) - return I2C_CHANNEL_OPERATION_ENGINE_BUSY; - else if (value & DC_I2C_SW_STATUS__DC_I2C_SW_STOPPED_ON_NACK_MASK) - return I2C_CHANNEL_OPERATION_NO_RESPONSE; - else if (value & DC_I2C_SW_STATUS__DC_I2C_SW_TIMEOUT_MASK) - return I2C_CHANNEL_OPERATION_TIMEOUT; - else if (value & DC_I2C_SW_STATUS__DC_I2C_SW_ABORTED_MASK) - return I2C_CHANNEL_OPERATION_FAILED; - else if (value & DC_I2C_SW_STATUS__DC_I2C_SW_DONE_MASK) - return I2C_CHANNEL_OPERATION_SUCCEEDED; - - /* - * this is the case when HW used for communication, I2C_SW_STATUS - * could be zero - */ - return I2C_CHANNEL_OPERATION_SUCCEEDED; -} - -static uint32_t get_hw_buffer_available_size( - const struct i2c_hw_engine *engine) -{ - return I2C_HW_BUFFER_SIZE - - FROM_I2C_HW_ENGINE(engine)->buffer_used_bytes; -} - -static uint32_t get_transaction_timeout( - const struct i2c_hw_engine *engine, - uint32_t length) -{ - uint32_t speed = engine->base.funcs->get_speed(&engine->base); - - uint32_t period_timeout; - uint32_t num_of_clock_stretches; - - if (!speed) - return 0; - - period_timeout = (1000 * TRANSACTION_TIMEOUT_IN_I2C_CLOCKS) / speed; - - num_of_clock_stretches = 1 + (length << 3) + 1; - num_of_clock_stretches += - (FROM_I2C_HW_ENGINE(engine)->buffer_used_bytes << 3) + - (FROM_I2C_HW_ENGINE(engine)->transaction_count << 1); - - return period_timeout * num_of_clock_stretches; -} - -/* - * @brief - * DC_I2C_DDC1_SETUP MM register offsets - * - * @note - * The indices of this offset array are DDC engine IDs - */ -static const int32_t ddc_setup_offset[] = { - - mmDC_I2C_DDC1_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 1 */ - mmDC_I2C_DDC2_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 2 */ - mmDC_I2C_DDC3_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 3 */ - mmDC_I2C_DDC4_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 4 */ - mmDC_I2C_DDC5_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 5 */ - mmDC_I2C_DDC6_SETUP - mmDC_I2C_DDC1_SETUP, /* DDC Engine 6 */ - mmDC_I2C_DDCVGA_SETUP - mmDC_I2C_DDC1_SETUP /* DDC Engine 7 */ -}; - -/* - * @brief - * DC_I2C_DDC1_SPEED MM register offsets - * - * @note - * The indices of this offset array are DDC engine IDs - */ -static const int32_t ddc_speed_offset[] = { - mmDC_I2C_DDC1_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 1 */ - mmDC_I2C_DDC2_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 2 */ - mmDC_I2C_DDC3_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 3 */ - mmDC_I2C_DDC4_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 4 */ - mmDC_I2C_DDC5_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 5 */ - mmDC_I2C_DDC6_SPEED - mmDC_I2C_DDC1_SPEED, /* DDC Engine 6 */ - mmDC_I2C_DDCVGA_SPEED - mmDC_I2C_DDC1_SPEED /* DDC Engine 7 */ -}; - -static const struct i2c_engine_funcs i2c_engine_funcs = { - .destroy = destroy, - .get_speed = get_speed, - .set_speed = set_speed, - .setup_engine = setup_engine, - .submit_channel_request = submit_channel_request, - .process_channel_reply = process_channel_reply, - .get_channel_status = get_channel_status, - .acquire_engine = dal_i2c_hw_engine_acquire_engine, -}; - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .get_engine_type = dal_i2c_hw_engine_get_engine_type, - .acquire = dal_i2c_engine_acquire, - .submit_request = dal_i2c_hw_engine_submit_request, -}; - -static const struct i2c_hw_engine_funcs i2c_hw_engine_funcs = { - .get_hw_buffer_available_size = - get_hw_buffer_available_size, - .get_transaction_timeout = - get_transaction_timeout, - .wait_on_operation_result = - dal_i2c_hw_engine_wait_on_operation_result, -}; - -static void construct( - struct i2c_hw_engine_dce80 *engine, - const struct i2c_hw_engine_dce80_create_arg *arg) -{ - dal_i2c_hw_engine_construct(&engine->base, arg->ctx); - - engine->base.base.base.funcs = &engine_funcs; - engine->base.base.funcs = &i2c_engine_funcs; - engine->base.funcs = &i2c_hw_engine_funcs; - engine->base.default_speed = arg->default_speed; - engine->addr.DC_I2C_DDCX_SETUP = - mmDC_I2C_DDC1_SETUP + ddc_setup_offset[arg->engine_id]; - engine->addr.DC_I2C_DDCX_SPEED = - mmDC_I2C_DDC1_SPEED + ddc_speed_offset[arg->engine_id]; - - engine->engine_id = arg->engine_id; - engine->reference_frequency = arg->reference_frequency; - engine->buffer_used_bytes = 0; - engine->transaction_count = 0; - engine->engine_keep_power_up_count = 1; -} - -struct i2c_engine *dal_i2c_hw_engine_dce80_create( - const struct i2c_hw_engine_dce80_create_arg *arg) -{ - struct i2c_hw_engine_dce80 *engine; - - if (!arg) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - if ((arg->engine_id >= sizeof(ddc_setup_offset) / sizeof(int32_t)) || - (arg->engine_id >= sizeof(ddc_speed_offset) / sizeof(int32_t)) || - !arg->reference_frequency) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - engine = kzalloc(sizeof(struct i2c_hw_engine_dce80), GFP_KERNEL); - - if (!engine) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - construct(engine, arg); - return &engine->base.base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.h deleted file mode 100644 index 5c6116fb5479..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_hw_engine_dce80.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_HW_ENGINE_DCE80_H__ -#define __DAL_I2C_HW_ENGINE_DCE80_H__ - -struct i2c_hw_engine_dce80 { - struct i2c_hw_engine base; - struct { - uint32_t DC_I2C_DDCX_SETUP; - uint32_t DC_I2C_DDCX_SPEED; - } addr; - uint32_t engine_id; - /* expressed in kilohertz */ - uint32_t reference_frequency; - /* number of bytes currently used in HW buffer */ - uint32_t buffer_used_bytes; - /* number of pending transactions (before GO) */ - uint32_t transaction_count; - uint32_t engine_keep_power_up_count; -}; - -struct i2c_hw_engine_dce80_create_arg { - uint32_t engine_id; - uint32_t reference_frequency; - uint32_t default_speed; - struct dc_context *ctx; -}; - -struct i2c_engine *dal_i2c_hw_engine_dce80_create( - const struct i2c_hw_engine_dce80_create_arg *arg); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.h deleted file mode 100644 index 26355c088746..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_SW_ENGINE_DCE80_H__ -#define __DAL_I2C_SW_ENGINE_DCE80_H__ - -struct i2c_sw_engine_dce80 { - struct i2c_sw_engine base; - uint32_t engine_id; -}; - -struct i2c_sw_engine_dce80_create_arg { - uint32_t engine_id; - uint32_t default_speed; - struct dc_context *ctx; -}; - -struct i2c_engine *dal_i2c_sw_engine_dce80_create( - const struct i2c_sw_engine_dce80_create_arg *arg); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.c deleted file mode 100644 index ed48596dd2a5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../i2caux.h" - -/* - * Header of this unit - */ - -#include "i2caux_dce80.h" - -/* - * Post-requisites: headers required by this unit - */ - -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "i2c_sw_engine_dce80.h" -#include "../i2c_hw_engine.h" -#include "i2c_hw_engine_dce80.h" -#include "../i2c_generic_hw_engine.h" -#include "../aux_engine.h" - - -#include "../dce110/aux_engine_dce110.h" -#include "../dce110/i2caux_dce110.h" - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" - - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers dce80_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -/* - * This unit - */ - -#define FROM_I2C_AUX(ptr) \ - container_of((ptr), struct i2caux_dce80, base) - -static void destruct( - struct i2caux_dce80 *i2caux_dce80) -{ - dal_i2caux_destruct(&i2caux_dce80->base); -} - -static void destroy( - struct i2caux **i2c_engine) -{ - struct i2caux_dce80 *i2caux_dce80 = FROM_I2C_AUX(*i2c_engine); - - destruct(i2caux_dce80); - - kfree(i2caux_dce80); - - *i2c_engine = NULL; -} - -static struct i2c_engine *acquire_i2c_hw_engine( - struct i2caux *i2caux, - struct ddc *ddc) -{ - struct i2caux_dce80 *i2caux_dce80 = FROM_I2C_AUX(i2caux); - - struct i2c_engine *engine = NULL; - bool non_generic; - - if (!ddc) - return NULL; - - if (ddc->hw_info.hw_supported) { - enum gpio_ddc_line line = dal_ddc_get_line(ddc); - - if (line < GPIO_DDC_LINE_COUNT) { - non_generic = true; - engine = i2caux->i2c_hw_engines[line]; - } - } - - if (!engine) { - non_generic = false; - engine = i2caux->i2c_generic_hw_engine; - } - - if (!engine) - return NULL; - - if (non_generic) { - if (!i2caux_dce80->i2c_hw_buffer_in_use && - engine->base.funcs->acquire(&engine->base, ddc)) { - i2caux_dce80->i2c_hw_buffer_in_use = true; - return engine; - } - } else { - if (engine->base.funcs->acquire(&engine->base, ddc)) - return engine; - } - - return NULL; -} - -static void release_engine( - struct i2caux *i2caux, - struct engine *engine) -{ - if (engine->funcs->get_engine_type(engine) == - I2CAUX_ENGINE_TYPE_I2C_DDC_HW) - FROM_I2C_AUX(i2caux)->i2c_hw_buffer_in_use = false; - - dal_i2caux_release_engine(i2caux, engine); -} - -static const enum gpio_ddc_line hw_ddc_lines[] = { - GPIO_DDC_LINE_DDC1, - GPIO_DDC_LINE_DDC2, - GPIO_DDC_LINE_DDC3, - GPIO_DDC_LINE_DDC4, - GPIO_DDC_LINE_DDC5, - GPIO_DDC_LINE_DDC6, - GPIO_DDC_LINE_DDC_VGA -}; - -static const enum gpio_ddc_line hw_aux_lines[] = { - GPIO_DDC_LINE_DDC1, - GPIO_DDC_LINE_DDC2, - GPIO_DDC_LINE_DDC3, - GPIO_DDC_LINE_DDC4, - GPIO_DDC_LINE_DDC5, - GPIO_DDC_LINE_DDC6 -}; - -static const struct i2caux_funcs i2caux_funcs = { - .destroy = destroy, - .acquire_i2c_hw_engine = acquire_i2c_hw_engine, - .release_engine = release_engine, - .acquire_i2c_sw_engine = dal_i2caux_acquire_i2c_sw_engine, - .acquire_aux_engine = dal_i2caux_acquire_aux_engine, -}; - -static void construct( - struct i2caux_dce80 *i2caux_dce80, - struct dc_context *ctx) -{ - /* Entire family have I2C engine reference clock frequency - * changed from XTALIN (27) to XTALIN/2 (13.5) */ - - struct i2caux *base = &i2caux_dce80->base; - - uint32_t reference_frequency = - dal_i2caux_get_reference_clock(ctx->dc_bios) >> 1; - - /*bool use_i2c_sw_engine = dal_adapter_service_is_feature_supported(as, - FEATURE_RESTORE_USAGE_I2C_SW_ENGINE);*/ - - /* Use SWI2C for dce8 currently, sicne we have bug with hwi2c */ - bool use_i2c_sw_engine = true; - - uint32_t i; - - dal_i2caux_construct(base, ctx); - - i2caux_dce80->base.funcs = &i2caux_funcs; - i2caux_dce80->i2c_hw_buffer_in_use = false; - - /* Create I2C HW engines (HW + SW pairs) - * for all lines which has assisted HW DDC - * 'i' (loop counter) used as DDC/AUX engine_id */ - - i = 0; - - do { - enum gpio_ddc_line line_id = hw_ddc_lines[i]; - - struct i2c_hw_engine_dce80_create_arg hw_arg; - - if (use_i2c_sw_engine) { - struct i2c_sw_engine_dce80_create_arg sw_arg; - - sw_arg.engine_id = i; - sw_arg.default_speed = base->default_i2c_sw_speed; - sw_arg.ctx = ctx; - base->i2c_sw_engines[line_id] = - dal_i2c_sw_engine_dce80_create(&sw_arg); - } - - hw_arg.engine_id = i; - hw_arg.reference_frequency = reference_frequency; - hw_arg.default_speed = base->default_i2c_hw_speed; - hw_arg.ctx = ctx; - - base->i2c_hw_engines[line_id] = - dal_i2c_hw_engine_dce80_create(&hw_arg); - - ++i; - } while (i < ARRAY_SIZE(hw_ddc_lines)); - - /* Create AUX engines for all lines which has assisted HW AUX - * 'i' (loop counter) used as DDC/AUX engine_id */ - - i = 0; - - do { - enum gpio_ddc_line line_id = hw_aux_lines[i]; - - struct aux_engine_dce110_init_data arg; - - arg.engine_id = i; - arg.timeout_period = base->aux_timeout_period; - arg.ctx = ctx; - arg.regs = &dce80_aux_regs[i]; - - base->aux_engines[line_id] = - dal_aux_engine_dce110_create(&arg); - - ++i; - } while (i < ARRAY_SIZE(hw_aux_lines)); - - /* TODO Generic I2C SW and HW */ -} - -struct i2caux *dal_i2caux_dce80_create( - struct dc_context *ctx) -{ - struct i2caux_dce80 *i2caux_dce80 = - kzalloc(sizeof(struct i2caux_dce80), GFP_KERNEL); - - if (!i2caux_dce80) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - construct(i2caux_dce80, ctx); - return &i2caux_dce80->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.h deleted file mode 100644 index 21908629e973..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2caux_dce80.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCE80_H__ -#define __DAL_I2C_AUX_DCE80_H__ - -struct i2caux_dce80 { - struct i2caux base; - /* indicate the I2C HW circular buffer is in use */ - bool i2c_hw_buffer_in_use; -}; - -struct i2caux *dal_i2caux_dce80_create( - struct dc_context *ctx); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c b/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c deleted file mode 100644 index a59c1f50c1e8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -#include "../dce110/aux_engine_dce110.h" -#include "../dce110/i2c_hw_engine_dce110.h" -#include "../dce110/i2caux_dce110.h" - -#include "dcn/dcn_1_0_offset.h" -#include "dcn/dcn_1_0_sh_mask.h" -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -/* compile time expand base address. */ -#define BASE(seg) \ - BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name -/* macros to expend register list macro defined in HW object header file - * end *********************/ - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK \ -} - -#define hw_engine_regs(id)\ -{\ - I2C_HW_ENGINE_COMMON_REG_LIST(id) \ -} - -static const struct dce110_aux_registers dcn10_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5), -}; - -static const struct dce110_i2c_hw_engine_registers dcn10_hw_engine_regs[] = { - hw_engine_regs(1), - hw_engine_regs(2), - hw_engine_regs(3), - hw_engine_regs(4), - hw_engine_regs(5), - hw_engine_regs(6) -}; - -static const struct dce110_i2c_hw_engine_shift i2c_shift = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce110_i2c_hw_engine_mask i2c_mask = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -struct i2caux *dal_i2caux_dcn10_create( - struct dc_context *ctx) -{ - struct i2caux_dce110 *i2caux_dce110 = - kzalloc(sizeof(struct i2caux_dce110), GFP_KERNEL); - - if (!i2caux_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - dal_i2caux_dce110_construct(i2caux_dce110, - ctx, - ARRAY_SIZE(dcn10_aux_regs), - dcn10_aux_regs, - dcn10_hw_engine_regs, - &i2c_shift, - &i2c_mask); - return &i2caux_dce110->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.h b/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.h deleted file mode 100644 index aeb4a86463d4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dcn10/i2caux_dcn10.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCN10_H__ -#define __DAL_I2C_AUX_DCN10_H__ - -struct i2caux *dal_i2caux_dcn10_create( - struct dc_context *ctx); - -#endif /* __DAL_I2C_AUX_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.c b/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.c deleted file mode 100644 index e6408f644086..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -/* - * Header of this unit - */ -#include "i2caux_diag.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -static void destruct( - struct i2caux *i2caux) -{ - dal_i2caux_destruct(i2caux); -} - -static void destroy( - struct i2caux **i2c_engine) -{ - destruct(*i2c_engine); - - kfree(*i2c_engine); - - *i2c_engine = NULL; -} - -/* function table */ -static const struct i2caux_funcs i2caux_funcs = { - .destroy = destroy, - .acquire_i2c_hw_engine = NULL, - .release_engine = NULL, - .acquire_i2c_sw_engine = NULL, - .acquire_aux_engine = NULL, -}; - -static void construct( - struct i2caux *i2caux, - struct dc_context *ctx) -{ - dal_i2caux_construct(i2caux, ctx); - i2caux->funcs = &i2caux_funcs; -} - -struct i2caux *dal_i2caux_diag_fpga_create( - struct dc_context *ctx) -{ - struct i2caux *i2caux = kzalloc(sizeof(struct i2caux), - GFP_KERNEL); - - if (!i2caux) { - ASSERT_CRITICAL(false); - return NULL; - } - - construct(i2caux, ctx); - return i2caux; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.h b/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.h deleted file mode 100644 index a83eeb748283..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/diagnostics/i2caux_diag.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DIAG_FPGA_H__ -#define __DAL_I2C_AUX_DIAG_FPGA_H__ - -struct i2caux *dal_i2caux_diag_fpga_create( - struct dc_context *ctx); - -#endif /* __DAL_I2C_AUX_DIAG_FPGA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/engine.h deleted file mode 100644 index b16fb1ff687d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/engine.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_ENGINE_H__ -#define __DAL_ENGINE_H__ - -#include "dc_ddc_types.h" - -enum i2caux_transaction_operation { - I2CAUX_TRANSACTION_READ, - I2CAUX_TRANSACTION_WRITE -}; - -enum i2caux_transaction_address_space { - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C = 1, - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD -}; - -struct i2caux_transaction_payload { - enum i2caux_transaction_address_space address_space; - uint32_t address; - uint32_t length; - uint8_t *data; -}; - -enum i2caux_transaction_status { - I2CAUX_TRANSACTION_STATUS_UNKNOWN = (-1L), - I2CAUX_TRANSACTION_STATUS_SUCCEEDED, - I2CAUX_TRANSACTION_STATUS_FAILED_CHANNEL_BUSY, - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT, - I2CAUX_TRANSACTION_STATUS_FAILED_PROTOCOL_ERROR, - I2CAUX_TRANSACTION_STATUS_FAILED_NACK, - I2CAUX_TRANSACTION_STATUS_FAILED_INCOMPLETE, - I2CAUX_TRANSACTION_STATUS_FAILED_OPERATION, - I2CAUX_TRANSACTION_STATUS_FAILED_INVALID_OPERATION, - I2CAUX_TRANSACTION_STATUS_FAILED_BUFFER_OVERFLOW, - I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON -}; - -struct i2caux_transaction_request { - enum i2caux_transaction_operation operation; - struct i2caux_transaction_payload payload; - enum i2caux_transaction_status status; -}; - -enum i2caux_engine_type { - I2CAUX_ENGINE_TYPE_UNKNOWN = (-1L), - I2CAUX_ENGINE_TYPE_AUX, - I2CAUX_ENGINE_TYPE_I2C_DDC_HW, - I2CAUX_ENGINE_TYPE_I2C_GENERIC_HW, - I2CAUX_ENGINE_TYPE_I2C_SW -}; - -enum i2c_default_speed { - I2CAUX_DEFAULT_I2C_HW_SPEED = 50, - I2CAUX_DEFAULT_I2C_SW_SPEED = 50 -}; - -struct engine; - -struct engine_funcs { - enum i2caux_engine_type (*get_engine_type)( - const struct engine *engine); - bool (*acquire)( - struct engine *engine, - struct ddc *ddc); - bool (*submit_request)( - struct engine *engine, - struct i2caux_transaction_request *request, - bool middle_of_transaction); - void (*release_engine)( - struct engine *engine); -}; - -struct engine { - const struct engine_funcs *funcs; - uint32_t inst; - struct ddc *ddc; - struct dc_context *ctx; -}; - -void dal_i2caux_construct_engine( - struct engine *engine, - struct dc_context *ctx); - -void dal_i2caux_destruct_engine( - struct engine *engine); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/engine_base.c b/drivers/gpu/drm/amd/display/dc/i2caux/engine_base.c deleted file mode 100644 index 5d155d36d353..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/engine_base.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" - -/* - * Header of this unit - */ - -#include "engine.h" - -void dal_i2caux_construct_engine( - struct engine *engine, - struct dc_context *ctx) -{ - engine->ddc = NULL; - engine->ctx = ctx; -} - -void dal_i2caux_destruct_engine( - struct engine *engine) -{ - /* nothing to do */ -} - diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.c deleted file mode 100644 index 70e20bd47ce4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "engine.h" - -/* - * Header of this unit - */ - -#include "i2c_engine.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -#define FROM_ENGINE(ptr) \ - container_of((ptr), struct i2c_engine, base) - -bool dal_i2c_engine_acquire( - struct engine *engine, - struct ddc *ddc_handle) -{ - struct i2c_engine *i2c_engine = FROM_ENGINE(engine); - - uint32_t counter = 0; - bool result; - - do { - result = i2c_engine->funcs->acquire_engine( - i2c_engine, ddc_handle); - - if (result) - break; - - /* i2c_engine is busy by VBios, lets wait and retry */ - - udelay(10); - - ++counter; - } while (counter < 2); - - if (result) { - if (!i2c_engine->funcs->setup_engine(i2c_engine)) { - engine->funcs->release_engine(engine); - result = false; - } - } - - return result; -} - -bool dal_i2c_engine_setup_i2c_engine( - struct i2c_engine *engine) -{ - /* Derivative classes do not have to override this */ - - return true; -} - -void dal_i2c_engine_submit_channel_request( - struct i2c_engine *engine, - struct i2c_request_transaction_data *request) -{ - -} - -void dal_i2c_engine_process_channel_reply( - struct i2c_engine *engine, - struct i2c_reply_transaction_data *reply) -{ - -} - -void dal_i2c_engine_construct( - struct i2c_engine *engine, - struct dc_context *ctx) -{ - dal_i2caux_construct_engine(&engine->base, ctx); - engine->timeout_delay = 0; -} - -void dal_i2c_engine_destruct( - struct i2c_engine *engine) -{ - dal_i2caux_destruct_engine(&engine->base); -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.h deleted file mode 100644 index ded6ea34b714..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_engine.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_ENGINE_H__ -#define __DAL_I2C_ENGINE_H__ - -enum i2c_channel_operation_result { - I2C_CHANNEL_OPERATION_SUCCEEDED, - I2C_CHANNEL_OPERATION_FAILED, - I2C_CHANNEL_OPERATION_NOT_GRANTED, - I2C_CHANNEL_OPERATION_IS_BUSY, - I2C_CHANNEL_OPERATION_NO_HANDLE_PROVIDED, - I2C_CHANNEL_OPERATION_CHANNEL_IN_USE, - I2C_CHANNEL_OPERATION_CHANNEL_CLIENT_MAX_ALLOWED, - I2C_CHANNEL_OPERATION_ENGINE_BUSY, - I2C_CHANNEL_OPERATION_TIMEOUT, - I2C_CHANNEL_OPERATION_NO_RESPONSE, - I2C_CHANNEL_OPERATION_HW_REQUEST_I2C_BUS, - I2C_CHANNEL_OPERATION_WRONG_PARAMETER, - I2C_CHANNEL_OPERATION_OUT_NB_OF_RETRIES, - I2C_CHANNEL_OPERATION_NOT_STARTED -}; - -struct i2c_request_transaction_data { - enum i2caux_transaction_action action; - enum i2c_channel_operation_result status; - uint8_t address; - uint32_t length; - uint8_t *data; -}; - -struct i2c_reply_transaction_data { - uint32_t length; - uint8_t *data; -}; - -struct i2c_engine; - -struct i2c_engine_funcs { - void (*destroy)( - struct i2c_engine **ptr); - uint32_t (*get_speed)( - const struct i2c_engine *engine); - void (*set_speed)( - struct i2c_engine *engine, - uint32_t speed); - bool (*acquire_engine)( - struct i2c_engine *engine, - struct ddc *ddc); - bool (*setup_engine)( - struct i2c_engine *engine); - void (*submit_channel_request)( - struct i2c_engine *engine, - struct i2c_request_transaction_data *request); - void (*process_channel_reply)( - struct i2c_engine *engine, - struct i2c_reply_transaction_data *reply); - enum i2c_channel_operation_result (*get_channel_status)( - struct i2c_engine *engine, - uint8_t *returned_bytes); -}; - -struct i2c_engine { - struct engine base; - const struct i2c_engine_funcs *funcs; - uint32_t timeout_delay; - uint32_t setup_limit; - uint32_t send_reset_length; -}; - -void dal_i2c_engine_construct( - struct i2c_engine *engine, - struct dc_context *ctx); - -void dal_i2c_engine_destruct( - struct i2c_engine *engine); - -bool dal_i2c_engine_setup_i2c_engine( - struct i2c_engine *engine); - -void dal_i2c_engine_submit_channel_request( - struct i2c_engine *engine, - struct i2c_request_transaction_data *request); - -void dal_i2c_engine_process_channel_reply( - struct i2c_engine *engine, - struct i2c_reply_transaction_data *reply); - -bool dal_i2c_engine_acquire( - struct engine *ptr, - struct ddc *ddc_handle); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.c deleted file mode 100644 index 5a4295e0fae5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "engine.h" -#include "i2c_engine.h" -#include "i2c_hw_engine.h" - -/* - * Header of this unit - */ - -#include "i2c_generic_hw_engine.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -/* - * @brief - * Cast 'struct i2c_hw_engine *' - * to 'struct i2c_generic_hw_engine *' - */ -#define FROM_I2C_HW_ENGINE(ptr) \ - container_of((ptr), struct i2c_generic_hw_engine, base) - -/* - * @brief - * Cast 'struct i2c_engine *' - * to 'struct i2c_generic_hw_engine *' - */ -#define FROM_I2C_ENGINE(ptr) \ - FROM_I2C_HW_ENGINE(container_of((ptr), struct i2c_hw_engine, base)) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct i2c_generic_hw_engine *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -enum i2caux_engine_type dal_i2c_generic_hw_engine_get_engine_type( - const struct engine *engine) -{ - return I2CAUX_ENGINE_TYPE_I2C_GENERIC_HW; -} - -/* - * @brief - * Single transaction handling. - * Since transaction may be bigger than HW buffer size, - * it divides transaction to sub-transactions - * and uses batch transaction feature of the engine. - */ -bool dal_i2c_generic_hw_engine_submit_request( - struct engine *engine, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction) -{ - struct i2c_generic_hw_engine *hw_engine = FROM_ENGINE(engine); - - struct i2c_hw_engine *base = &hw_engine->base; - - uint32_t max_payload_size = - base->funcs->get_hw_buffer_available_size(base); - - bool initial_stop_bit = !middle_of_transaction; - - struct i2c_generic_transaction_attributes attributes; - - enum i2c_channel_operation_result operation_result = - I2C_CHANNEL_OPERATION_FAILED; - - bool result = false; - - /* setup transaction initial properties */ - - uint8_t address = i2caux_request->payload.address; - uint8_t *current_payload = i2caux_request->payload.data; - uint32_t remaining_payload_size = i2caux_request->payload.length; - - bool first_iteration = true; - - if (i2caux_request->operation == I2CAUX_TRANSACTION_READ) - attributes.action = I2CAUX_TRANSACTION_ACTION_I2C_READ; - else if (i2caux_request->operation == I2CAUX_TRANSACTION_WRITE) - attributes.action = I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - else { - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INVALID_OPERATION; - return false; - } - - /* Do batch transaction. - * Divide read/write data into payloads which fit HW buffer size. - * 1. Single transaction: - * start_bit = 1, stop_bit depends on session state, ack_on_read = 0; - * 2. Start of batch transaction: - * start_bit = 1, stop_bit = 0, ack_on_read = 1; - * 3. Middle of batch transaction: - * start_bit = 0, stop_bit = 0, ack_on_read = 1; - * 4. End of batch transaction: - * start_bit = 0, stop_bit depends on session state, ack_on_read = 0. - * Session stop bit is set if 'middle_of_transaction' = 0. */ - - while (remaining_payload_size) { - uint32_t current_transaction_size; - uint32_t current_payload_size; - - bool last_iteration; - bool stop_bit; - - /* Calculate current transaction size and payload size. - * Transaction size = total number of bytes in transaction, - * including slave's address; - * Payload size = number of data bytes in transaction. */ - - if (first_iteration) { - /* In the first sub-transaction we send slave's address - * thus we need to reserve one byte for it */ - current_transaction_size = - (remaining_payload_size > max_payload_size - 1) ? - max_payload_size : - remaining_payload_size + 1; - - current_payload_size = current_transaction_size - 1; - } else { - /* Second and further sub-transactions will have - * entire buffer reserved for data */ - current_transaction_size = - (remaining_payload_size > max_payload_size) ? - max_payload_size : - remaining_payload_size; - - current_payload_size = current_transaction_size; - } - - last_iteration = - (remaining_payload_size == current_payload_size); - - stop_bit = last_iteration ? initial_stop_bit : false; - - /* write slave device address */ - - if (first_iteration) - hw_engine->funcs->write_address(hw_engine, address); - - /* write current portion of data, if requested */ - - if (i2caux_request->operation == I2CAUX_TRANSACTION_WRITE) - hw_engine->funcs->write_data( - hw_engine, - current_payload, - current_payload_size); - - /* execute transaction */ - - attributes.start_bit = first_iteration; - attributes.stop_bit = stop_bit; - attributes.last_read = last_iteration; - attributes.transaction_size = current_transaction_size; - - hw_engine->funcs->execute_transaction(hw_engine, &attributes); - - /* wait until transaction is processed; if it fails - quit */ - - operation_result = base->funcs->wait_on_operation_result( - base, - base->funcs->get_transaction_timeout( - base, current_transaction_size), - I2C_CHANNEL_OPERATION_ENGINE_BUSY); - - if (operation_result != I2C_CHANNEL_OPERATION_SUCCEEDED) - break; - - /* read current portion of data, if requested */ - - /* the read offset should be 1 for first sub-transaction, - * and 0 for any next one */ - - if (i2caux_request->operation == I2CAUX_TRANSACTION_READ) - hw_engine->funcs->read_data(hw_engine, current_payload, - current_payload_size, first_iteration ? 1 : 0); - - /* update loop variables */ - - first_iteration = false; - current_payload += current_payload_size; - remaining_payload_size -= current_payload_size; - } - - /* update transaction status */ - - switch (operation_result) { - case I2C_CHANNEL_OPERATION_SUCCEEDED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - result = true; - break; - case I2C_CHANNEL_OPERATION_NO_RESPONSE: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - break; - case I2C_CHANNEL_OPERATION_TIMEOUT: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - break; - case I2C_CHANNEL_OPERATION_FAILED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INCOMPLETE; - break; - default: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_OPERATION; - } - - return result; -} - -/* - * @brief - * Returns number of microseconds to wait until timeout to be considered - */ -uint32_t dal_i2c_generic_hw_engine_get_transaction_timeout( - const struct i2c_hw_engine *engine, - uint32_t length) -{ - const struct i2c_engine *base = &engine->base; - - uint32_t speed = base->funcs->get_speed(base); - - if (!speed) - return 0; - - /* total timeout = period_timeout * (start + data bits count + stop) */ - - return ((1000 * TRANSACTION_TIMEOUT_IN_I2C_CLOCKS) / speed) * - (1 + (length << 3) + 1); -} - -void dal_i2c_generic_hw_engine_construct( - struct i2c_generic_hw_engine *engine, - struct dc_context *ctx) -{ - dal_i2c_hw_engine_construct(&engine->base, ctx); -} - -void dal_i2c_generic_hw_engine_destruct( - struct i2c_generic_hw_engine *engine) -{ - dal_i2c_hw_engine_destruct(&engine->base); -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.h deleted file mode 100644 index 1da0397b04a2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_generic_hw_engine.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_GENERIC_HW_ENGINE_H__ -#define __DAL_I2C_GENERIC_HW_ENGINE_H__ - -struct i2c_generic_transaction_attributes { - enum i2caux_transaction_action action; - uint32_t transaction_size; - bool start_bit; - bool stop_bit; - bool last_read; -}; - -struct i2c_generic_hw_engine; - -struct i2c_generic_hw_engine_funcs { - void (*write_address)( - struct i2c_generic_hw_engine *engine, - uint8_t address); - void (*write_data)( - struct i2c_generic_hw_engine *engine, - const uint8_t *buffer, - uint32_t length); - void (*read_data)( - struct i2c_generic_hw_engine *engine, - uint8_t *buffer, - uint32_t length, - uint32_t offset); - void (*execute_transaction)( - struct i2c_generic_hw_engine *engine, - struct i2c_generic_transaction_attributes *attributes); -}; - -struct i2c_generic_hw_engine { - struct i2c_hw_engine base; - const struct i2c_generic_hw_engine_funcs *funcs; -}; - -void dal_i2c_generic_hw_engine_construct( - struct i2c_generic_hw_engine *engine, - struct dc_context *ctx); - -void dal_i2c_generic_hw_engine_destruct( - struct i2c_generic_hw_engine *engine); -enum i2caux_engine_type dal_i2c_generic_hw_engine_get_engine_type( - const struct engine *engine); -bool dal_i2c_generic_hw_engine_submit_request( - struct engine *ptr, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction); -uint32_t dal_i2c_generic_hw_engine_get_transaction_timeout( - const struct i2c_hw_engine *engine, - uint32_t length); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.c deleted file mode 100644 index 141898533e8e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dm_event_log.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "engine.h" -#include "i2c_engine.h" - -/* - * Header of this unit - */ - -#include "i2c_hw_engine.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -/* - * @brief - * Cast 'struct i2c_engine *' - * to 'struct i2c_hw_engine *' - */ -#define FROM_I2C_ENGINE(ptr) \ - container_of((ptr), struct i2c_hw_engine, base) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct i2c_hw_engine *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -enum i2caux_engine_type dal_i2c_hw_engine_get_engine_type( - const struct engine *engine) -{ - return I2CAUX_ENGINE_TYPE_I2C_DDC_HW; -} - -bool dal_i2c_hw_engine_submit_request( - struct engine *engine, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction) -{ - struct i2c_hw_engine *hw_engine = FROM_ENGINE(engine); - - struct i2c_request_transaction_data request; - - uint32_t transaction_timeout; - - enum i2c_channel_operation_result operation_result; - - bool result = false; - - /* We need following: - * transaction length will not exceed - * the number of free bytes in HW buffer (minus one for address)*/ - - if (i2caux_request->payload.length >= - hw_engine->funcs->get_hw_buffer_available_size(hw_engine)) { - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_BUFFER_OVERFLOW; - return false; - } - - if (i2caux_request->operation == I2CAUX_TRANSACTION_READ) - request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_READ; - else if (i2caux_request->operation == I2CAUX_TRANSACTION_WRITE) - request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - else { - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INVALID_OPERATION; - /* [anaumov] in DAL2, there was no "return false" */ - return false; - } - - request.address = (uint8_t)i2caux_request->payload.address; - request.length = i2caux_request->payload.length; - request.data = i2caux_request->payload.data; - - /* obtain timeout value before submitting request */ - - transaction_timeout = hw_engine->funcs->get_transaction_timeout( - hw_engine, i2caux_request->payload.length + 1); - - hw_engine->base.funcs->submit_channel_request( - &hw_engine->base, &request); - /* EVENT_LOG_AUX_REQ(engine->ddc->pin_data->en, EVENT_LOG_AUX_ORIGIN_I2C, */ - /* request.action, request.address, request.length, request.data); */ - - if ((request.status == I2C_CHANNEL_OPERATION_FAILED) || - (request.status == I2C_CHANNEL_OPERATION_ENGINE_BUSY)) { - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_CHANNEL_BUSY; - return false; - } - - /* wait until transaction proceed */ - - operation_result = hw_engine->funcs->wait_on_operation_result( - hw_engine, - transaction_timeout, - I2C_CHANNEL_OPERATION_ENGINE_BUSY); - - /* update transaction status */ - - switch (operation_result) { - case I2C_CHANNEL_OPERATION_SUCCEEDED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - result = true; - break; - case I2C_CHANNEL_OPERATION_NO_RESPONSE: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - break; - case I2C_CHANNEL_OPERATION_TIMEOUT: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - break; - case I2C_CHANNEL_OPERATION_FAILED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INCOMPLETE; - break; - default: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_OPERATION; - } - - if (result && (i2caux_request->operation == I2CAUX_TRANSACTION_READ)) { - struct i2c_reply_transaction_data reply; - - reply.data = i2caux_request->payload.data; - reply.length = i2caux_request->payload.length; - - hw_engine->base.funcs-> - process_channel_reply(&hw_engine->base, &reply); - /* EVENT_LOG_AUX_REP(engine->ddc->pin_data->en, EVENT_LOG_AUX_ORIGIN_I2C, */ - /* AUX_TRANSACTION_REPLY_I2C_ACK, reply.length, reply.data); */ - } - - - - return result; -} - -bool dal_i2c_hw_engine_acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc) -{ - enum gpio_result result; - uint32_t current_speed; - - result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE, - GPIO_DDC_CONFIG_TYPE_MODE_I2C); - - if (result != GPIO_RESULT_OK) - return false; - - engine->base.ddc = ddc; - - current_speed = engine->funcs->get_speed(engine); - - if (current_speed) - FROM_I2C_ENGINE(engine)->original_speed = current_speed; - - return true; -} -/* - * @brief - * Queries in a loop for current engine status - * until retrieved status matches 'expected_result', or timeout occurs. - * Timeout given in microseconds - * and the status query frequency is also one per microsecond. - */ -enum i2c_channel_operation_result dal_i2c_hw_engine_wait_on_operation_result( - struct i2c_hw_engine *engine, - uint32_t timeout, - enum i2c_channel_operation_result expected_result) -{ - enum i2c_channel_operation_result result; - uint32_t i = 0; - - if (!timeout) - return I2C_CHANNEL_OPERATION_SUCCEEDED; - - do { - result = engine->base.funcs->get_channel_status( - &engine->base, NULL); - - if (result != expected_result) - break; - - udelay(1); - - ++i; - } while (i < timeout); - - return result; -} - -void dal_i2c_hw_engine_construct( - struct i2c_hw_engine *engine, - struct dc_context *ctx) -{ - dal_i2c_engine_construct(&engine->base, ctx); - engine->original_speed = I2CAUX_DEFAULT_I2C_HW_SPEED; - engine->default_speed = I2CAUX_DEFAULT_I2C_HW_SPEED; -} - -void dal_i2c_hw_engine_destruct( - struct i2c_hw_engine *engine) -{ - dal_i2c_engine_destruct(&engine->base); -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.h deleted file mode 100644 index 8936a994804a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_hw_engine.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_HW_ENGINE_H__ -#define __DAL_I2C_HW_ENGINE_H__ - -enum { - TRANSACTION_TIMEOUT_IN_I2C_CLOCKS = 32 -}; - -struct i2c_hw_engine; - -struct i2c_hw_engine_funcs { - uint32_t (*get_hw_buffer_available_size)( - const struct i2c_hw_engine *engine); - enum i2c_channel_operation_result (*wait_on_operation_result)( - struct i2c_hw_engine *engine, - uint32_t timeout, - enum i2c_channel_operation_result expected_result); - uint32_t (*get_transaction_timeout)( - const struct i2c_hw_engine *engine, - uint32_t length); -}; - -struct i2c_hw_engine { - struct i2c_engine base; - const struct i2c_hw_engine_funcs *funcs; - - /* Values below are in kilohertz */ - uint32_t original_speed; - uint32_t default_speed; -}; - -void dal_i2c_hw_engine_construct( - struct i2c_hw_engine *engine, - struct dc_context *ctx); - -void dal_i2c_hw_engine_destruct( - struct i2c_hw_engine *engine); - -enum i2c_channel_operation_result dal_i2c_hw_engine_wait_on_operation_result( - struct i2c_hw_engine *engine, - uint32_t timeout, - enum i2c_channel_operation_result expected_result); - -bool dal_i2c_hw_engine_acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc); - -bool dal_i2c_hw_engine_submit_request( - struct engine *ptr, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction); - -enum i2caux_engine_type dal_i2c_hw_engine_get_engine_type( - const struct engine *engine); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.c deleted file mode 100644 index 8e19bb629394..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.c +++ /dev/null @@ -1,601 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "engine.h" -#include "i2c_engine.h" - -/* - * Header of this unit - */ - -#include "i2c_sw_engine.h" - -/* - * Post-requisites: headers required by this unit - */ - -/* - * This unit - */ - -#define SCL false -#define SDA true - -static inline bool read_bit_from_ddc( - struct ddc *ddc, - bool data_nor_clock) -{ - uint32_t value = 0; - - if (data_nor_clock) - dal_gpio_get_value(ddc->pin_data, &value); - else - dal_gpio_get_value(ddc->pin_clock, &value); - - return (value != 0); -} - -static inline void write_bit_to_ddc( - struct ddc *ddc, - bool data_nor_clock, - bool bit) -{ - uint32_t value = bit ? 1 : 0; - - if (data_nor_clock) - dal_gpio_set_value(ddc->pin_data, value); - else - dal_gpio_set_value(ddc->pin_clock, value); -} - -static bool wait_for_scl_high( - struct dc_context *ctx, - struct ddc *ddc, - uint16_t clock_delay_div_4) -{ - uint32_t scl_retry = 0; - uint32_t scl_retry_max = I2C_SW_TIMEOUT_DELAY / clock_delay_div_4; - - udelay(clock_delay_div_4); - - /* 3 milliseconds delay - * to wake up some displays from "low power" state. - */ - - do { - if (read_bit_from_ddc(ddc, SCL)) - return true; - - udelay(clock_delay_div_4); - - ++scl_retry; - } while (scl_retry <= scl_retry_max); - - return false; -} - -static bool start_sync( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4) -{ - uint32_t retry = 0; - - /* The I2C communications start signal is: - * the SDA going low from high, while the SCL is high. */ - - write_bit_to_ddc(ddc_handle, SCL, true); - - udelay(clock_delay_div_4); - - do { - write_bit_to_ddc(ddc_handle, SDA, true); - - if (!read_bit_from_ddc(ddc_handle, SDA)) { - ++retry; - continue; - } - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - break; - - write_bit_to_ddc(ddc_handle, SDA, false); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, false); - - udelay(clock_delay_div_4); - - return true; - } while (retry <= I2C_SW_RETRIES); - - return false; -} - -static bool stop_sync( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4) -{ - uint32_t retry = 0; - - /* The I2C communications stop signal is: - * the SDA going high from low, while the SCL is high. */ - - write_bit_to_ddc(ddc_handle, SCL, false); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SDA, false); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - return false; - - write_bit_to_ddc(ddc_handle, SDA, true); - - do { - udelay(clock_delay_div_4); - - if (read_bit_from_ddc(ddc_handle, SDA)) - return true; - - ++retry; - } while (retry <= 2); - - return false; -} - -static bool write_byte( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4, - uint8_t byte) -{ - int32_t shift = 7; - bool ack; - - /* bits are transmitted serially, starting from MSB */ - - do { - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SDA, (byte >> shift) & 1); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - return false; - - write_bit_to_ddc(ddc_handle, SCL, false); - - --shift; - } while (shift >= 0); - - /* The display sends ACK by preventing the SDA from going high - * after the SCL pulse we use to send our last data bit. - * If the SDA goes high after that bit, it's a NACK */ - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SDA, true); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - return false; - - /* read ACK bit */ - - ack = !read_bit_from_ddc(ddc_handle, SDA); - - udelay(clock_delay_div_4 << 1); - - write_bit_to_ddc(ddc_handle, SCL, false); - - udelay(clock_delay_div_4 << 1); - - return ack; -} - -static bool read_byte( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4, - uint8_t *byte, - bool more) -{ - int32_t shift = 7; - - uint8_t data = 0; - - /* The data bits are read from MSB to LSB; - * bit is read while SCL is high */ - - do { - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - return false; - - if (read_bit_from_ddc(ddc_handle, SDA)) - data |= (1 << shift); - - write_bit_to_ddc(ddc_handle, SCL, false); - - udelay(clock_delay_div_4 << 1); - - --shift; - } while (shift >= 0); - - /* read only whole byte */ - - *byte = data; - - udelay(clock_delay_div_4); - - /* send the acknowledge bit: - * SDA low means ACK, SDA high means NACK */ - - write_bit_to_ddc(ddc_handle, SDA, !more); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SCL, true); - - if (!wait_for_scl_high(ctx, ddc_handle, clock_delay_div_4)) - return false; - - write_bit_to_ddc(ddc_handle, SCL, false); - - udelay(clock_delay_div_4); - - write_bit_to_ddc(ddc_handle, SDA, true); - - udelay(clock_delay_div_4); - - return true; -} - -static bool i2c_write( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4, - uint8_t address, - uint32_t length, - const uint8_t *data) -{ - uint32_t i = 0; - - if (!write_byte(ctx, ddc_handle, clock_delay_div_4, address)) - return false; - - while (i < length) { - if (!write_byte(ctx, ddc_handle, clock_delay_div_4, data[i])) - return false; - ++i; - } - - return true; -} - -static bool i2c_read( - struct dc_context *ctx, - struct ddc *ddc_handle, - uint16_t clock_delay_div_4, - uint8_t address, - uint32_t length, - uint8_t *data) -{ - uint32_t i = 0; - - if (!write_byte(ctx, ddc_handle, clock_delay_div_4, address)) - return false; - - while (i < length) { - if (!read_byte(ctx, ddc_handle, clock_delay_div_4, data + i, - i < length - 1)) - return false; - ++i; - } - - return true; -} - -/* - * @brief - * Cast 'struct i2c_engine *' - * to 'struct i2c_sw_engine *' - */ -#define FROM_I2C_ENGINE(ptr) \ - container_of((ptr), struct i2c_sw_engine, base) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct i2c_sw_engine *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -enum i2caux_engine_type dal_i2c_sw_engine_get_engine_type( - const struct engine *engine) -{ - return I2CAUX_ENGINE_TYPE_I2C_SW; -} - -bool dal_i2c_sw_engine_submit_request( - struct engine *engine, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction) -{ - struct i2c_sw_engine *sw_engine = FROM_ENGINE(engine); - - struct i2c_engine *base = &sw_engine->base; - - struct i2c_request_transaction_data request; - bool operation_succeeded = false; - - if (i2caux_request->operation == I2CAUX_TRANSACTION_READ) - request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_READ; - else if (i2caux_request->operation == I2CAUX_TRANSACTION_WRITE) - request.action = middle_of_transaction ? - I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT : - I2CAUX_TRANSACTION_ACTION_I2C_WRITE; - else { - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INVALID_OPERATION; - /* in DAL2, there was no "return false" */ - return false; - } - - request.address = (uint8_t)i2caux_request->payload.address; - request.length = i2caux_request->payload.length; - request.data = i2caux_request->payload.data; - - base->funcs->submit_channel_request(base, &request); - - if ((request.status == I2C_CHANNEL_OPERATION_ENGINE_BUSY) || - (request.status == I2C_CHANNEL_OPERATION_FAILED)) - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_CHANNEL_BUSY; - else { - enum i2c_channel_operation_result operation_result; - - do { - operation_result = - base->funcs->get_channel_status(base, NULL); - - switch (operation_result) { - case I2C_CHANNEL_OPERATION_SUCCEEDED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_SUCCEEDED; - operation_succeeded = true; - break; - case I2C_CHANNEL_OPERATION_NO_RESPONSE: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_NACK; - break; - case I2C_CHANNEL_OPERATION_TIMEOUT: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_TIMEOUT; - break; - case I2C_CHANNEL_OPERATION_FAILED: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_INCOMPLETE; - break; - default: - i2caux_request->status = - I2CAUX_TRANSACTION_STATUS_FAILED_OPERATION; - break; - } - } while (operation_result == I2C_CHANNEL_OPERATION_ENGINE_BUSY); - } - - return operation_succeeded; -} - -uint32_t dal_i2c_sw_engine_get_speed( - const struct i2c_engine *engine) -{ - return FROM_I2C_ENGINE(engine)->speed; -} - -void dal_i2c_sw_engine_set_speed( - struct i2c_engine *engine, - uint32_t speed) -{ - struct i2c_sw_engine *sw_engine = FROM_I2C_ENGINE(engine); - - ASSERT(speed); - - sw_engine->speed = speed ? speed : I2CAUX_DEFAULT_I2C_SW_SPEED; - - sw_engine->clock_delay = 1000 / sw_engine->speed; - - if (sw_engine->clock_delay < 12) - sw_engine->clock_delay = 12; -} - -bool dal_i2caux_i2c_sw_engine_acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc) -{ - enum gpio_result result; - - result = dal_ddc_open(ddc, GPIO_MODE_FAST_OUTPUT, - GPIO_DDC_CONFIG_TYPE_MODE_I2C); - - if (result != GPIO_RESULT_OK) - return false; - - engine->base.ddc = ddc; - - return true; -} - -void dal_i2c_sw_engine_submit_channel_request( - struct i2c_engine *engine, - struct i2c_request_transaction_data *req) -{ - struct i2c_sw_engine *sw_engine = FROM_I2C_ENGINE(engine); - - struct ddc *ddc = engine->base.ddc; - uint16_t clock_delay_div_4 = sw_engine->clock_delay >> 2; - - /* send sync (start / repeated start) */ - - bool result = start_sync(engine->base.ctx, ddc, clock_delay_div_4); - - /* process payload */ - - if (result) { - switch (req->action) { - case I2CAUX_TRANSACTION_ACTION_I2C_WRITE: - case I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT: - result = i2c_write(engine->base.ctx, ddc, clock_delay_div_4, - req->address, req->length, req->data); - break; - case I2CAUX_TRANSACTION_ACTION_I2C_READ: - case I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT: - result = i2c_read(engine->base.ctx, ddc, clock_delay_div_4, - req->address, req->length, req->data); - break; - default: - result = false; - break; - } - } - - /* send stop if not 'mot' or operation failed */ - - if (!result || - (req->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE) || - (req->action == I2CAUX_TRANSACTION_ACTION_I2C_READ)) - if (!stop_sync(engine->base.ctx, ddc, clock_delay_div_4)) - result = false; - - req->status = result ? - I2C_CHANNEL_OPERATION_SUCCEEDED : - I2C_CHANNEL_OPERATION_FAILED; -} - -enum i2c_channel_operation_result dal_i2c_sw_engine_get_channel_status( - struct i2c_engine *engine, - uint8_t *returned_bytes) -{ - /* No arbitration with VBIOS is performed since DCE 6.0 */ - return I2C_CHANNEL_OPERATION_SUCCEEDED; -} - -void dal_i2c_sw_engine_destruct( - struct i2c_sw_engine *engine) -{ - dal_i2c_engine_destruct(&engine->base); -} - -static void destroy( - struct i2c_engine **ptr) -{ - dal_i2c_sw_engine_destruct(FROM_I2C_ENGINE(*ptr)); - - kfree(*ptr); - *ptr = NULL; -} - -static const struct i2c_engine_funcs i2c_engine_funcs = { - .acquire_engine = dal_i2caux_i2c_sw_engine_acquire_engine, - .destroy = destroy, - .get_speed = dal_i2c_sw_engine_get_speed, - .set_speed = dal_i2c_sw_engine_set_speed, - .setup_engine = dal_i2c_engine_setup_i2c_engine, - .submit_channel_request = dal_i2c_sw_engine_submit_channel_request, - .process_channel_reply = dal_i2c_engine_process_channel_reply, - .get_channel_status = dal_i2c_sw_engine_get_channel_status, -}; - -static void release_engine( - struct engine *engine) -{ - -} - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .get_engine_type = dal_i2c_sw_engine_get_engine_type, - .acquire = dal_i2c_engine_acquire, - .submit_request = dal_i2c_sw_engine_submit_request, -}; - -void dal_i2c_sw_engine_construct( - struct i2c_sw_engine *engine, - const struct i2c_sw_engine_create_arg *arg) -{ - dal_i2c_engine_construct(&engine->base, arg->ctx); - dal_i2c_sw_engine_set_speed(&engine->base, arg->default_speed); - engine->base.funcs = &i2c_engine_funcs; - engine->base.base.funcs = &engine_funcs; -} - -struct i2c_engine *dal_i2c_sw_engine_create( - const struct i2c_sw_engine_create_arg *arg) -{ - struct i2c_sw_engine *engine; - - if (!arg) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - engine = kzalloc(sizeof(struct i2c_sw_engine), GFP_KERNEL); - - if (!engine) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dal_i2c_sw_engine_construct(engine, arg); - return &engine->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.h deleted file mode 100644 index 546f15b0d3f1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2c_sw_engine.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_SW_ENGINE_H__ -#define __DAL_I2C_SW_ENGINE_H__ - -enum { - I2C_SW_RETRIES = 10, - I2C_SW_SCL_READ_RETRIES = 128, - /* following value is in microseconds */ - I2C_SW_TIMEOUT_DELAY = 3000 -}; - -struct i2c_sw_engine; - -struct i2c_sw_engine { - struct i2c_engine base; - uint32_t clock_delay; - /* Values below are in KHz */ - uint32_t speed; - uint32_t default_speed; -}; - -struct i2c_sw_engine_create_arg { - uint32_t default_speed; - struct dc_context *ctx; -}; - -void dal_i2c_sw_engine_construct( - struct i2c_sw_engine *engine, - const struct i2c_sw_engine_create_arg *arg); - -bool dal_i2caux_i2c_sw_engine_acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc_handle); - -void dal_i2c_sw_engine_destruct( - struct i2c_sw_engine *engine); - -struct i2c_engine *dal_i2c_sw_engine_create( - const struct i2c_sw_engine_create_arg *arg); -enum i2caux_engine_type dal_i2c_sw_engine_get_engine_type( - const struct engine *engine); -bool dal_i2c_sw_engine_submit_request( - struct engine *ptr, - struct i2caux_transaction_request *i2caux_request, - bool middle_of_transaction); -uint32_t dal_i2c_sw_engine_get_speed( - const struct i2c_engine *engine); -void dal_i2c_sw_engine_set_speed( - struct i2c_engine *ptr, - uint32_t speed); -void dal_i2c_sw_engine_submit_channel_request( - struct i2c_engine *ptr, - struct i2c_request_transaction_data *req); -enum i2c_channel_operation_result dal_i2c_sw_engine_get_channel_status( - struct i2c_engine *engine, - uint8_t *returned_bytes); -#endif diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.h b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.h deleted file mode 100644 index 64f51bb06915..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_H__ -#define __DAL_I2C_AUX_H__ - -uint32_t dal_i2caux_get_reference_clock( - struct dc_bios *bios); - -struct i2caux; - -struct engine; - -struct i2caux_funcs { - void (*destroy)(struct i2caux **ptr); - struct i2c_engine * (*acquire_i2c_sw_engine)( - struct i2caux *i2caux, - struct ddc *ddc); - struct i2c_engine * (*acquire_i2c_hw_engine)( - struct i2caux *i2caux, - struct ddc *ddc); - struct aux_engine * (*acquire_aux_engine)( - struct i2caux *i2caux, - struct ddc *ddc); - void (*release_engine)( - struct i2caux *i2caux, - struct engine *engine); -}; - -struct i2c_engine; -struct aux_engine; - -struct i2caux { - struct dc_context *ctx; - const struct i2caux_funcs *funcs; - /* On ASIC we have certain amount of lines with HW DDC engine - * (4, 6, or maybe more in the future). - * For every such line, we create separate HW DDC engine - * (since we have these engines in HW) and separate SW DDC engine - * (to allow concurrent use of few lines). - * In similar way we have AUX engines. */ - - /* I2C SW engines, per DDC line. - * Only lines with HW DDC support will be initialized */ - struct i2c_engine *i2c_sw_engines[GPIO_DDC_LINE_COUNT]; - - /* I2C HW engines, per DDC line. - * Only lines with HW DDC support will be initialized */ - struct i2c_engine *i2c_hw_engines[GPIO_DDC_LINE_COUNT]; - - /* AUX engines, per DDC line. - * Only lines with HW AUX support will be initialized */ - struct aux_engine *aux_engines[GPIO_DDC_LINE_COUNT]; - - /* For all other lines, we can use - * single instance of generic I2C HW engine - * (since in HW, there is single instance of it) - * or single instance of generic I2C SW engine. - * AUX is not supported for other lines. */ - - /* General-purpose I2C SW engine. - * Can be assigned dynamically to any line per transaction */ - struct i2c_engine *i2c_generic_sw_engine; - - /* General-purpose I2C generic HW engine. - * Can be assigned dynamically to almost any line per transaction */ - struct i2c_engine *i2c_generic_hw_engine; - - /* [anaumov] in DAL2, there is a Mutex */ - - uint32_t aux_timeout_period; - - /* expressed in KHz */ - uint32_t default_i2c_sw_speed; - uint32_t default_i2c_hw_speed; -}; - -void dal_i2caux_construct( - struct i2caux *i2caux, - struct dc_context *ctx); - -void dal_i2caux_release_engine( - struct i2caux *i2caux, - struct engine *engine); - -void dal_i2caux_destruct( - struct i2caux *i2caux); - -void dal_i2caux_destroy( - struct i2caux **ptr); - -struct i2c_engine *dal_i2caux_acquire_i2c_sw_engine( - struct i2caux *i2caux, - struct ddc *ddc); - -struct aux_engine *dal_i2caux_acquire_aux_engine( - struct i2caux *i2caux, - struct ddc *ddc); - -#endif diff --git a/drivers/gpu/drm/amd/display/include/i2caux_interface.h b/drivers/gpu/drm/amd/display/include/i2caux_interface.h index 1b648fe041da..bb012cb1a9f5 100644 --- a/drivers/gpu/drm/amd/display/include/i2caux_interface.h +++ b/drivers/gpu/drm/amd/display/include/i2caux_interface.h @@ -76,27 +76,4 @@ union aux_config { uint32_t raw; }; -struct i2caux; - -struct i2caux *dal_i2caux_create( - struct dc_context *ctx); - -bool dal_i2caux_submit_i2c_command( - struct i2caux *i2caux, - struct ddc *ddc, - struct i2c_command *cmd); - -bool dal_i2caux_submit_aux_command( - struct i2caux *i2caux, - struct ddc *ddc, - struct aux_command *cmd); - -void dal_i2caux_configure_aux( - struct i2caux *i2caux, - struct ddc *ddc, - union aux_config cfg); - -void dal_i2caux_destroy( - struct i2caux **ptr); - #endif From 588715bdcfbc2592f50b433a3ebaac72014d5fb4 Mon Sep 17 00:00:00 2001 From: hersen wu Date: Wed, 28 Nov 2018 16:57:56 -0500 Subject: [PATCH 150/539] drm/amd/display: dal-pplib interface refactor dal part [WHY] clarify dal input parameters to pplib interface, remove un-used parameters. dal knows exactly which parameters needed and their effects at pplib and smu sides. current dal sequence for dcn1_update_clock to pplib: 1.smu10_display_clock_voltage_request for dcefclk 2.smu10_display_clock_voltage_request for fclk 3.phm_store_dal_configuration_data { set_min_deep_sleep_dcfclk set_active_display_count store_cc6_data --- this data never be referenced new sequence will be: 1. set_display_count --- need add new pplib interface 2. set_min_deep_sleep_dcfclk -- new pplib interface 3. set_hard_min_dcfclk_by_freq 4. set_hard_min_fclk_by_freq after this code refactor, smu10_display_clock_voltage_request, phm_store_dal_configuration_data will not be needed for rv. Signed-off-by: hersen wu Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 9d2d6986b983..e8e9eebbae3f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -559,6 +559,58 @@ void pp_rv_set_pme_wa_enable(struct pp_smu *pp) pp_funcs->notify_smu_enable_pwe(pp_handle); } +void pp_rv_set_active_display_count(struct pp_smu *pp, int count) +{ + const struct dc_context *ctx = pp->dm; + struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->set_active_display_count) + return; + + pp_funcs->set_active_display_count(pp_handle, count); +} + +void pp_rv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int clock) +{ + const struct dc_context *ctx = pp->dm; + struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->set_min_deep_sleep_dcefclk) + return; + + pp_funcs->set_min_deep_sleep_dcefclk(pp_handle, clock); +} + +void pp_rv_set_hard_min_dcefclk_by_freq(struct pp_smu *pp, int clock) +{ + const struct dc_context *ctx = pp->dm; + struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->set_hard_min_dcefclk_by_freq) + return; + + pp_funcs->set_hard_min_dcefclk_by_freq(pp_handle, clock); +} + +void pp_rv_set_hard_min_fclk_by_freq(struct pp_smu *pp, int mhz) +{ + const struct dc_context *ctx = pp->dm; + struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->set_hard_min_fclk_by_freq) + return; + + pp_funcs->set_hard_min_fclk_by_freq(pp_handle, mhz); +} + void dm_pp_get_funcs_rv( struct dc_context *ctx, struct pp_smu_funcs_rv *funcs) @@ -567,4 +619,9 @@ void dm_pp_get_funcs_rv( funcs->set_display_requirement = pp_rv_set_display_requirement; funcs->set_wm_ranges = pp_rv_set_wm_ranges; funcs->set_pme_wa_enable = pp_rv_set_pme_wa_enable; + funcs->set_display_count = pp_rv_set_active_display_count; + funcs->set_min_deep_sleep_dcfclk = pp_rv_set_min_deep_sleep_dcfclk; + funcs->set_hard_min_dcfclk_by_freq = pp_rv_set_hard_min_dcefclk_by_freq; + funcs->set_hard_min_fclk_by_freq = pp_rv_set_hard_min_fclk_by_freq; } + From 570744b98ca865d95bdf2da064a7a57f2655f889 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 19 Dec 2018 09:24:06 -0500 Subject: [PATCH 151/539] drm/amd/display: fix warning on raven hotplug [Why] Hotplug on raven results in REG_WAIT_TIMEOUT warning due to failing attempt to lock disabled otg for the hubp interdependent pipes programming. [How] Don't setup pipe interdependencies for disabled otg. Also removed the unnecessary duplicate logic checks. Signed-off-by: Roman Li Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 93226fc37f27..814f5976ec9b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2354,30 +2354,23 @@ static void dcn10_apply_ctx_for_surface( top_pipe_to_program->plane_state->update_flags.bits.full_update) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - + tg = pipe_ctx->stream_res.tg; /* Skip inactive pipes and ones already updated */ if (!pipe_ctx->stream || pipe_ctx->stream == stream - || !pipe_ctx->plane_state) + || !pipe_ctx->plane_state + || !tg->funcs->is_tg_enabled(tg)) continue; - pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); + tg->funcs->lock(tg); pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent( pipe_ctx->plane_res.hubp, &pipe_ctx->dlg_regs, &pipe_ctx->ttu_regs); + + tg->funcs->unlock(tg); } - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (!pipe_ctx->stream || pipe_ctx->stream == stream - || !pipe_ctx->plane_state) - continue; - - dcn10_pipe_control_lock(dc, pipe_ctx, false); - } - if (num_planes == 0) false_optc_underflow_wa(dc, stream, tg); From 9983b80053e4fd3d5dea7b936aa933edd49924ce Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 19 Dec 2018 13:47:19 -0500 Subject: [PATCH 152/539] drm/amd/display: dp interlace MSA timing programming for Interlace mode. [Why] DP compliance box shows wrong MSA data. Signed-off-by: Charlene Liu Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/dc/dce/dce_stream_encoder.c | 65 ++++++++++-------- .../display/dc/dcn10/dcn10_stream_encoder.c | 68 +++++++++++-------- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 4bd7c5daabc0..4a49fd631f33 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -288,9 +288,18 @@ static void dce110_stream_encoder_dp_set_stream_attribute( #endif struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - + struct dc_crtc_timing hw_crtc_timing = *crtc_timing; + if (hw_crtc_timing.flags.INTERLACE) { + /*the input timing is in VESA spec format with Interlace flag =1*/ + hw_crtc_timing.v_total /= 2; + hw_crtc_timing.v_border_top /= 2; + hw_crtc_timing.v_addressable /= 2; + hw_crtc_timing.v_border_bottom /= 2; + hw_crtc_timing.v_front_porch /= 2; + hw_crtc_timing.v_sync_width /= 2; + } /* set pixel encoding */ - switch (crtc_timing->pixel_encoding) { + switch (hw_crtc_timing.pixel_encoding) { case PIXEL_ENCODING_YCBCR422: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, DP_PIXEL_ENCODING_TYPE_YCBCR422); @@ -299,8 +308,8 @@ static void dce110_stream_encoder_dp_set_stream_attribute( REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, DP_PIXEL_ENCODING_TYPE_YCBCR444); - if (crtc_timing->flags.Y_ONLY) - if (crtc_timing->display_color_depth != COLOR_DEPTH_666) + if (hw_crtc_timing.flags.Y_ONLY) + if (hw_crtc_timing.display_color_depth != COLOR_DEPTH_666) /* HW testing only, no use case yet. * Color depth of Y-only could be * 8, 10, 12, 16 bits */ @@ -335,7 +344,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( /* set color depth */ - switch (crtc_timing->display_color_depth) { + switch (hw_crtc_timing.display_color_depth) { case COLOR_DEPTH_666: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, 0); @@ -363,7 +372,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( #if defined(CONFIG_DRM_AMD_DC_DCN1_0) - switch (crtc_timing->display_color_depth) { + switch (hw_crtc_timing.display_color_depth) { case COLOR_DEPTH_666: colorimetry_bpc = 0; break; @@ -401,9 +410,9 @@ static void dce110_stream_encoder_dp_set_stream_attribute( misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 0; /*bt601*/ - if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ - else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_YCBCR709: @@ -411,9 +420,9 @@ static void dce110_stream_encoder_dp_set_stream_attribute( misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 1; /*bt709*/ - if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ - else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: @@ -453,27 +462,27 @@ static void dce110_stream_encoder_dp_set_stream_attribute( */ if (REG(DP_MSA_TIMING_PARAM1)) REG_SET_2(DP_MSA_TIMING_PARAM1, 0, - DP_MSA_HTOTAL, crtc_timing->h_total, - DP_MSA_VTOTAL, crtc_timing->v_total); + DP_MSA_HTOTAL, hw_crtc_timing.h_total, + DP_MSA_VTOTAL, hw_crtc_timing.v_total); #endif /* calcuate from vesa timing parameters * h_active_start related to leading edge of sync */ - h_blank = crtc_timing->h_total - crtc_timing->h_border_left - - crtc_timing->h_addressable - crtc_timing->h_border_right; + h_blank = hw_crtc_timing.h_total - hw_crtc_timing.h_border_left - + hw_crtc_timing.h_addressable - hw_crtc_timing.h_border_right; - h_back_porch = h_blank - crtc_timing->h_front_porch - - crtc_timing->h_sync_width; + h_back_porch = h_blank - hw_crtc_timing.h_front_porch - + hw_crtc_timing.h_sync_width; /* start at begining of left border */ - h_active_start = crtc_timing->h_sync_width + h_back_porch; + h_active_start = hw_crtc_timing.h_sync_width + h_back_porch; - v_active_start = crtc_timing->v_total - crtc_timing->v_border_top - - crtc_timing->v_addressable - crtc_timing->v_border_bottom - - crtc_timing->v_front_porch; + v_active_start = hw_crtc_timing.v_total - hw_crtc_timing.v_border_top - + hw_crtc_timing.v_addressable - hw_crtc_timing.v_border_bottom - + hw_crtc_timing.v_front_porch; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) @@ -486,21 +495,21 @@ static void dce110_stream_encoder_dp_set_stream_attribute( if (REG(DP_MSA_TIMING_PARAM3)) REG_SET_4(DP_MSA_TIMING_PARAM3, 0, DP_MSA_HSYNCWIDTH, - crtc_timing->h_sync_width, + hw_crtc_timing.h_sync_width, DP_MSA_HSYNCPOLARITY, - !crtc_timing->flags.HSYNC_POSITIVE_POLARITY, + !hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY, DP_MSA_VSYNCWIDTH, - crtc_timing->v_sync_width, + hw_crtc_timing.v_sync_width, DP_MSA_VSYNCPOLARITY, - !crtc_timing->flags.VSYNC_POSITIVE_POLARITY); + !hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY); /* HWDITH include border or overscan */ if (REG(DP_MSA_TIMING_PARAM4)) REG_SET_2(DP_MSA_TIMING_PARAM4, 0, - DP_MSA_HWIDTH, crtc_timing->h_border_left + - crtc_timing->h_addressable + crtc_timing->h_border_right, - DP_MSA_VHEIGHT, crtc_timing->v_border_top + - crtc_timing->v_addressable + crtc_timing->v_border_bottom); + DP_MSA_HWIDTH, hw_crtc_timing.h_border_left + + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right, + DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top + + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom); #endif } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 0b0e06fe5c53..0fb43c93932d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -261,17 +261,29 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint8_t dp_component_depth = 0; struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct dc_crtc_timing hw_crtc_timing = *crtc_timing; + + if (hw_crtc_timing.flags.INTERLACE) { + /*the input timing is in VESA spec format with Interlace flag =1*/ + hw_crtc_timing.v_total /= 2; + hw_crtc_timing.v_border_top /= 2; + hw_crtc_timing.v_addressable /= 2; + hw_crtc_timing.v_border_bottom /= 2; + hw_crtc_timing.v_front_porch /= 2; + hw_crtc_timing.v_sync_width /= 2; + } + /* set pixel encoding */ - switch (crtc_timing->pixel_encoding) { + switch (hw_crtc_timing.pixel_encoding) { case PIXEL_ENCODING_YCBCR422: dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR422; break; case PIXEL_ENCODING_YCBCR444: dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR444; - if (crtc_timing->flags.Y_ONLY) - if (crtc_timing->display_color_depth != COLOR_DEPTH_666) + if (hw_crtc_timing.flags.Y_ONLY) + if (hw_crtc_timing.display_color_depth != COLOR_DEPTH_666) /* HW testing only, no use case yet. * Color depth of Y-only could be * 8, 10, 12, 16 bits @@ -299,7 +311,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7, * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care"). */ - if ((crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) || + if ((hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) || (output_color_space == COLOR_SPACE_2020_YCBCR) || (output_color_space == COLOR_SPACE_2020_RGB_FULLRANGE) || (output_color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE)) @@ -308,7 +320,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( misc1 = misc1 & ~0x40; /* set color depth */ - switch (crtc_timing->display_color_depth) { + switch (hw_crtc_timing.display_color_depth) { case COLOR_DEPTH_666: dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC; break; @@ -336,7 +348,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( /* set dynamic range and YCbCr range */ - switch (crtc_timing->display_color_depth) { + switch (hw_crtc_timing.display_color_depth) { case COLOR_DEPTH_666: colorimetry_bpc = 0; break; @@ -372,9 +384,9 @@ void enc1_stream_encoder_dp_set_stream_attribute( misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 0; /*bt601*/ - if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ - else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_YCBCR709: @@ -382,9 +394,9 @@ void enc1_stream_encoder_dp_set_stream_attribute( misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 1; /*bt709*/ - if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ - else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: @@ -414,26 +426,26 @@ void enc1_stream_encoder_dp_set_stream_attribute( * dc_crtc_timing is vesa dmt struct. data from edid */ REG_SET_2(DP_MSA_TIMING_PARAM1, 0, - DP_MSA_HTOTAL, crtc_timing->h_total, - DP_MSA_VTOTAL, crtc_timing->v_total); + DP_MSA_HTOTAL, hw_crtc_timing.h_total, + DP_MSA_VTOTAL, hw_crtc_timing.v_total); /* calculate from vesa timing parameters * h_active_start related to leading edge of sync */ - h_blank = crtc_timing->h_total - crtc_timing->h_border_left - - crtc_timing->h_addressable - crtc_timing->h_border_right; + h_blank = hw_crtc_timing.h_total - hw_crtc_timing.h_border_left - + hw_crtc_timing.h_addressable - hw_crtc_timing.h_border_right; - h_back_porch = h_blank - crtc_timing->h_front_porch - - crtc_timing->h_sync_width; + h_back_porch = h_blank - hw_crtc_timing.h_front_porch - + hw_crtc_timing.h_sync_width; /* start at beginning of left border */ - h_active_start = crtc_timing->h_sync_width + h_back_porch; + h_active_start = hw_crtc_timing.h_sync_width + h_back_porch; - v_active_start = crtc_timing->v_total - crtc_timing->v_border_top - - crtc_timing->v_addressable - crtc_timing->v_border_bottom - - crtc_timing->v_front_porch; + v_active_start = hw_crtc_timing.v_total - hw_crtc_timing.v_border_top - + hw_crtc_timing.v_addressable - hw_crtc_timing.v_border_bottom - + hw_crtc_timing.v_front_porch; /* start at beginning of left border */ @@ -443,20 +455,20 @@ void enc1_stream_encoder_dp_set_stream_attribute( REG_SET_4(DP_MSA_TIMING_PARAM3, 0, DP_MSA_HSYNCWIDTH, - crtc_timing->h_sync_width, + hw_crtc_timing.h_sync_width, DP_MSA_HSYNCPOLARITY, - !crtc_timing->flags.HSYNC_POSITIVE_POLARITY, + !hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY, DP_MSA_VSYNCWIDTH, - crtc_timing->v_sync_width, + hw_crtc_timing.v_sync_width, DP_MSA_VSYNCPOLARITY, - !crtc_timing->flags.VSYNC_POSITIVE_POLARITY); + !hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY); /* HWDITH include border or overscan */ REG_SET_2(DP_MSA_TIMING_PARAM4, 0, - DP_MSA_HWIDTH, crtc_timing->h_border_left + - crtc_timing->h_addressable + crtc_timing->h_border_right, - DP_MSA_VHEIGHT, crtc_timing->v_border_top + - crtc_timing->v_addressable + crtc_timing->v_border_bottom); + DP_MSA_HWIDTH, hw_crtc_timing.h_border_left + + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right, + DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top + + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom); } static void enc1_stream_encoder_set_stream_attribute_helper( From 0f0c1924339670e9b34bbe03c181d88a8ad57e74 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Mon, 17 Dec 2018 18:28:59 -0500 Subject: [PATCH 153/539] drm/amd/display: add workaround for 4k video underflow [Why] On DCN1, there is an issue where on high BW config on single channel systems, underflow will be observed if DCC is disabled. This issue can be observed on several use cases. For this particular case, it is observed when playing 4k video on 4k desktop with video downscaled to a certain size. [How] Block MPO for this particular case, this will prevent extra BW consumed from downscaling, working around the underflow. Signed-off-by: Eric Yang Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 3e6a6025419a..ea0628bebe0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1131,6 +1131,56 @@ static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_st return DC_OK; } +static enum dc_status dcn10_validate_global(struct dc *dc, struct dc_state *context) +{ + int i, j; + bool video_down_scaled = false; + bool video_large = false; + bool desktop_large = false; + bool dcc_disabled = false; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 2) + return false; + + for (j = 0; j < context->stream_status[i].plane_count; j++) { + struct dc_plane_state *plane = + context->stream_status[i].plane_states[j]; + + + if (plane->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + + if (plane->src_rect.width > plane->dst_rect.width || + plane->src_rect.height > plane->dst_rect.height) + video_down_scaled = true; + + if (plane->src_rect.width >= 3840) + video_large = true; + + } else { + if (plane->src_rect.width >= 3840) + desktop_large = true; + if (!plane->dcc.enable) + dcc_disabled = true; + } + } + } + + /* + * Workaround: On DCN10 there is UMC issue that causes underflow when + * playing 4k video on 4k desktop with video downscaled and single channel + * memory + */ + if (video_large && desktop_large && video_down_scaled && dcc_disabled && + dc->dcn_soc->number_of_channels == 1) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plane_state) { enum dc_status result = DC_OK; @@ -1159,6 +1209,7 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_bandwidth = dcn_validate_bandwidth, .acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer, .validate_plane = dcn10_validate_plane, + .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, .get_default_swizzle_mode = dcn10_get_default_swizzle_mode }; From 2ee7c03cf105fa969454f55dfeb3b0a69a930d74 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 3 Dec 2018 13:56:40 -0500 Subject: [PATCH 154/539] drm/amd/display: Rename configure_encoder to enc1_configure_encoder Signed-off-by: Dmytro Laktyushkin Reviewed-by: Charlene Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c | 6 +++--- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index af0bcff0b01a..771449f8984f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -440,7 +440,7 @@ static uint8_t get_frontend_source( } } -void configure_encoder( +void enc1_configure_encoder( struct dcn10_link_encoder *enc10, const struct dc_link_settings *link_settings) { @@ -910,7 +910,7 @@ void dcn10_link_encoder_enable_dp_output( * but it's not passed to asic_control. * We need to set number of lanes manually. */ - configure_encoder(enc10, link_settings); + enc1_configure_encoder(enc10, link_settings); cntl.action = TRANSMITTER_CONTROL_ENABLE; cntl.engine_id = enc->preferred_engine; @@ -949,7 +949,7 @@ void dcn10_link_encoder_enable_dp_mst_output( * but it's not passed to asic_control. * We need to set number of lanes manually. */ - configure_encoder(enc10, link_settings); + enc1_configure_encoder(enc10, link_settings); cntl.action = TRANSMITTER_CONTROL_ENABLE; cntl.engine_id = ENGINE_ID_UNKNOWN; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index 49ead12b2532..670b46e887ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -271,7 +271,7 @@ void dcn10_link_encoder_setup( struct link_encoder *enc, enum signal_type signal); -void configure_encoder( +void enc1_configure_encoder( struct dcn10_link_encoder *enc10, const struct dc_link_settings *link_settings); From ca35899c4e3a173c0d6d4619dc62e836a9487cb2 Mon Sep 17 00:00:00 2001 From: Bayan Zabihiyan Date: Thu, 27 Dec 2018 08:43:45 -0500 Subject: [PATCH 155/539] drm/amd/display: Add new infopacket definition Modify freesync module to build VTEM infopackets when in HdmiVRR mode Signed-off-by: Bayan Zabihiyan Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/modules/freesync/freesync.c | 94 ++++++++++++++++++- .../amd/display/modules/inc/mod_freesync.h | 2 +- .../drm/amd/display/modules/inc/mod_shared.h | 3 +- 3 files changed, 92 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index d967ac001f59..94a84bc57c7a 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -461,6 +461,26 @@ bool mod_freesync_get_v_position(struct mod_freesync *mod_freesync, return false; } +static void build_vrr_infopacket_header_vtem(enum signal_type signal, + struct dc_info_packet *infopacket) +{ + // HEADER + + // HB0, HB1, HB2 indicates PacketType VTEMPacket + infopacket->hb0 = 0x7F; + infopacket->hb1 = 0xC0; + infopacket->hb2 = 0x00; + /* HB3 Bit Fields + * Reserved :1 = 0 + * Sync :1 = 0 + * VFR :1 = 1 + * Ds_Type :2 = 0 + * End :1 = 0 + * New :1 = 0 + */ + infopacket->hb3 = 0x20; +} + static void build_vrr_infopacket_header_v1(enum signal_type signal, struct dc_info_packet *infopacket, unsigned int *payload_size) @@ -559,6 +579,54 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal, } } +static void build_vrr_vtem_infopacket_data(const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ + /* dc_info_packet to VtemPacket Translation of Bit-fields, + * SB[6] + * unsigned char VRR_EN :1 + * unsigned char M_CONST :1 + * unsigned char Reserved2 :2 + * unsigned char FVA_Factor_M1 :4 + * SB[7] + * unsigned char Base_Vfront :8 + * SB[8] + * unsigned char Base_Refresh_Rate_98 :2 + * unsigned char RB :1 + * unsigned char Reserved3 :5 + * SB[9] + * unsigned char Base_RefreshRate_07 :8 + */ + unsigned int fieldRateInHz; + + if (vrr->state == VRR_STATE_ACTIVE_VARIABLE || + vrr->state == VRR_STATE_ACTIVE_FIXED){ + infopacket->sb[6] |= 0x80; //VRR_EN Bit = 1 + } else { + infopacket->sb[6] &= 0x7F; //VRR_EN Bit = 0 + } + + if (!stream->timing.vic) { + infopacket->sb[7] = stream->timing.v_front_porch; + + /* TODO: In dal2, we check mode flags for a reduced blanking timing. + * Need a way to relay that information to this function. + * if("ReducedBlanking") + * { + * infopacket->sb[8] |= 0x20; //Set 3rd bit to 1 + * } + */ + fieldRateInHz = (stream->timing.pix_clk_100hz * 100)/ + (stream->timing.h_total * stream->timing.v_total); + + infopacket->sb[8] |= ((fieldRateInHz & 0x300) >> 2); + infopacket->sb[9] |= fieldRateInHz & 0xFF; + + } + infopacket->valid = true; +} + static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr, struct dc_info_packet *infopacket) { @@ -672,6 +740,19 @@ static void build_vrr_infopacket_v2(enum signal_type signal, infopacket->valid = true; } +static void build_vrr_infopacket_vtem(const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ + //VTEM info packet for HdmiVrr + + //VTEM Packet is structured differently + build_vrr_infopacket_header_vtem(stream->signal, infopacket); + build_vrr_vtem_infopacket_data(stream, vrr, infopacket); + + infopacket->valid = true; +} + void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, const struct dc_stream_state *stream, const struct mod_vrr_params *vrr, @@ -679,18 +760,21 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, const enum color_transfer_func *app_tf, struct dc_info_packet *infopacket) { - /* SPD info packet for FreeSync */ - - /* Check if Freesync is supported. Return if false. If true, + /* SPD info packet for FreeSync + * VTEM info packet for HdmiVRR + * Check if Freesync is supported. Return if false. If true, * set the corresponding bit in the info packet */ - if (!vrr->supported || !vrr->send_vsif) + if (!vrr->supported || (!vrr->send_info_frame && packet_type != PACKET_TYPE_VTEM)) return; switch (packet_type) { case PACKET_TYPE_FS2: build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket); break; + case PACKET_TYPE_VTEM: + build_vrr_infopacket_vtem(stream, vrr, infopacket); + break; case PACKET_TYPE_VRR: case PACKET_TYPE_FS1: default: @@ -739,7 +823,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, return; in_out_vrr->state = in_config->state; - in_out_vrr->send_vsif = in_config->vsif_supported; + in_out_vrr->send_info_frame = in_config->vsif_supported; if (in_config->state == VRR_STATE_UNSUPPORTED) { in_out_vrr->state = VRR_STATE_UNSUPPORTED; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index 949a8b62aa98..4222e403b151 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -104,7 +104,7 @@ struct mod_vrr_params_fixed_refresh { struct mod_vrr_params { bool supported; - bool send_vsif; + bool send_info_frame; enum mod_vrr_state state; uint32_t min_refresh_in_uhz; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h index 1bd02c0ac30c..b711e7e6c204 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h @@ -41,7 +41,8 @@ enum color_transfer_func { enum vrr_packet_type { PACKET_TYPE_VRR, PACKET_TYPE_FS1, - PACKET_TYPE_FS2 + PACKET_TYPE_FS2, + PACKET_TYPE_VTEM }; From 5dc3fc5a7835f6b98184d2b8df909c5230c37a2c Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Mon, 19 Nov 2018 10:52:10 -0500 Subject: [PATCH 156/539] drm/amd/display: Check if registers are available before accessing Check if VERT_FILTER_INIT_BOT and BLACK_OFFSET registers exists in the DCN SCL IP block before trying to access. Signed-off-by: Eric Bernstein Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 4a863a5dab41..c7642e748297 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -597,11 +597,13 @@ static void dpp1_dscl_set_manual_ratio_init( SCL_V_INIT_FRAC, init_frac, SCL_V_INIT_INT, init_int); - init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5; - init_int = dc_fixpt_floor(data->inits.v_bot); - REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, - SCL_V_INIT_FRAC_BOT, init_frac, - SCL_V_INIT_INT_BOT, init_int); + if (REG(SCL_VERT_FILTER_INIT_BOT)) { + init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5; + init_int = dc_fixpt_floor(data->inits.v_bot); + REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, + SCL_V_INIT_FRAC_BOT, init_frac, + SCL_V_INIT_INT_BOT, init_int); + } init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5; init_int = dc_fixpt_floor(data->inits.v_c); @@ -609,11 +611,13 @@ static void dpp1_dscl_set_manual_ratio_init( SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int); - init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5; - init_int = dc_fixpt_floor(data->inits.v_c_bot); - REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, - SCL_V_INIT_FRAC_BOT_C, init_frac, - SCL_V_INIT_INT_BOT_C, init_int); + if (REG(SCL_VERT_FILTER_INIT_BOT_C)) { + init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5; + init_int = dc_fixpt_floor(data->inits.v_c_bot); + REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, + SCL_V_INIT_FRAC_BOT_C, init_frac, + SCL_V_INIT_INT_BOT_C, init_int); + } } @@ -688,15 +692,17 @@ void dpp1_dscl_set_scaler_manual_scale( return; /* Black offsets */ - if (ycbcr) - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR); - else + if (REG(SCL_BLACK_OFFSET)) { + if (ycbcr) + REG_SET_2(SCL_BLACK_OFFSET, 0, + SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, + SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR); + else - REG_SET_2(SCL_BLACK_OFFSET, 0, - SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, - SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y); + REG_SET_2(SCL_BLACK_OFFSET, 0, + SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, + SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y); + } /* Manually calculate scale ratio and init values */ dpp1_dscl_set_manual_ratio_init(dpp, scl_data); From ccab1217230246dccc3110990b028f195ad2d609 Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Wed, 2 Jan 2019 14:12:53 -0500 Subject: [PATCH 157/539] drm/amd/display: Check for NULL when creating gamma struct [Wjy&How] Some stress test is causing unexpected memory allocation failure. This prevents null dereference but there will likely be problems later, hard to gracefully handle memalloc fail for critical objects. Signed-off-by: Krunoslav Kovac Reviewed-by: Anthony Koo Acked-by: Leo Li Acked-by: Reza Amini Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index c60c9b4c3075..ee6bd50f60b8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -40,11 +40,14 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state plane_state->ctx = ctx; plane_state->gamma_correction = dc_create_gamma(); - plane_state->gamma_correction->is_identity = true; + if (plane_state->gamma_correction != NULL) + plane_state->gamma_correction->is_identity = true; plane_state->in_transfer_func = dc_create_transfer_func(); - plane_state->in_transfer_func->type = TF_TYPE_BYPASS; - plane_state->in_transfer_func->ctx = ctx; + if (plane_state->in_transfer_func != NULL) { + plane_state->in_transfer_func->type = TF_TYPE_BYPASS; + plane_state->in_transfer_func->ctx = ctx; + } } static void destruct(struct dc_plane_state *plane_state) From 45a31b01b5f2ba08597fa36a2af797d9062fdeb4 Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 4 Jan 2019 10:28:52 -0500 Subject: [PATCH 158/539] drm/amd/display: 3.2.14 Signed-off-by: Steven Chiu Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 98f716be548a..f362b04a6f99 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.13" +#define DC_VER "3.2.14" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From 9c7c0ae75497b06079889302e27abf119da6c903 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Fri, 11 Jan 2019 10:41:17 -0500 Subject: [PATCH 159/539] drm/amd/display: Fully remove i2caux folder This is a follow up to: e28e1490794d ("drm/amd/display: Remove i2caux folder") Some files were still left, so delete all of them. CC: David Francis CC: Harry Wentland Signed-off-by: Leo Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/i2caux/Makefile | 99 --- .../dc/i2caux/dce110/i2c_hw_engine_dce110.c | 573 ------------------ .../display/dc/i2caux/dce112/i2caux_dce112.c | 129 ---- .../display/dc/i2caux/dce112/i2caux_dce112.h | 32 - .../dc/i2caux/dce80/i2c_sw_engine_dce80.c | 163 ----- .../gpu/drm/amd/display/dc/i2caux/i2caux.c | 491 --------------- 6 files changed, 1487 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.h delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c delete mode 100644 drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile b/drivers/gpu/drm/amd/display/dc/i2caux/Makefile deleted file mode 100644 index 352885cb4d07..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile +++ /dev/null @@ -1,99 +0,0 @@ -# -# Copyright 2017 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# -# Makefile for the 'i2c' sub-component of DAL. -# It provides the control and status of HW i2c engine of the adapter. - -I2CAUX = aux_engine.o engine_base.o i2caux.o i2c_engine.o \ - i2c_generic_hw_engine.o i2c_hw_engine.o i2c_sw_engine.o - -AMD_DAL_I2CAUX = $(addprefix $(AMDDALPATH)/dc/i2caux/,$(I2CAUX)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX) - -############################################################################### -# DCE 8x family -############################################################################### -I2CAUX_DCE80 = i2caux_dce80.o i2c_hw_engine_dce80.o \ - i2c_sw_engine_dce80.o - -AMD_DAL_I2CAUX_DCE80 = $(addprefix $(AMDDALPATH)/dc/i2caux/dce80/,$(I2CAUX_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCE80) - -############################################################################### -# DCE 100 family -############################################################################### -I2CAUX_DCE100 = i2caux_dce100.o - -AMD_DAL_I2CAUX_DCE100 = $(addprefix $(AMDDALPATH)/dc/i2caux/dce100/,$(I2CAUX_DCE100)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCE100) - -############################################################################### -# DCE 110 family -############################################################################### -I2CAUX_DCE110 = i2caux_dce110.o i2c_sw_engine_dce110.o i2c_hw_engine_dce110.o \ - aux_engine_dce110.o - -AMD_DAL_I2CAUX_DCE110 = $(addprefix $(AMDDALPATH)/dc/i2caux/dce110/,$(I2CAUX_DCE110)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCE110) - -############################################################################### -# DCE 112 family -############################################################################### -I2CAUX_DCE112 = i2caux_dce112.o - -AMD_DAL_I2CAUX_DCE112 = $(addprefix $(AMDDALPATH)/dc/i2caux/dce112/,$(I2CAUX_DCE112)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCE112) - -############################################################################### -# DCN 1.0 family -############################################################################### -ifdef CONFIG_DRM_AMD_DC_DCN1_0 -I2CAUX_DCN1 = i2caux_dcn10.o - -AMD_DAL_I2CAUX_DCN1 = $(addprefix $(AMDDALPATH)/dc/i2caux/dcn10/,$(I2CAUX_DCN1)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCN1) -endif - -############################################################################### -# DCE 120 family -############################################################################### -I2CAUX_DCE120 = i2caux_dce120.o - -AMD_DAL_I2CAUX_DCE120 = $(addprefix $(AMDDALPATH)/dc/i2caux/dce120/,$(I2CAUX_DCE120)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DCE120) - -############################################################################### -# Diagnostics on FPGA -############################################################################### -I2CAUX_DIAG = i2caux_diag.o - -AMD_DAL_I2CAUX_DIAG = $(addprefix $(AMDDALPATH)/dc/i2caux/diagnostics/,$(I2CAUX_DIAG)) - -AMD_DISPLAY_FILES += $(AMD_DAL_I2CAUX_DIAG) - diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c deleted file mode 100644 index 137350142fb2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c +++ /dev/null @@ -1,573 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "include/logger_interface.h" -/* - * Pre-requisites: headers required by header of this unit - */ - -#include "include/i2caux_interface.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_hw_engine.h" -#include "../i2c_generic_hw_engine.h" -/* - * Header of this unit - */ - -#include "i2c_hw_engine_dce110.h" - -/* - * Post-requisites: headers required by this unit - */ -#include "reg_helper.h" - -/* - * This unit - */ -#define DC_LOGGER \ - hw_engine->base.base.base.ctx->logger - -enum dc_i2c_status { - DC_I2C_STATUS__DC_I2C_STATUS_IDLE, - DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW, - DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW -}; - -enum dc_i2c_arbitration { - DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_NORMAL, - DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_HIGH -}; - - - -/* - * @brief - * Cast pointer to 'struct i2c_hw_engine *' - * to pointer 'struct i2c_hw_engine_dce110 *' - */ -#define FROM_I2C_HW_ENGINE(ptr) \ - container_of((ptr), struct i2c_hw_engine_dce110, base) -/* - * @brief - * Cast pointer to 'struct i2c_engine *' - * to pointer to 'struct i2c_hw_engine_dce110 *' - */ -#define FROM_I2C_ENGINE(ptr) \ - FROM_I2C_HW_ENGINE(container_of((ptr), struct i2c_hw_engine, base)) - -/* - * @brief - * Cast pointer to 'struct engine *' - * to 'pointer to struct i2c_hw_engine_dce110 *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -#define CTX \ - hw_engine->base.base.base.ctx - -#define REG(reg_name)\ - (hw_engine->regs->reg_name) - -#undef FN -#define FN(reg_name, field_name) \ - hw_engine->i2c_shift->field_name, hw_engine->i2c_mask->field_name - - -static void disable_i2c_hw_engine( - struct i2c_hw_engine_dce110 *hw_engine) -{ - REG_UPDATE_N(SETUP, 1, FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE), 0); -} - -static void release_engine( - struct engine *engine) -{ - struct i2c_hw_engine_dce110 *hw_engine = FROM_ENGINE(engine); - - struct i2c_engine *base = NULL; - bool safe_to_reset; - - base = &hw_engine->base.base; - - /* Restore original HW engine speed */ - - base->funcs->set_speed(base, hw_engine->base.original_speed); - - /* Release I2C */ - REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1); - - /* Reset HW engine */ - { - uint32_t i2c_sw_status = 0; - REG_GET(DC_I2C_SW_STATUS, DC_I2C_SW_STATUS, &i2c_sw_status); - /* if used by SW, safe to reset */ - safe_to_reset = (i2c_sw_status == 1); - } - - if (safe_to_reset) - REG_UPDATE_2( - DC_I2C_CONTROL, - DC_I2C_SOFT_RESET, 1, - DC_I2C_SW_STATUS_RESET, 1); - else - REG_UPDATE(DC_I2C_CONTROL, DC_I2C_SW_STATUS_RESET, 1); - - /* HW I2c engine - clock gating feature */ - if (!hw_engine->engine_keep_power_up_count) - disable_i2c_hw_engine(hw_engine); -} - -static bool setup_engine( - struct i2c_engine *i2c_engine) -{ - struct i2c_hw_engine_dce110 *hw_engine = FROM_I2C_ENGINE(i2c_engine); - uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; - uint32_t reset_length = 0; - - if (hw_engine->base.base.setup_limit != 0) - i2c_setup_limit = hw_engine->base.base.setup_limit; - - /* Program pin select */ - REG_UPDATE_6( - DC_I2C_CONTROL, - DC_I2C_GO, 0, - DC_I2C_SOFT_RESET, 0, - DC_I2C_SEND_RESET, 0, - DC_I2C_SW_STATUS_RESET, 1, - DC_I2C_TRANSACTION_COUNT, 0, - DC_I2C_DDC_SELECT, hw_engine->engine_id); - - /* Program time limit */ - if (hw_engine->base.base.send_reset_length == 0) { - /*pre-dcn*/ - REG_UPDATE_N( - SETUP, 2, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_TIME_LIMIT), i2c_setup_limit, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_ENABLE), 1); - } else { - reset_length = hw_engine->base.base.send_reset_length; - } - /* Program HW priority - * set to High - interrupt software I2C at any time - * Enable restart of SW I2C that was interrupted by HW - * disable queuing of software while I2C is in use by HW */ - REG_UPDATE_2( - DC_I2C_ARBITRATION, - DC_I2C_NO_QUEUED_SW_GO, 0, - DC_I2C_SW_PRIORITY, DC_I2C_ARBITRATION__DC_I2C_SW_PRIORITY_NORMAL); - - return true; -} - -static uint32_t get_speed( - const struct i2c_engine *i2c_engine) -{ - const struct i2c_hw_engine_dce110 *hw_engine = FROM_I2C_ENGINE(i2c_engine); - uint32_t pre_scale = 0; - - REG_GET(SPEED, DC_I2C_DDC1_PRESCALE, &pre_scale); - - /* [anaumov] it seems following is unnecessary */ - /*ASSERT(value.bits.DC_I2C_DDC1_PRESCALE);*/ - return pre_scale ? - hw_engine->reference_frequency / pre_scale : - hw_engine->base.default_speed; -} - -static void set_speed( - struct i2c_engine *i2c_engine, - uint32_t speed) -{ - struct i2c_hw_engine_dce110 *hw_engine = FROM_I2C_ENGINE(i2c_engine); - - if (speed) { - if (hw_engine->i2c_mask->DC_I2C_DDC1_START_STOP_TIMING_CNTL) - REG_UPDATE_N( - SPEED, 3, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), hw_engine->reference_frequency / speed, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_START_STOP_TIMING_CNTL), speed > 50 ? 2:1); - else - REG_UPDATE_N( - SPEED, 2, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_PRESCALE), hw_engine->reference_frequency / speed, - FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2); - } -} - -static inline void reset_hw_engine(struct engine *engine) -{ - struct i2c_hw_engine_dce110 *hw_engine = FROM_ENGINE(engine); - - REG_UPDATE_2( - DC_I2C_CONTROL, - DC_I2C_SW_STATUS_RESET, 1, - DC_I2C_SW_STATUS_RESET, 1); -} - -static bool is_hw_busy(struct engine *engine) -{ - struct i2c_hw_engine_dce110 *hw_engine = FROM_ENGINE(engine); - uint32_t i2c_sw_status = 0; - - REG_GET(DC_I2C_SW_STATUS, DC_I2C_SW_STATUS, &i2c_sw_status); - if (i2c_sw_status == DC_I2C_STATUS__DC_I2C_STATUS_IDLE) - return false; - - reset_hw_engine(engine); - - REG_GET(DC_I2C_SW_STATUS, DC_I2C_SW_STATUS, &i2c_sw_status); - return i2c_sw_status != DC_I2C_STATUS__DC_I2C_STATUS_IDLE; -} - - -#define STOP_TRANS_PREDICAT \ - ((hw_engine->transaction_count == 3) || \ - (request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE) || \ - (request->action & I2CAUX_TRANSACTION_ACTION_I2C_READ)) - -#define SET_I2C_TRANSACTION(id) \ - do { \ - REG_UPDATE_N(DC_I2C_TRANSACTION##id, 5, \ - FN(DC_I2C_TRANSACTION0, DC_I2C_STOP_ON_NACK0), 1, \ - FN(DC_I2C_TRANSACTION0, DC_I2C_START0), 1, \ - FN(DC_I2C_TRANSACTION0, DC_I2C_STOP0), STOP_TRANS_PREDICAT ? 1:0, \ - FN(DC_I2C_TRANSACTION0, DC_I2C_RW0), (0 != (request->action & I2CAUX_TRANSACTION_ACTION_I2C_READ)), \ - FN(DC_I2C_TRANSACTION0, DC_I2C_COUNT0), length); \ - if (STOP_TRANS_PREDICAT) \ - last_transaction = true; \ - } while (false) - - -static bool process_transaction( - struct i2c_hw_engine_dce110 *hw_engine, - struct i2c_request_transaction_data *request) -{ - uint32_t length = request->length; - uint8_t *buffer = request->data; - uint32_t value = 0; - - bool last_transaction = false; - - struct dc_context *ctx = NULL; - - ctx = hw_engine->base.base.base.ctx; - - - - switch (hw_engine->transaction_count) { - case 0: - SET_I2C_TRANSACTION(0); - break; - case 1: - SET_I2C_TRANSACTION(1); - break; - case 2: - SET_I2C_TRANSACTION(2); - break; - case 3: - SET_I2C_TRANSACTION(3); - break; - default: - /* TODO Warning ? */ - break; - } - - - /* Write the I2C address and I2C data - * into the hardware circular buffer, one byte per entry. - * As an example, the 7-bit I2C slave address for CRT monitor - * for reading DDC/EDID information is 0b1010001. - * For an I2C send operation, the LSB must be programmed to 0; - * for I2C receive operation, the LSB must be programmed to 1. */ - if (hw_engine->transaction_count == 0) { - value = REG_SET_4(DC_I2C_DATA, 0, - DC_I2C_DATA_RW, false, - DC_I2C_DATA, request->address, - DC_I2C_INDEX, 0, - DC_I2C_INDEX_WRITE, 1); - hw_engine->buffer_used_write = 0; - } else - value = REG_SET_2(DC_I2C_DATA, 0, - DC_I2C_DATA_RW, false, - DC_I2C_DATA, request->address); - - hw_engine->buffer_used_write++; - - if (!(request->action & I2CAUX_TRANSACTION_ACTION_I2C_READ)) { - while (length) { - REG_SET_2(DC_I2C_DATA, value, - DC_I2C_INDEX_WRITE, 0, - DC_I2C_DATA, *buffer++); - hw_engine->buffer_used_write++; - --length; - } - } - - ++hw_engine->transaction_count; - hw_engine->buffer_used_bytes += length + 1; - - return last_transaction; -} - -static void execute_transaction( - struct i2c_hw_engine_dce110 *hw_engine) -{ - REG_UPDATE_N(SETUP, 5, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_EN), 0, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_DRIVE_EN), 0, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL), 0, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY), 0, - FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY), 0); - - - REG_UPDATE_5(DC_I2C_CONTROL, - DC_I2C_SOFT_RESET, 0, - DC_I2C_SW_STATUS_RESET, 0, - DC_I2C_SEND_RESET, 0, - DC_I2C_GO, 0, - DC_I2C_TRANSACTION_COUNT, hw_engine->transaction_count - 1); - - /* start I2C transfer */ - REG_UPDATE(DC_I2C_CONTROL, DC_I2C_GO, 1); - - /* all transactions were executed and HW buffer became empty - * (even though it actually happens when status becomes DONE) */ - hw_engine->transaction_count = 0; - hw_engine->buffer_used_bytes = 0; -} - -static void submit_channel_request( - struct i2c_engine *engine, - struct i2c_request_transaction_data *request) -{ - request->status = I2C_CHANNEL_OPERATION_SUCCEEDED; - - if (!process_transaction(FROM_I2C_ENGINE(engine), request)) - return; - - if (is_hw_busy(&engine->base)) { - request->status = I2C_CHANNEL_OPERATION_ENGINE_BUSY; - return; - } - - execute_transaction(FROM_I2C_ENGINE(engine)); -} - -static void process_channel_reply( - struct i2c_engine *engine, - struct i2c_reply_transaction_data *reply) -{ - uint32_t length = reply->length; - uint8_t *buffer = reply->data; - - struct i2c_hw_engine_dce110 *hw_engine = - FROM_I2C_ENGINE(engine); - - - REG_SET_3(DC_I2C_DATA, 0, - DC_I2C_INDEX, hw_engine->buffer_used_write, - DC_I2C_DATA_RW, 1, - DC_I2C_INDEX_WRITE, 1); - - while (length) { - /* after reading the status, - * if the I2C operation executed successfully - * (i.e. DC_I2C_STATUS_DONE = 1) then the I2C controller - * should read data bytes from I2C circular data buffer */ - - uint32_t i2c_data; - - REG_GET(DC_I2C_DATA, DC_I2C_DATA, &i2c_data); - *buffer++ = i2c_data; - - --length; - } -} - -static enum i2c_channel_operation_result get_channel_status( - struct i2c_engine *i2c_engine, - uint8_t *returned_bytes) -{ - uint32_t i2c_sw_status = 0; - struct i2c_hw_engine_dce110 *hw_engine = FROM_I2C_ENGINE(i2c_engine); - uint32_t value = - REG_GET(DC_I2C_SW_STATUS, DC_I2C_SW_STATUS, &i2c_sw_status); - - if (i2c_sw_status == DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) - return I2C_CHANNEL_OPERATION_ENGINE_BUSY; - else if (value & hw_engine->i2c_mask->DC_I2C_SW_STOPPED_ON_NACK) - return I2C_CHANNEL_OPERATION_NO_RESPONSE; - else if (value & hw_engine->i2c_mask->DC_I2C_SW_TIMEOUT) - return I2C_CHANNEL_OPERATION_TIMEOUT; - else if (value & hw_engine->i2c_mask->DC_I2C_SW_ABORTED) - return I2C_CHANNEL_OPERATION_FAILED; - else if (value & hw_engine->i2c_mask->DC_I2C_SW_DONE) - return I2C_CHANNEL_OPERATION_SUCCEEDED; - - /* - * this is the case when HW used for communication, I2C_SW_STATUS - * could be zero - */ - return I2C_CHANNEL_OPERATION_SUCCEEDED; -} - -static uint32_t get_hw_buffer_available_size( - const struct i2c_hw_engine *engine) -{ - return I2C_HW_BUFFER_SIZE - - FROM_I2C_HW_ENGINE(engine)->buffer_used_bytes; -} - -static uint32_t get_transaction_timeout( - const struct i2c_hw_engine *engine, - uint32_t length) -{ - uint32_t speed = engine->base.funcs->get_speed(&engine->base); - - uint32_t period_timeout; - uint32_t num_of_clock_stretches; - - if (!speed) - return 0; - - period_timeout = (1000 * TRANSACTION_TIMEOUT_IN_I2C_CLOCKS) / speed; - - num_of_clock_stretches = 1 + (length << 3) + 1; - num_of_clock_stretches += - (FROM_I2C_HW_ENGINE(engine)->buffer_used_bytes << 3) + - (FROM_I2C_HW_ENGINE(engine)->transaction_count << 1); - - return period_timeout * num_of_clock_stretches; -} - -static void destroy( - struct i2c_engine **i2c_engine) -{ - struct i2c_hw_engine_dce110 *engine_dce110 = - FROM_I2C_ENGINE(*i2c_engine); - - dal_i2c_hw_engine_destruct(&engine_dce110->base); - - kfree(engine_dce110); - - *i2c_engine = NULL; -} - -static const struct i2c_engine_funcs i2c_engine_funcs = { - .destroy = destroy, - .get_speed = get_speed, - .set_speed = set_speed, - .setup_engine = setup_engine, - .submit_channel_request = submit_channel_request, - .process_channel_reply = process_channel_reply, - .get_channel_status = get_channel_status, - .acquire_engine = dal_i2c_hw_engine_acquire_engine, -}; - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .get_engine_type = dal_i2c_hw_engine_get_engine_type, - .acquire = dal_i2c_engine_acquire, - .submit_request = dal_i2c_hw_engine_submit_request, -}; - -static const struct i2c_hw_engine_funcs i2c_hw_engine_funcs = { - .get_hw_buffer_available_size = get_hw_buffer_available_size, - .get_transaction_timeout = get_transaction_timeout, - .wait_on_operation_result = dal_i2c_hw_engine_wait_on_operation_result, -}; - -static void construct( - struct i2c_hw_engine_dce110 *hw_engine, - const struct i2c_hw_engine_dce110_create_arg *arg) -{ - uint32_t xtal_ref_div = 0; - - dal_i2c_hw_engine_construct(&hw_engine->base, arg->ctx); - - hw_engine->base.base.base.funcs = &engine_funcs; - hw_engine->base.base.funcs = &i2c_engine_funcs; - hw_engine->base.funcs = &i2c_hw_engine_funcs; - hw_engine->base.default_speed = arg->default_speed; - - hw_engine->regs = arg->regs; - hw_engine->i2c_shift = arg->i2c_shift; - hw_engine->i2c_mask = arg->i2c_mask; - - hw_engine->engine_id = arg->engine_id; - - hw_engine->buffer_used_bytes = 0; - hw_engine->transaction_count = 0; - hw_engine->engine_keep_power_up_count = 1; - - - REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div); - - if (xtal_ref_div == 0) { - DC_LOG_WARNING("Invalid base timer divider [%s]\n", - __func__); - xtal_ref_div = 2; - } - - /*Calculating Reference Clock by divding original frequency by - * XTAL_REF_DIV. - * At upper level, uint32_t reference_frequency = - * dal_i2caux_get_reference_clock(as) >> 1 - * which already divided by 2. So we need x2 to get original - * reference clock from ppll_info - */ - hw_engine->reference_frequency = - (arg->reference_frequency * 2) / xtal_ref_div; -} - -struct i2c_engine *dal_i2c_hw_engine_dce110_create( - const struct i2c_hw_engine_dce110_create_arg *arg) -{ - struct i2c_hw_engine_dce110 *engine_dce10; - - if (!arg) { - ASSERT_CRITICAL(false); - return NULL; - } - if (!arg->reference_frequency) { - ASSERT_CRITICAL(false); - return NULL; - } - - engine_dce10 = kzalloc(sizeof(struct i2c_hw_engine_dce110), - GFP_KERNEL); - - if (!engine_dce10) { - ASSERT_CRITICAL(false); - return NULL; - } - - construct(engine_dce10, arg); - return &engine_dce10->base.base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.c deleted file mode 100644 index a9db04738724..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "include/i2caux_interface.h" -#include "../i2caux.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" -#include "../i2c_hw_engine.h" - -#include "../dce110/i2caux_dce110.h" -#include "i2caux_dce112.h" - -#include "../dce110/aux_engine_dce110.h" - -#include "../dce110/i2c_hw_engine_dce110.h" - -#include "dce/dce_11_2_d.h" -#include "dce/dce_11_2_sh_mask.h" - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define aux_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = AUX_CONTROL__AUX_RESET_MASK \ -} - -#define hw_engine_regs(id)\ -{\ - I2C_HW_ENGINE_COMMON_REG_LIST(id) \ -} - -static const struct dce110_aux_registers dce112_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5), -}; - -static const struct dce110_i2c_hw_engine_registers dce112_hw_engine_regs[] = { - hw_engine_regs(1), - hw_engine_regs(2), - hw_engine_regs(3), - hw_engine_regs(4), - hw_engine_regs(5), - hw_engine_regs(6) -}; - -static const struct dce110_i2c_hw_engine_shift i2c_shift = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce110_i2c_hw_engine_mask i2c_mask = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static void construct( - struct i2caux_dce110 *i2caux_dce110, - struct dc_context *ctx) -{ - dal_i2caux_dce110_construct(i2caux_dce110, - ctx, - ARRAY_SIZE(dce112_aux_regs), - dce112_aux_regs, - dce112_hw_engine_regs, - &i2c_shift, - &i2c_mask); -} - -/* - * dal_i2caux_dce110_create - * - * @brief - * public interface to allocate memory for DCE11 I2CAUX - * - * @param - * struct adapter_service *as - [in] - * struct dc_context *ctx - [in] - * - * @return - * pointer to the base struct of DCE11 I2CAUX - */ -struct i2caux *dal_i2caux_dce112_create( - struct dc_context *ctx) -{ - struct i2caux_dce110 *i2caux_dce110 = - kzalloc(sizeof(struct i2caux_dce110), GFP_KERNEL); - - if (!i2caux_dce110) { - ASSERT_CRITICAL(false); - return NULL; - } - - construct(i2caux_dce110, ctx); - return &i2caux_dce110->base; -} diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.h b/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.h deleted file mode 100644 index 8d35453c25b6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce112/i2caux_dce112.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DAL_I2C_AUX_DCE112_H__ -#define __DAL_I2C_AUX_DCE112_H__ - -struct i2caux *dal_i2caux_dce112_create( - struct dc_context *ctx); - -#endif /* __DAL_I2C_AUX_DCE112_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c deleted file mode 100644 index c9d6cf08e8ab..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce80/i2c_sw_engine_dce80.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "../engine.h" -#include "../i2c_engine.h" -#include "../i2c_sw_engine.h" - -/* - * Header of this unit - */ - -#include "i2c_sw_engine_dce80.h" - -/* - * Post-requisites: headers required by this unit - */ - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" - -/* - * This unit - */ - -/* - * @brief - * Cast 'struct i2c_sw_engine *' - * to 'struct i2c_sw_engine_dce80 *' - */ -#define FROM_I2C_SW_ENGINE(ptr) \ - container_of((ptr), struct i2c_sw_engine_dce80, base) - -/* - * @brief - * Cast 'struct i2c_engine *' - * to 'struct i2c_sw_engine_dce80 *' - */ -#define FROM_I2C_ENGINE(ptr) \ - FROM_I2C_SW_ENGINE(container_of((ptr), struct i2c_sw_engine, base)) - -/* - * @brief - * Cast 'struct engine *' - * to 'struct i2c_sw_engine_dce80 *' - */ -#define FROM_ENGINE(ptr) \ - FROM_I2C_ENGINE(container_of((ptr), struct i2c_engine, base)) - -static void release_engine( - struct engine *engine) -{ - -} - -static void destruct( - struct i2c_sw_engine_dce80 *engine) -{ - dal_i2c_sw_engine_destruct(&engine->base); -} - -static void destroy( - struct i2c_engine **engine) -{ - struct i2c_sw_engine_dce80 *sw_engine = FROM_I2C_ENGINE(*engine); - - destruct(sw_engine); - - kfree(sw_engine); - - *engine = NULL; -} - -static bool acquire_engine( - struct i2c_engine *engine, - struct ddc *ddc_handle) -{ - return dal_i2caux_i2c_sw_engine_acquire_engine(engine, ddc_handle); -} - -static const struct i2c_engine_funcs i2c_engine_funcs = { - .acquire_engine = acquire_engine, - .destroy = destroy, - .get_speed = dal_i2c_sw_engine_get_speed, - .set_speed = dal_i2c_sw_engine_set_speed, - .setup_engine = dal_i2c_engine_setup_i2c_engine, - .submit_channel_request = dal_i2c_sw_engine_submit_channel_request, - .process_channel_reply = dal_i2c_engine_process_channel_reply, - .get_channel_status = dal_i2c_sw_engine_get_channel_status, -}; - -static const struct engine_funcs engine_funcs = { - .release_engine = release_engine, - .get_engine_type = dal_i2c_sw_engine_get_engine_type, - .acquire = dal_i2c_engine_acquire, - .submit_request = dal_i2c_sw_engine_submit_request, -}; - -static void construct( - struct i2c_sw_engine_dce80 *engine, - const struct i2c_sw_engine_dce80_create_arg *arg) -{ - struct i2c_sw_engine_create_arg arg_base; - - arg_base.ctx = arg->ctx; - arg_base.default_speed = arg->default_speed; - - dal_i2c_sw_engine_construct(&engine->base, &arg_base); - - engine->base.base.base.funcs = &engine_funcs; - engine->base.base.funcs = &i2c_engine_funcs; - engine->base.default_speed = arg->default_speed; - engine->engine_id = arg->engine_id; -} - -struct i2c_engine *dal_i2c_sw_engine_dce80_create( - const struct i2c_sw_engine_dce80_create_arg *arg) -{ - struct i2c_sw_engine_dce80 *engine; - - if (!arg) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - engine = kzalloc(sizeof(struct i2c_sw_engine_dce80), GFP_KERNEL); - - if (!engine) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - construct(engine, arg); - return &engine->base.base; -} - diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c deleted file mode 100644 index 1ad6e49102ff..000000000000 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -/* - * Pre-requisites: headers required by header of this unit - */ -#include "include/i2caux_interface.h" -#include "dc_bios_types.h" - -/* - * Header of this unit - */ - -#include "i2caux.h" - -/* - * Post-requisites: headers required by this unit - */ - -#include "engine.h" -#include "i2c_engine.h" -#include "aux_engine.h" - -/* - * This unit - */ - -#include "dce80/i2caux_dce80.h" - -#include "dce100/i2caux_dce100.h" - -#include "dce110/i2caux_dce110.h" - -#include "dce112/i2caux_dce112.h" - -#include "dce120/i2caux_dce120.h" - -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) -#include "dcn10/i2caux_dcn10.h" -#endif - -#include "diagnostics/i2caux_diag.h" - -/* - * @brief - * Plain API, available publicly - */ - -struct i2caux *dal_i2caux_create( - struct dc_context *ctx) -{ - if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) { - return dal_i2caux_diag_fpga_create(ctx); - } - - switch (ctx->dce_version) { - case DCE_VERSION_8_0: - case DCE_VERSION_8_1: - case DCE_VERSION_8_3: - return dal_i2caux_dce80_create(ctx); - case DCE_VERSION_11_2: - case DCE_VERSION_11_22: - return dal_i2caux_dce112_create(ctx); - case DCE_VERSION_11_0: - return dal_i2caux_dce110_create(ctx); - case DCE_VERSION_10_0: - return dal_i2caux_dce100_create(ctx); - case DCE_VERSION_12_0: - case DCE_VERSION_12_1: - return dal_i2caux_dce120_create(ctx); -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - case DCN_VERSION_1_0: - return dal_i2caux_dcn10_create(ctx); -#endif - -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) - case DCN_VERSION_1_01: - return dal_i2caux_dcn10_create(ctx); -#endif - default: - BREAK_TO_DEBUGGER(); - return NULL; - } -} - -bool dal_i2caux_submit_i2c_command( - struct i2caux *i2caux, - struct ddc *ddc, - struct i2c_command *cmd) -{ - struct i2c_engine *engine; - uint8_t index_of_payload = 0; - bool result; - - if (!ddc) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!cmd) { - BREAK_TO_DEBUGGER(); - return false; - } - - /* - * default will be SW, however there is a feature flag in adapter - * service that determines whether SW i2c_engine will be available or - * not, if sw i2c is not available we will fallback to hw. This feature - * flag is set to not creating sw i2c engine for every dce except dce80 - * currently - */ - switch (cmd->engine) { - case I2C_COMMAND_ENGINE_DEFAULT: - case I2C_COMMAND_ENGINE_SW: - /* try to acquire SW engine first, - * acquire HW engine if SW engine not available */ - engine = i2caux->funcs->acquire_i2c_sw_engine(i2caux, ddc); - - if (!engine) - engine = i2caux->funcs->acquire_i2c_hw_engine( - i2caux, ddc); - break; - case I2C_COMMAND_ENGINE_HW: - default: - /* try to acquire HW engine first, - * acquire SW engine if HW engine not available */ - engine = i2caux->funcs->acquire_i2c_hw_engine(i2caux, ddc); - - if (!engine) - engine = i2caux->funcs->acquire_i2c_sw_engine( - i2caux, ddc); - } - - if (!engine) - return false; - - engine->funcs->set_speed(engine, cmd->speed); - - result = true; - - while (index_of_payload < cmd->number_of_payloads) { - bool mot = (index_of_payload != cmd->number_of_payloads - 1); - - struct i2c_payload *payload = cmd->payloads + index_of_payload; - - struct i2caux_transaction_request request = { 0 }; - - request.operation = payload->write ? - I2CAUX_TRANSACTION_WRITE : - I2CAUX_TRANSACTION_READ; - - request.payload.address_space = - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C; - request.payload.address = (payload->address << 1) | - !payload->write; - request.payload.length = payload->length; - request.payload.data = payload->data; - - if (!engine->base.funcs->submit_request( - &engine->base, &request, mot)) { - result = false; - break; - } - - ++index_of_payload; - } - - i2caux->funcs->release_engine(i2caux, &engine->base); - - return result; -} - -bool dal_i2caux_submit_aux_command( - struct i2caux *i2caux, - struct ddc *ddc, - struct aux_command *cmd) -{ - struct aux_engine *engine; - uint8_t index_of_payload = 0; - bool result; - bool mot; - - if (!ddc) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!cmd) { - BREAK_TO_DEBUGGER(); - return false; - } - - engine = i2caux->funcs->acquire_aux_engine(i2caux, ddc); - - if (!engine) - return false; - - engine->delay = cmd->defer_delay; - engine->max_defer_write_retry = cmd->max_defer_write_retry; - - result = true; - - while (index_of_payload < cmd->number_of_payloads) { - struct aux_payload *payload = cmd->payloads + index_of_payload; - struct i2caux_transaction_request request = { 0 }; - - if (cmd->mot == I2C_MOT_UNDEF) - mot = (index_of_payload != cmd->number_of_payloads - 1); - else - mot = (cmd->mot == I2C_MOT_TRUE); - - request.operation = payload->write ? - I2CAUX_TRANSACTION_WRITE : - I2CAUX_TRANSACTION_READ; - - if (payload->i2c_over_aux) { - request.payload.address_space = - I2CAUX_TRANSACTION_ADDRESS_SPACE_I2C; - - request.payload.address = (payload->address << 1) | - !payload->write; - } else { - request.payload.address_space = - I2CAUX_TRANSACTION_ADDRESS_SPACE_DPCD; - - request.payload.address = payload->address; - } - - request.payload.length = payload->length; - request.payload.data = payload->data; - - if (!engine->base.funcs->submit_request( - &engine->base, &request, mot)) { - result = false; - break; - } - - ++index_of_payload; - } - - i2caux->funcs->release_engine(i2caux, &engine->base); - - return result; -} - -static bool get_hw_supported_ddc_line( - struct ddc *ddc, - enum gpio_ddc_line *line) -{ - enum gpio_ddc_line line_found; - - *line = GPIO_DDC_LINE_UNKNOWN; - - if (!ddc) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!ddc->hw_info.hw_supported) - return false; - - line_found = dal_ddc_get_line(ddc); - - if (line_found >= GPIO_DDC_LINE_COUNT) - return false; - - *line = line_found; - - return true; -} - -void dal_i2caux_configure_aux( - struct i2caux *i2caux, - struct ddc *ddc, - union aux_config cfg) -{ - struct aux_engine *engine = - i2caux->funcs->acquire_aux_engine(i2caux, ddc); - - if (!engine) - return; - - engine->funcs->configure(engine, cfg); - - i2caux->funcs->release_engine(i2caux, &engine->base); -} - -void dal_i2caux_destroy( - struct i2caux **i2caux) -{ - if (!i2caux || !*i2caux) { - BREAK_TO_DEBUGGER(); - return; - } - - (*i2caux)->funcs->destroy(i2caux); - - *i2caux = NULL; -} - -/* - * @brief - * An utility function used by 'struct i2caux' and its descendants - */ - -uint32_t dal_i2caux_get_reference_clock( - struct dc_bios *bios) -{ - struct dc_firmware_info info = { { 0 } }; - - if (bios->funcs->get_firmware_info(bios, &info) != BP_RESULT_OK) - return 0; - - return info.pll_info.crystal_frequency; -} - -/* - * @brief - * i2caux - */ - -enum { - /* following are expressed in KHz */ - DEFAULT_I2C_SW_SPEED = 50, - DEFAULT_I2C_HW_SPEED = 50, - - DEFAULT_I2C_SW_SPEED_100KHZ = 100, - DEFAULT_I2C_HW_SPEED_100KHZ = 100, - - /* This is the timeout as defined in DP 1.2a, - * 2.3.4 "Detailed uPacket TX AUX CH State Description". */ - AUX_TIMEOUT_PERIOD = 400, - - /* Ideally, the SW timeout should be just above 550usec - * which is programmed in HW. - * But the SW timeout of 600usec is not reliable, - * because on some systems, delay_in_microseconds() - * returns faster than it should. - * EPR #379763: by trial-and-error on different systems, - * 700usec is the minimum reliable SW timeout for polling - * the AUX_SW_STATUS.AUX_SW_DONE bit. - * This timeout expires *only* when there is - * AUX Error or AUX Timeout conditions - not during normal operation. - * During normal operation, AUX_SW_STATUS.AUX_SW_DONE bit is set - * at most within ~240usec. That means, - * increasing this timeout will not affect normal operation, - * and we'll timeout after - * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec. - * This timeout is especially important for - * resume from S3 and CTS. */ - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 -}; - -struct i2c_engine *dal_i2caux_acquire_i2c_sw_engine( - struct i2caux *i2caux, - struct ddc *ddc) -{ - enum gpio_ddc_line line; - struct i2c_engine *engine = NULL; - - if (get_hw_supported_ddc_line(ddc, &line)) - engine = i2caux->i2c_sw_engines[line]; - - if (!engine) - engine = i2caux->i2c_generic_sw_engine; - - if (!engine) - return NULL; - - if (!engine->base.funcs->acquire(&engine->base, ddc)) - return NULL; - - return engine; -} - -struct aux_engine *dal_i2caux_acquire_aux_engine( - struct i2caux *i2caux, - struct ddc *ddc) -{ - enum gpio_ddc_line line; - struct aux_engine *engine; - - if (!get_hw_supported_ddc_line(ddc, &line)) - return NULL; - - engine = i2caux->aux_engines[line]; - - if (!engine) - return NULL; - - if (!engine->base.funcs->acquire(&engine->base, ddc)) - return NULL; - - return engine; -} - -void dal_i2caux_release_engine( - struct i2caux *i2caux, - struct engine *engine) -{ - engine->funcs->release_engine(engine); - - dal_ddc_close(engine->ddc); - - engine->ddc = NULL; -} - -void dal_i2caux_construct( - struct i2caux *i2caux, - struct dc_context *ctx) -{ - uint32_t i = 0; - - i2caux->ctx = ctx; - do { - i2caux->i2c_sw_engines[i] = NULL; - i2caux->i2c_hw_engines[i] = NULL; - i2caux->aux_engines[i] = NULL; - - ++i; - } while (i < GPIO_DDC_LINE_COUNT); - - i2caux->i2c_generic_sw_engine = NULL; - i2caux->i2c_generic_hw_engine = NULL; - - i2caux->aux_timeout_period = - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD; - - if (ctx->dce_version >= DCE_VERSION_11_2) { - i2caux->default_i2c_hw_speed = DEFAULT_I2C_HW_SPEED_100KHZ; - i2caux->default_i2c_sw_speed = DEFAULT_I2C_SW_SPEED_100KHZ; - } else { - i2caux->default_i2c_hw_speed = DEFAULT_I2C_HW_SPEED; - i2caux->default_i2c_sw_speed = DEFAULT_I2C_SW_SPEED; - } -} - -void dal_i2caux_destruct( - struct i2caux *i2caux) -{ - uint32_t i = 0; - - if (i2caux->i2c_generic_hw_engine) - i2caux->i2c_generic_hw_engine->funcs->destroy( - &i2caux->i2c_generic_hw_engine); - - if (i2caux->i2c_generic_sw_engine) - i2caux->i2c_generic_sw_engine->funcs->destroy( - &i2caux->i2c_generic_sw_engine); - - do { - if (i2caux->aux_engines[i]) - i2caux->aux_engines[i]->funcs->destroy( - &i2caux->aux_engines[i]); - - if (i2caux->i2c_hw_engines[i]) - i2caux->i2c_hw_engines[i]->funcs->destroy( - &i2caux->i2c_hw_engines[i]); - - if (i2caux->i2c_sw_engines[i]) - i2caux->i2c_sw_engines[i]->funcs->destroy( - &i2caux->i2c_sw_engines[i]); - - ++i; - } while (i < GPIO_DDC_LINE_COUNT); -} - From 58a50420aa681502369875213374121425428b52 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 7 Nov 2018 10:54:18 +0800 Subject: [PATCH 160/539] drm/amdgpu: update nbio v6.1 register/master to support BACO Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h index 13d4de645190..d8e0dd192fdd 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h @@ -2247,6 +2247,8 @@ // addressBlock: nbio_nbif_rcc_strap_BIFDEC1[13440..14975] // base address: 0x3480 +#define mmRCC_BIF_STRAP0 0x0000 +#define mmRCC_BIF_STRAP0_BASE_IDX 2 #define mmRCC_DEV0_EPF0_STRAP0 0x000f #define mmRCC_DEV0_EPF0_STRAP0_BASE_IDX 2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h index a02b67943372..29af5167cd00 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h @@ -16838,6 +16838,10 @@ // addressBlock: nbio_nbif_rcc_strap_BIFDEC1[13440..14975] +//RCC_BIF_STRAP0 +#define RCC_BIF_STRAP0__STRAP_PX_CAPABLE__SHIFT 0x7 +#define RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK 0x00000080L + //RCC_DEV0_EPF0_STRAP0 #define RCC_DEV0_EPF0_STRAP0__STRAP_DEVICE_ID_DEV0_F0__SHIFT 0x0 #define RCC_DEV0_EPF0_STRAP0__STRAP_MAJOR_REV_ID_DEV0_F0__SHIFT 0x10 From 7451ca88d51dc9d97ea94b453450fc81494f48e0 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Mon, 5 Nov 2018 17:45:56 +0800 Subject: [PATCH 161/539] drm/amdgpu: add BACO interfaces in pm and hwmgr function table Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 3 +++ drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 789c4f288485..a2ea4c933360 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -281,6 +281,9 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); + int (*get_asic_baco_capability)(void *handle, bool *cap); + int (*get_asic_baco_state)(void *handle, int *state); + int (*set_asic_baco_state)(void *handle, int state); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index be4f87aed99c..577cec90aef1 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -47,6 +47,11 @@ enum DISPLAY_GAP { }; typedef enum DISPLAY_GAP DISPLAY_GAP; +enum BACO_STATE { + BACO_STATE_OUT = 0, + BACO_STATE_IN, +}; + struct vi_dpm_level { bool enabled; uint32_t value; @@ -333,6 +338,9 @@ struct pp_hwmgr_func { int (*enable_mgpu_fan_boost)(struct pp_hwmgr *hwmgr); int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap); + int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); + int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); }; struct pp_table_func { From 305dc3f9834c9df40b3f6d4a6980447fb503cfc1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 21:59:56 +0000 Subject: [PATCH 162/539] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex We have two classes of VM, global GTT and per-process GTT. In order to allow ourselves the freedom to mix both along call chains, distinguish the two classes with regards to their mutex and lockdep maps. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190114215956.32266-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index dbea14bf67cc..74e6d02dcbbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -473,8 +473,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page) spin_unlock(&vm->free_pages.lock); } -static void i915_address_space_init(struct i915_address_space *vm, - struct drm_i915_private *dev_priv) +static void i915_address_space_init(struct i915_address_space *vm, int subclass) { /* * The vm->mutex must be reclaim safe (for use in the shrinker). @@ -482,6 +481,7 @@ static void i915_address_space_init(struct i915_address_space *vm, * attempt holding the lock is immediately reported by lockdep. */ mutex_init(&vm->mutex); + lockdep_set_subclass(&vm->mutex, subclass); i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); GEM_BUG_ON(!vm->total); @@ -1547,7 +1547,7 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) /* From bdw, there is support for read-only pages in the PPGTT. */ ppgtt->vm.has_read_only = true; - i915_address_space_init(&ppgtt->vm, i915); + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. @@ -1996,7 +1996,7 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE; - i915_address_space_init(&ppgtt->base.vm, i915); + i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT); ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; @@ -3433,7 +3433,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - i915_address_space_init(&ggtt->vm, dev_priv); + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); ggtt->vm.is_ggtt = true; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e2360f16427a..9229b03d629b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -288,6 +288,8 @@ struct i915_address_space { bool closed; struct mutex mutex; /* protects vma and our lists */ +#define VM_CLASS_GGTT 0 +#define VM_CLASS_PPGTT 1 u64 scratch_pte; struct i915_page_dma scratch_page; diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 6ae418c76015..976c862b3842 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -70,7 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); - i915_address_space_init(&ppgtt->vm, i915); + i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); ppgtt->vm.clear_range = nop_clear_range; ppgtt->vm.insert_page = mock_insert_page; @@ -102,6 +102,7 @@ void mock_init_ggtt(struct drm_i915_private *i915) struct i915_ggtt *ggtt = &i915->ggtt; ggtt->vm.i915 = i915; + ggtt->vm.is_ggtt = true; ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); ggtt->mappable_end = resource_size(&ggtt->gmadr); @@ -117,9 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - i915_address_space_init(&ggtt->vm, i915); - ggtt->vm.is_ggtt = true; + i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); } void mock_fini_ggtt(struct drm_i915_private *i915) From 8cd999181f8c744c87fb64e7b3600876ec3428b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 21:17:27 +0000 Subject: [PATCH 163/539] drm/i915: Prevent concurrent GGTT update and use on Braswell (again) On Braswell, under heavy stress, if we update the GGTT while simultaneously accessing another region inside the GTT, we are returned the wrong values. To prevent this we stop the machine to update the GGTT entries so that no memory traffic can occur at the same time. This was first spotted in commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 Author: Chris Wilson Date: Fri Oct 23 18:43:32 2015 +0100 drm/i915: Serialise updates to GGTT with access through GGTT on Braswell but removed again in forlorn hope with commit 4509276ee824bb967885c095c610767e42345c36 Author: Chris Wilson Date: Mon Feb 20 12:47:18 2017 +0000 drm/i915: Remove Braswell GGTT update w/a However, gem_concurrent_blit is once again only stable with the patch applied and CI is detecting the odd failure in forked gem_mmap_gtt tests (which smell like the same issue). Fwiw, a wide variety of CPU memory barriers (around GGTT flushing, fence updates, PTE updates) and GPU flushes/invalidates (between requests, after PTE updates) were tried as part of the investigation to find an alternate cause, nothing comes close to serialised GGTT updates. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105591 Testcase: igt/gem_concurrent_blit Testcase: igt/gem_mmap_gtt/*forked* References: 5bab6f60cb4d ("drm/i915: Serialise updates to GGTT with access through GGTT on Braswell") References: 4509276ee824 ("drm/i915: Remove Braswell GGTT update w/a") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190114211729.30352-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 74e6d02dcbbf..d24628f184e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3232,7 +3232,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_entries = gen8_ggtt_insert_entries; /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ - if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { + if (intel_ggtt_update_needs_vtd_wa(dev_priv) || + IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; if (ggtt->vm.clear_range != nop_clear_range) From b14c06ec024947eaa35212f2380e90233d5092e0 Mon Sep 17 00:00:00 2001 From: Aditya Swarup Date: Thu, 10 Jan 2019 15:08:44 -0800 Subject: [PATCH 164/539] drm/i915/cnl: Fix CNL macros for Voltage Swing programming CNL macros for register groups CNL_PORT_TX_DW2_* / CNL_PORT_TX_DW5_* are configured incorrectly wrt definition of _CNL_PORT_TX_DW_GRP. v2: Jani suggested to keep the macros organized semantically i.e., by function, secondarily by port/pipe/transcoder.->(dw, port) Fixes: 4e53840fdfdd ("drm/i915/icl: Introduce new macros to get combophy registers") Cc: Clint Taylor Cc: Imre Deak Cc: Jani Nikula Signed-off-by: Aditya Swarup Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190110230844.9213-1-aditya.swarup@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 44958d994bfa..fad5a9e8b44d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1814,7 +1814,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 #define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 #define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ @@ -1822,7 +1822,7 @@ enum i915_power_well_id { _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_F_GRP_OFFSET) + \ 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ @@ -1858,9 +1858,9 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(4, port)) @@ -1888,8 +1888,8 @@ enum i915_power_well_id { #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) #define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) #define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) #define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) From fed85691b4083308792a862c50f9492c8f19433e Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 9 Jan 2019 13:14:14 -0800 Subject: [PATCH 165/539] drm/i915: Fix the static code analysis warning in debugfs intel_dp->dsc_dpcd is defined as an array making the if check redundant. Fixes: e845f099f1c6 ("drm/i915/dsc: Add Per connector debugfs node for DSC support/enable") Cc: Rodrigo Vivi Reported-by: Nathan Chancellor Signed-off-by: Radhakrishna Sripada Reviewed-by: Manasi Navare Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190109211414.15622-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 37c9aff234e0..24e2d52efa8e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4973,9 +4973,8 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", yesno(crtc_state->dsc_params.compression_enable)); - if (intel_dp->dsc_dpcd) - seq_printf(m, "DSC_Sink_Support: %s\n", - yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); + seq_printf(m, "DSC_Sink_Support: %s\n", + yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); if (!intel_dp_is_edp(intel_dp)) seq_printf(m, "FEC_Sink_Support: %s\n", yesno(drm_dp_sink_supports_fec(intel_dp->fec_capable))); From 6d2438c8233bd06f24014a1fef17a7ac1c1777fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2019 10:25:05 +0000 Subject: [PATCH 166/539] drm/i915/perf: Annotate i915_perf.wakeref for keneldoc drivers/gpu/drm/i915/i915_drv.h:1375: warning: Function parameter or member 'wakeref' not described in 'i915_perf_stream' Reported-by: kbuild-all@01.org Fixes: 6619c0075f78 ("drm/i915/perf: Track the rpm wakeref") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190115102505.4843-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fa99824f63b3..956c1c86f90d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1336,6 +1336,10 @@ struct i915_perf_stream { */ struct list_head link; + /** + * @wakeref: As we keep the device awake while the perf stream is + * active, we track our runtime pm reference for later release. + */ intel_wakeref_t wakeref; /** From decd29e6b5fe08e20fade72100e2d4a85dc5f766 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2019 12:20:57 +0000 Subject: [PATCH 167/539] drm/i915: Only dump GPU state on set-wedged if interesting As we may frequently mark the device as wedged to flush requests off it during the normal course of events, quite often we have a large state dump that is of no interest. Don't bother dumping it all if the engines are all idle. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190115122057.1677-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61037e7292ee..d35dd3d6d3b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3178,7 +3178,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) GEM_TRACE("start\n"); - if (GEM_SHOW_DEBUG()) { + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { struct drm_printer p = drm_debug_printer(__func__); for_each_engine(engine, i915, id) From e9d49bb718f394faab07498ca5f14d7f8414b1da Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 20 Dec 2018 15:26:02 +0200 Subject: [PATCH 168/539] drm/i915/ddi: Move DDI port detection to the corresponding helper We have already a function to detect DDI ports using VBT, so instead of opencoding the DDI specific version of this, move the opencoded part to the existing helper. Cc: Jani Nikula Cc: Mika Kahola Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20181220132604.25222-1-imre.deak@intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 9 +++++++++ drivers/gpu/drm/i915/intel_display.c | 4 +--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 140c218128cb..561a4f9f044c 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1946,6 +1946,15 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por }; int i; + if (HAS_DDI(dev_priv)) { + const struct ddi_vbt_port_info *port_info = + &dev_priv->vbt.ddi_port_info[port]; + + return port_info->supports_dp || + port_info->supports_dvi || + port_info->supports_hdmi; + } + /* FIXME maybe deal with port A as well? */ if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping)) return false; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index accb3081dee1..3d1e1e652d72 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14362,9 +14362,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ if (IS_GEN9_BC(dev_priv) && - (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || - dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || - dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) + intel_bios_is_port_present(dev_priv, PORT_E)) intel_ddi_init(dev_priv, PORT_E); } else if (HAS_PCH_SPLIT(dev_priv)) { From 3f2e9ed0b26d65de6d4673233671537ed7fc6c98 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 20 Dec 2018 15:26:03 +0200 Subject: [PATCH 169/539] drm/i915/icl: Detect port F presence via VBT Registering an output for a non-existent port (on a given SKU) can lead to problems when trying to use the port, for instance timeouts during power well enabling. Since there are no strap bits for port detection we have to rely on VBT for this, so do that here. There are no known SKUs where any of the A-E ports are non-existent, so to reduce the likelihood of breakage due to incorrect VBT information, do this detection only for port F (which is known to be missing on some ICL SKUs). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108915 Cc: Mika Kahola Cc: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20181220132604.25222-2-imre.deak@intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3d1e1e652d72..ce1cdd2c0c3d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14320,7 +14320,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); - intel_ddi_init(dev_priv, PORT_F); + /* + * On some ICL SKUs port F is not present. No strap bits for + * this, so rely on VBT. + */ + if (intel_bios_is_port_present(dev_priv, PORT_F)) + intel_ddi_init(dev_priv, PORT_F); + icl_dsi_init(dev_priv); } else if (IS_GEN9_LP(dev_priv)) { /* From 484d9a844d0d0aeaa4cd3cec20885b7de9986a55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2019 12:44:42 +0000 Subject: [PATCH 170/539] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") we have been able to report failure from mmu_invalidate_range_start which allows us to use a trylock on the struct_mutex to avoid potential recursion and report -EBUSY instead. Furthermore, this allows us to pull the work into the main callback and avoid the sleight-of-hand in using a workqueue to avoid lockdep. However, not all paths to mmu_invalidate_range_start are prepared to handle failure, so instead of reporting the recursion, deal with it by propagating the failure upwards, who can decide themselves to handle it or report it. v2: Mark up the recursive lock behaviour and comment on the various weak points. v3: Follow commit 3824e41975ae ("drm/i915: Use mutex_lock_killable() from inside the shrinker") and also use mutex_lock_killable(). v3.1: No leak on EINTR. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375 References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190115124442.3500-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 30 +++- drivers/gpu/drm/i915/i915_gem_object.h | 7 + drivers/gpu/drm/i915/i915_gem_userptr.c | 226 ++++++++++++------------ 4 files changed, 140 insertions(+), 127 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 956c1c86f90d..da055a86db4d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2935,8 +2935,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */ }; -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass); +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass); void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); enum i915_map_type { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d35dd3d6d3b6..565b2fa1607d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2303,8 +2303,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) struct sg_table *pages; pages = fetch_and_zero(&obj->mm.pages); - if (!pages) - return NULL; + if (IS_ERR_OR_NULL(pages)) + return pages; spin_lock(&i915->mm.obj_lock); list_del(&obj->mm.link); @@ -2328,22 +2328,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) return pages; } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, - enum i915_mm_subclass subclass) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass) { struct sg_table *pages; + int ret; if (i915_gem_object_has_pinned_pages(obj)) - return; + return -EBUSY; GEM_BUG_ON(obj->bind_count); - if (!i915_gem_object_has_pages(obj)) - return; /* May be called by shrinker from within get_pages() (on another bo) */ mutex_lock_nested(&obj->mm.lock, subclass); - if (unlikely(atomic_read(&obj->mm.pages_pin_count))) + if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { + ret = -EBUSY; goto unlock; + } /* * ->put_pages might need to allocate memory for the bit17 swizzle @@ -2351,11 +2352,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, * lists early. */ pages = __i915_gem_object_unset_pages(obj); + + /* + * XXX Temporary hijinx to avoid updating all backends to handle + * NULL pages. In the future, when we have more asynchronous + * get_pages backends we should be better able to handle the + * cancellation of the async task in a more uniform manner. + */ + if (!pages && !i915_gem_object_needs_async_cancel(obj)) + pages = ERR_PTR(-EINVAL); + if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + ret = 0; unlock: mutex_unlock(&obj->mm.lock); + + return ret; } bool i915_sg_trim(struct sg_table *orig_st) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ff3da64470dd..cb1b0144d274 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -57,6 +57,7 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -387,6 +388,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; } +static inline bool +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; +} + static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1fb6a7bb5054..38e19a42e0f4 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -49,77 +49,67 @@ struct i915_mmu_notifier { struct hlist_node node; struct mmu_notifier mn; struct rb_root_cached objects; - struct workqueue_struct *wq; + struct i915_mm_struct *mm; }; struct i915_mmu_object { struct i915_mmu_notifier *mn; struct drm_i915_gem_object *obj; struct interval_tree_node it; - struct list_head link; - struct work_struct work; - bool attached; }; -static void cancel_userptr(struct work_struct *work) -{ - struct i915_mmu_object *mo = container_of(work, typeof(*mo), work); - struct drm_i915_gem_object *obj = mo->obj; - struct work_struct *active; - - /* Cancel any active worker and force us to re-evaluate gup */ - mutex_lock(&obj->mm.lock); - active = fetch_and_zero(&obj->userptr.work); - mutex_unlock(&obj->mm.lock); - if (active) - goto out; - - i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL); - - mutex_lock(&obj->base.dev->struct_mutex); - - /* We are inside a kthread context and can't be interrupted */ - if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - WARN_ONCE(i915_gem_object_has_pages(obj), - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n", - obj->bind_count, - atomic_read(&obj->mm.pages_pin_count), - obj->pin_global); - - mutex_unlock(&obj->base.dev->struct_mutex); - -out: - i915_gem_object_put(obj); -} - static void add_object(struct i915_mmu_object *mo) { - if (mo->attached) - return; - + GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); interval_tree_insert(&mo->it, &mo->mn->objects); - mo->attached = true; } static void del_object(struct i915_mmu_object *mo) { - if (!mo->attached) + if (RB_EMPTY_NODE(&mo->it.rb)) return; interval_tree_remove(&mo->it, &mo->mn->objects); - mo->attached = false; + RB_CLEAR_NODE(&mo->it.rb); } -static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, - const struct mmu_notifier_range *range) +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ + struct i915_mmu_object *mo = obj->userptr.mmu_object; + + /* + * During mm_invalidate_range we need to cancel any userptr that + * overlaps the range being invalidated. Doing so requires the + * struct_mutex, and that risks recursion. In order to cause + * recursion, the user must alias the userptr address space with + * a GTT mmapping (possible with a MAP_FIXED) - then when we have + * to invalidate that mmaping, mm_invalidate_range is called with + * the userptr address *and* the struct_mutex held. To prevent that + * we set a flag under the i915_mmu_notifier spinlock to indicate + * whether this object is valid. + */ + if (!mo) + return; + + spin_lock(&mo->mn->lock); + if (value) + add_object(mo); + else + del_object(mo); + spin_unlock(&mo->mn->lock); +} + +static int +userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, + const struct mmu_notifier_range *range) { struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); - struct i915_mmu_object *mo; struct interval_tree_node *it; - LIST_HEAD(cancelled); + struct mutex *unlock = NULL; unsigned long end; + int ret = 0; if (RB_EMPTY_ROOT(&mn->objects.rb_root)) return 0; @@ -130,11 +120,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, spin_lock(&mn->lock); it = interval_tree_iter_first(&mn->objects, range->start, end); while (it) { + struct drm_i915_gem_object *obj; + if (!range->blockable) { - spin_unlock(&mn->lock); - return -EAGAIN; + ret = -EAGAIN; + break; } - /* The mmu_object is released late when destroying the + + /* + * The mmu_object is released late when destroying the * GEM object so it is entirely possible to gain a * reference on an object in the process of being freed * since our serialisation is via the spinlock and not @@ -143,29 +137,65 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, * use-after-free we only acquire a reference on the * object if it is not in the process of being destroyed. */ - mo = container_of(it, struct i915_mmu_object, it); - if (kref_get_unless_zero(&mo->obj->base.refcount)) - queue_work(mn->wq, &mo->work); + obj = container_of(it, struct i915_mmu_object, it)->obj; + if (!kref_get_unless_zero(&obj->base.refcount)) { + it = interval_tree_iter_next(it, range->start, end); + continue; + } + spin_unlock(&mn->lock); - list_add(&mo->link, &cancelled); - it = interval_tree_iter_next(it, range->start, end); + if (!unlock) { + unlock = &mn->mm->i915->drm.struct_mutex; + + switch (mutex_trylock_recursive(unlock)) { + default: + case MUTEX_TRYLOCK_FAILED: + if (!mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { + i915_gem_object_put(obj); + return -EINTR; + } + /* fall through */ + case MUTEX_TRYLOCK_SUCCESS: + break; + + case MUTEX_TRYLOCK_RECURSIVE: + unlock = ERR_PTR(-EEXIST); + break; + } + } + + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + i915_gem_object_put(obj); + if (ret) + goto unlock; + + spin_lock(&mn->lock); + + /* + * As we do not (yet) protect the mmu from concurrent insertion + * over this range, there is no guarantee that this search will + * terminate given a pathologic workload. + */ + it = interval_tree_iter_first(&mn->objects, range->start, end); } - list_for_each_entry(mo, &cancelled, link) - del_object(mo); spin_unlock(&mn->lock); - if (!list_empty(&cancelled)) - flush_workqueue(mn->wq); +unlock: + if (!IS_ERR_OR_NULL(unlock)) + mutex_unlock(unlock); + + return ret; - return 0; } static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start, + .invalidate_range_start = userptr_mn_invalidate_range_start, }; static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct mm_struct *mm) +i915_mmu_notifier_create(struct i915_mm_struct *mm) { struct i915_mmu_notifier *mn; @@ -176,13 +206,7 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", - WQ_UNBOUND | WQ_MEM_RECLAIM, - 0); - if (mn->wq == NULL) { - kfree(mn); - return ERR_PTR(-ENOMEM); - } + mn->mm = mm; return mn; } @@ -192,16 +216,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) { struct i915_mmu_object *mo; - mo = obj->userptr.mmu_object; - if (mo == NULL) + mo = fetch_and_zero(&obj->userptr.mmu_object); + if (!mo) return; spin_lock(&mo->mn->lock); del_object(mo); spin_unlock(&mo->mn->lock); kfree(mo); - - obj->userptr.mmu_object = NULL; } static struct i915_mmu_notifier * @@ -214,7 +236,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm) if (mn) return mn; - mn = i915_mmu_notifier_create(mm->mm); + mn = i915_mmu_notifier_create(mm); if (IS_ERR(mn)) err = PTR_ERR(mn); @@ -237,10 +259,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm) mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - if (mn && !IS_ERR(mn)) { - destroy_workqueue(mn->wq); + if (mn && !IS_ERR(mn)) kfree(mn); - } return err ? ERR_PTR(err) : mm->mn; } @@ -263,14 +283,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, return PTR_ERR(mn); mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (mo == NULL) + if (!mo) return -ENOMEM; mo->mn = mn; mo->obj = obj; mo->it.start = obj->userptr.ptr; mo->it.last = obj->userptr.ptr + obj->base.size - 1; - INIT_WORK(&mo->work, cancel_userptr); + RB_CLEAR_NODE(&mo->it.rb); obj->userptr.mmu_object = mo; return 0; @@ -284,12 +304,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn, return; mmu_notifier_unregister(&mn->mn, mm); - destroy_workqueue(mn->wq); kfree(mn); } #else +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ +} + static void i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) { @@ -458,42 +482,6 @@ alloc_table: return st; } -static int -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, - bool value) -{ - int ret = 0; - - /* During mm_invalidate_range we need to cancel any userptr that - * overlaps the range being invalidated. Doing so requires the - * struct_mutex, and that risks recursion. In order to cause - * recursion, the user must alias the userptr address space with - * a GTT mmapping (possible with a MAP_FIXED) - then when we have - * to invalidate that mmaping, mm_invalidate_range is called with - * the userptr address *and* the struct_mutex held. To prevent that - * we set a flag under the i915_mmu_notifier spinlock to indicate - * whether this object is valid. - */ -#if defined(CONFIG_MMU_NOTIFIER) - if (obj->userptr.mmu_object == NULL) - return 0; - - spin_lock(&obj->userptr.mmu_object->mn->lock); - /* In order to serialise get_pages with an outstanding - * cancel_userptr, we must drop the struct_mutex and try again. - */ - if (!value) - del_object(obj->userptr.mmu_object); - else if (!work_pending(&obj->userptr.mmu_object->work)) - add_object(obj->userptr.mmu_object); - else - ret = -EAGAIN; - spin_unlock(&obj->userptr.mmu_object->mn->lock); -#endif - - return ret; -} - static void __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { @@ -679,8 +667,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - BUG_ON(obj->userptr.work != NULL); + /* Cancel any inflight work and force them to restart their gup */ + obj->userptr.work = NULL; __i915_gem_userptr_set_active(obj, false); + if (!pages) + return; if (obj->mm.madv != I915_MADV_WILLNEED) obj->mm.dirty = false; @@ -718,7 +709,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, .dmabuf_export = i915_gem_userptr_dmabuf_export, From 0212bdef5a4de344a179f9c267899df5bb268ca7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2019 21:29:48 +0000 Subject: [PATCH 171/539] drm/i915: Move intel_execlists_show_requests() aside Move the debug pretty printer into a standalone routine prior to extending it in upcoming feature work. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190115212948.10423-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 55 ++---------------------- drivers/gpu/drm/i915/intel_lrc.c | 58 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 10 ++++- 3 files changed, 71 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 45e33eee76f9..200218cb157f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1422,15 +1422,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { - const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; - const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq, *last; + struct i915_request *rq; intel_wakeref_t wakeref; unsigned long flags; struct rb_node *rb; - int count; if (header) { va_list ap; @@ -1494,52 +1491,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - local_irq_save(flags); - spin_lock(&engine->timeline.lock); + intel_execlists_show_requests(engine, m, print_request, 8); - last = NULL; - count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { - if (count++ < MAX_REQUESTS_TO_SHOW - 1) - print_request(m, rq, "\t\tE "); - else - last = rq; - } - if (last) { - if (count > MAX_REQUESTS_TO_SHOW) { - drm_printf(m, - "\t\t...skipping %d executing requests...\n", - count - MAX_REQUESTS_TO_SHOW); - } - print_request(m, last, "\t\tE "); - } - - last = NULL; - count = 0; - drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - - priolist_for_each_request(rq, p, i) { - if (count++ < MAX_REQUESTS_TO_SHOW - 1) - print_request(m, rq, "\t\tQ "); - else - last = rq; - } - } - if (last) { - if (count > MAX_REQUESTS_TO_SHOW) { - drm_printf(m, - "\t\t...skipping %d queued requests...\n", - count - MAX_REQUESTS_TO_SHOW); - } - print_request(m, last, "\t\tQ "); - } - - spin_unlock(&engine->timeline.lock); - - spin_lock(&b->rb_lock); + spin_lock_irqsave(&b->rb_lock, flags); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); @@ -1548,8 +1502,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, task_state_to_char(w->tsk), w->seqno); } - spin_unlock(&b->rb_lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&b->rb_lock, flags); drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.page_addr, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dcb11c5f8230..a62ad80fdf97 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2702,6 +2702,64 @@ void intel_lr_context_resume(struct drm_i915_private *i915) } } +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max) +{ + const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_request *rq, *last; + unsigned long flags; + unsigned int count; + struct rb_node *rb; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < max - 1) + show_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - max); + } + show_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + int i; + + priolist_for_each_request(rq, p, i) { + if (count++ < max - 1) + show_request(m, rq, "\t\tQ "); + else + last = rq; + } + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - max); + } + show_request(m, last, "\t\tQ "); + } + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f5a5502ecf70..3d86c27c6b32 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -97,11 +97,19 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN +struct drm_printer; + struct drm_i915_private; struct i915_gem_context; void intel_lr_context_resume(struct drm_i915_private *dev_priv); - void intel_execlists_set_default_submission(struct intel_engine_cs *engine); +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + struct i915_request *rq, + const char *prefix), + unsigned int max); + #endif /* _INTEL_LRC_H_ */ From 9e267d286af5c5a67995128df40ca3d1f93277a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2019 22:11:18 +0000 Subject: [PATCH 172/539] drm/i915/userptr: Fix error handling of mutex_lock_killable() mutex_lock_killable() returns -EINTR on failure, not the anticipate bool return like trylock. (Oh no, not again.) Fixes: 484d9a844d0d ("drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190115221118.13304-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 38e19a42e0f4..1d3f9a31ad61 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -150,7 +150,7 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, switch (mutex_trylock_recursive(unlock)) { default: case MUTEX_TRYLOCK_FAILED: - if (!mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { + if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { i915_gem_object_put(obj); return -EINTR; } From 204474a6b859ff2367252d8312ac65d3245823bf Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 15 Jan 2019 15:08:00 -0500 Subject: [PATCH 173/539] drm/i915: Pass down rc in intel_encoder->compute_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Something that I completely missed when implementing the new MST VCPI atomic helpers is that with those helpers, there's technically a chance of us having to grab additional modeset locks in ->compute_config() and furthermore, that means we have the potential to hit a normal modeset deadlock. However, because ->compute_config() only returns a bool this means we can't return -EDEADLK when we need to drop locks and try again which means we end up just failing the atomic check permanently. Whoops. So, fix this by modifying ->compute_config() to pass down an actual error code instead of a bool so that the atomic check can be restarted on modeset deadlocks. Thanks to Ville Syrjälä for pointing this out! Changes since v1: * Add some newlines * Return only -EINVAL from hsw_crt_compute_config() * Propogate return code from intel_dp_compute_dsc_params() * Change all of the intel_dp_compute_link_config*() variants * Don't miss if (hdmi_port_clock_valid()) branch in intel_hdmi_compute_config() [Cherry-picked from drm-misc-next to drm-intel-next-queued to fix linux-next & drm-tip conflict, while waiting for proper propagation of the DP MST series that this commit fixes. In hindsight, a topic branch might have been a better approach for it.] Signed-off-by: Lyude Paul Cc: Ville Syrjälä Fixes: eceae1472467 ("drm/dp_mst: Start tracking per-port VCPI allocations") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109320 Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190115200800.3121-1-lyude@redhat.com (cherry picked from commit 96550555a78ca3c9fda4b358549a5622810fe32c) Signed-off-by: Jani Nikula Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/icl_dsi.c | 8 ++-- drivers/gpu/drm/i915/intel_crt.c | 35 +++++++------- drivers/gpu/drm/i915/intel_ddi.c | 6 +-- drivers/gpu/drm/i915/intel_display.c | 11 +++-- drivers/gpu/drm/i915/intel_dp.c | 71 +++++++++++++++------------- drivers/gpu/drm/i915/intel_dp_mst.c | 12 ++--- drivers/gpu/drm/i915/intel_drv.h | 18 +++---- drivers/gpu/drm/i915/intel_dvo.c | 11 +++-- drivers/gpu/drm/i915/intel_hdmi.c | 14 +++--- drivers/gpu/drm/i915/intel_lvds.c | 12 ++--- drivers/gpu/drm/i915/intel_sdvo.c | 14 +++--- drivers/gpu/drm/i915/intel_tv.c | 8 ++-- drivers/gpu/drm/i915/vlv_dsi.c | 14 +++--- 13 files changed, 122 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index f3a5f03646ce..355b48d1c937 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -1188,9 +1188,9 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); } -static bool gen11_dsi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int gen11_dsi_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); @@ -1215,7 +1215,7 @@ static bool gen11_dsi_compute_config(struct intel_encoder *encoder, pipe_config->clock_set = true; pipe_config->port_clock = intel_dsi_bitrate(intel_dsi) / 5; - return true; + return 0; } static u64 gen11_dsi_get_power_domains(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 33bd2addcbdd..081c333f30d2 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -345,51 +345,52 @@ intel_crt_mode_valid(struct drm_connector *connector, return MODE_OK; } -static bool intel_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - return true; + + return 0; } -static bool pch_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int pch_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->has_pch_encoder = true; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - return true; + return 0; } -static bool hsw_crt_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int hsw_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; /* HSW/BDW FDI limited to 4k */ if (adjusted_mode->crtc_hdisplay > 4096 || adjusted_mode->crtc_hblank_start > 4096) - return false; + return -EINVAL; pipe_config->has_pch_encoder = true; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -398,7 +399,7 @@ static bool hsw_crt_compute_config(struct intel_encoder *encoder, if (HAS_PCH_LPT(dev_priv)) { if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) { DRM_DEBUG_KMS("LPT only supports 24bpp\n"); - return false; + return -EINVAL; } pipe_config->pipe_bpp = 24; @@ -407,7 +408,7 @@ static bool hsw_crt_compute_config(struct intel_encoder *encoder, /* FDI must always be 2.7 GHz */ pipe_config->port_clock = 135000 * 2; - return true; + return 0; } static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7f3cd055de50..ce44744a5f9d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3837,9 +3837,9 @@ intel_ddi_compute_output_type(struct intel_encoder *encoder, } } -static bool intel_ddi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_ddi_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ce1cdd2c0c3d..c6b3b69aaeac 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11553,10 +11553,13 @@ encoder_retry: continue; encoder = to_intel_encoder(connector_state->best_encoder); - - if (!(encoder->compute_config(encoder, pipe_config, connector_state))) { - DRM_DEBUG_KMS("Encoder config failure\n"); - return -EINVAL; + ret = encoder->compute_config(encoder, pipe_config, + connector_state); + if (ret < 0) { + if (ret != -EDEADLK) + DRM_DEBUG_KMS("Encoder config failure: %d\n", + ret); + return ret; } } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0a3ac98a779e..df4292bb1a4f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1819,7 +1819,7 @@ intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, } /* Optimize link config in order: max bpp, min clock, min lanes */ -static bool +static int intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, const struct link_config_limits *limits) @@ -1845,17 +1845,17 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, pipe_config->pipe_bpp = bpp; pipe_config->port_clock = link_clock; - return true; + return 0; } } } } - return false; + return -EINVAL; } /* Optimize link config in order: max bpp, min lanes, min clock */ -static bool +static int intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config, const struct link_config_limits *limits) @@ -1881,13 +1881,13 @@ intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, pipe_config->pipe_bpp = bpp; pipe_config->port_clock = link_clock; - return true; + return 0; } } } } - return false; + return -EINVAL; } static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) @@ -1905,19 +1905,20 @@ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) return 0; } -static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state, - struct link_config_limits *limits) +static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state, + struct link_config_limits *limits) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; u8 dsc_max_bpc; int pipe_bpp; + int ret; if (!intel_dp_supports_dsc(intel_dp, pipe_config)) - return false; + return -EINVAL; dsc_max_bpc = min_t(u8, DP_DSC_MAX_SUPPORTED_BPC, conn_state->max_requested_bpc); @@ -1925,7 +1926,7 @@ static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, dsc_max_bpc); if (pipe_bpp < DP_DSC_MIN_SUPPORTED_BPC * 3) { DRM_DEBUG_KMS("No DSC support for less than 8bpc\n"); - return false; + return -EINVAL; } /* @@ -1959,7 +1960,7 @@ static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, adjusted_mode->crtc_hdisplay); if (!dsc_max_output_bpp || !dsc_dp_slice_count) { DRM_DEBUG_KMS("Compressed BPP/Slice Count not supported\n"); - return false; + return -EINVAL; } pipe_config->dsc_params.compressed_bpp = min_t(u16, dsc_max_output_bpp >> 4, @@ -1976,16 +1977,19 @@ static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->dsc_params.dsc_split = true; } else { DRM_DEBUG_KMS("Cannot split stream to use 2 VDSC instances\n"); - return false; + return -EINVAL; } } - if (intel_dp_compute_dsc_params(intel_dp, pipe_config) < 0) { + + ret = intel_dp_compute_dsc_params(intel_dp, pipe_config); + if (ret < 0) { DRM_DEBUG_KMS("Cannot compute valid DSC parameters for Input Bpp = %d " "Compressed BPP = %d\n", pipe_config->pipe_bpp, pipe_config->dsc_params.compressed_bpp); - return false; + return ret; } + pipe_config->dsc_params.compression_enable = true; DRM_DEBUG_KMS("DP DSC computed with Input Bpp = %d " "Compressed Bpp = %d Slice Count = %d\n", @@ -1993,10 +1997,10 @@ static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->dsc_params.compressed_bpp, pipe_config->dsc_params.slice_count); - return true; + return 0; } -static bool +static int intel_dp_compute_link_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2005,7 +2009,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct link_config_limits limits; int common_len; - bool ret; + int ret; common_len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate); @@ -2063,10 +2067,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, /* enable compression if the mode doesn't fit available BW */ DRM_DEBUG_KMS("Force DSC en = %d\n", intel_dp->force_dsc_en); - if (!ret || intel_dp->force_dsc_en) { - if (!intel_dp_dsc_compute_config(intel_dp, pipe_config, - conn_state, &limits)) - return false; + if (ret || intel_dp->force_dsc_en) { + ret = intel_dp_dsc_compute_config(intel_dp, pipe_config, + conn_state, &limits); + if (ret < 0) + return ret; } if (pipe_config->dsc_params.compression_enable) { @@ -2091,10 +2096,10 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp_max_data_rate(pipe_config->port_clock, pipe_config->lane_count)); } - return true; + return 0; } -bool +int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2110,6 +2115,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, to_intel_digital_connector_state(conn_state); bool constant_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_CONSTANT_N); + int ret; if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -2131,8 +2137,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode); if (INTEL_GEN(dev_priv) >= 9) { - int ret; - ret = skl_update_scaler_crtc(pipe_config); if (ret) return ret; @@ -2147,20 +2151,21 @@ intel_dp_compute_config(struct intel_encoder *encoder, } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; if (HAS_GMCH_DISPLAY(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) - return false; + return -EINVAL; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) - return false; + return -EINVAL; pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && intel_dp_supports_fec(intel_dp, pipe_config); - if (!intel_dp_compute_link_config(encoder, pipe_config, conn_state)) - return false; + ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); + if (ret < 0) + return ret; if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* @@ -2208,7 +2213,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_psr_compute_config(intel_dp, pipe_config); - return true; + return 0; } void intel_dp_set_link_params(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index a8d43ad5352c..778c887108b7 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -29,9 +29,9 @@ #include #include -static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_dp_mst_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); @@ -48,7 +48,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, DP_DPCD_QUIRK_CONSTANT_N); if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->has_pch_encoder = false; @@ -85,7 +85,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, if (slots < 0) { DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", slots); - return false; + return slots; } } @@ -103,7 +103,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); - return true; + return 0; } static int intel_dp_mst_atomic_check(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5e5ceec7c004..9ecff07598d9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -225,9 +225,9 @@ struct intel_encoder { enum intel_output_type (*compute_output_type)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); - bool (*compute_config)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + int (*compute_config)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); void (*pre_pll_enable)(struct intel_encoder *, const struct intel_crtc_state *, const struct drm_connector_state *); @@ -1817,9 +1817,9 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_flush_work(struct drm_encoder *encoder); -bool intel_dp_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state); +int intel_dp_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state); bool intel_dp_is_edp(struct intel_dp *intel_dp); bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, @@ -1979,9 +1979,9 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); -bool intel_hdmi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state); +int intel_hdmi_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state); bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, struct drm_connector *connector, bool high_tmds_clock_ratio, diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index bc3c3cb57ec6..a6c82482a841 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -234,9 +234,9 @@ intel_dvo_mode_valid(struct drm_connector *connector, return intel_dvo->dev.dev_ops->mode_valid(&intel_dvo->dev, mode); } -static bool intel_dvo_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_dvo_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dvo *intel_dvo = enc_to_dvo(encoder); const struct drm_display_mode *fixed_mode = @@ -253,10 +253,11 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder, intel_fixed_panel_mode(fixed_mode, adjusted_mode); if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - return true; + + return 0; } static void intel_dvo_pre_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 14727ac06f67..51ec81d41dca 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1708,9 +1708,9 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return true; } -bool intel_hdmi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +int intel_hdmi_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -1726,7 +1726,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->has_hdmi_sink = !force_dvi && intel_hdmi->has_hdmi_sink; @@ -1757,7 +1757,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, &clock_12bpc, &clock_10bpc, &clock_8bpc)) { DRM_ERROR("Can't support YCBCR420 output\n"); - return false; + return -EINVAL; } } @@ -1807,7 +1807,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock, false, force_dvi) != MODE_OK) { DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n"); - return false; + return -EINVAL; } /* Set user selected PAR to incoming mode's member */ @@ -1826,7 +1826,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } } - return true; + return 0; } static void diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b01aacb5d73d..46a5dfd5cdf7 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -380,9 +380,9 @@ intel_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); struct intel_lvds_encoder *lvds_encoder = @@ -396,7 +396,7 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, /* Should never happen!! */ if (INTEL_GEN(dev_priv) < 4 && intel_crtc->pipe == 0) { DRM_ERROR("Can't support LVDS on pipe A\n"); - return false; + return -EINVAL; } if (lvds_encoder->a3_power == LVDS_A3_POWER_UP) @@ -422,7 +422,7 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, adjusted_mode); if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; if (HAS_PCH_SPLIT(dev_priv)) { pipe_config->has_pch_encoder = true; @@ -441,7 +441,7 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, * user's requested refresh rate. */ - return true; + return 0; } static enum drm_connector_status diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index b08fed11219f..ba58b06f730d 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1107,9 +1107,9 @@ static void i9xx_adjust_sdvo_tv_clock(struct intel_crtc_state *pipe_config) pipe_config->clock_set = true; } -static bool intel_sdvo_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_sdvo_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_sdvo_connector_state *intel_sdvo_state = @@ -1134,7 +1134,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, */ if (IS_TV(intel_sdvo_connector)) { if (!intel_sdvo_set_output_timings_from_mode(intel_sdvo, mode)) - return false; + return -EINVAL; (void) intel_sdvo_get_preferred_input_mode(intel_sdvo, intel_sdvo_connector, @@ -1144,7 +1144,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, } else if (IS_LVDS(intel_sdvo_connector)) { if (!intel_sdvo_set_output_timings_from_mode(intel_sdvo, intel_sdvo_connector->base.panel.fixed_mode)) - return false; + return -EINVAL; (void) intel_sdvo_get_preferred_input_mode(intel_sdvo, intel_sdvo_connector, @@ -1153,7 +1153,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; /* * Make the CRTC code factor in the SDVO pixel multiplier. The @@ -1193,7 +1193,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, if (intel_sdvo_connector->is_hdmi) adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; - return true; + return 0; } #define UPDATE_PROPERTY(input, NAME) \ diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index d7a414ce2774..bd5536f0ec92 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -869,7 +869,7 @@ intel_tv_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } -static bool +static int intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -879,10 +879,10 @@ intel_tv_compute_config(struct intel_encoder *encoder, &pipe_config->base.adjusted_mode; if (!tv_mode) - return false; + return -EINVAL; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; adjusted_mode->crtc_clock = tv_mode->clock; @@ -897,7 +897,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, * or whether userspace is doing something stupid. */ - return true; + return 0; } static void diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index d116fead8514..c247ce74b71a 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -256,9 +256,9 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->sb_lock); } -static bool intel_dsi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static int intel_dsi_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, @@ -284,7 +284,7 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, } if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) - return false; + return -EINVAL; /* DSI uses short packets for sync events, so clear mode flags for DSI */ adjusted_mode->flags = 0; @@ -302,16 +302,16 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, ret = bxt_dsi_pll_compute(encoder, pipe_config); if (ret) - return false; + return -EINVAL; } else { ret = vlv_dsi_pll_compute(encoder, pipe_config); if (ret) - return false; + return -EINVAL; } pipe_config->clock_set = true; - return true; + return 0; } static bool glk_dsi_enable_io(struct intel_encoder *encoder) From f25d0a68beb868147571e395de52ced0c55f6cd4 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 10 Dec 2018 17:34:54 +0100 Subject: [PATCH 174/539] drm/tegra: Refactor CEC support Most of the CEC support code already lives in the "output" library code. Move registration and unregistration to the library code as well to make use of the same code with HDMI on Tegra210 and later via the SOR. Signed-off-by: Thierry Reding Reviewed-by: Hans Verkuil Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.h | 2 +- drivers/gpu/drm/tegra/hdmi.c | 9 --------- drivers/gpu/drm/tegra/output.c | 11 +++++++++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 1012335bb489..f1763b4d5b5f 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -124,7 +124,7 @@ struct tegra_output { struct drm_panel *panel; struct i2c_adapter *ddc; const struct edid *edid; - struct cec_notifier *notifier; + struct cec_notifier *cec; unsigned int hpd_irq; int hpd_gpio; enum of_gpio_flags hpd_gpio_flags; diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 0082468f703c..d19973945614 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -22,8 +22,6 @@ #include -#include - #include "hdmi.h" #include "drm.h" #include "dc.h" @@ -1709,10 +1707,6 @@ static int tegra_hdmi_probe(struct platform_device *pdev) return PTR_ERR(hdmi->vdd); } - hdmi->output.notifier = cec_notifier_get(&pdev->dev); - if (hdmi->output.notifier == NULL) - return -ENOMEM; - hdmi->output.dev = &pdev->dev; err = tegra_output_probe(&hdmi->output); @@ -1771,9 +1765,6 @@ static int tegra_hdmi_remove(struct platform_device *pdev) tegra_output_remove(&hdmi->output); - if (hdmi->output.notifier) - cec_notifier_put(hdmi->output.notifier); - return 0; } diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index c662efc7e413..9c2b9dad55c3 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -36,7 +36,7 @@ int tegra_output_connector_get_modes(struct drm_connector *connector) else if (output->ddc) edid = drm_get_edid(connector, output->ddc); - cec_notifier_set_phys_addr_from_edid(output->notifier, edid); + cec_notifier_set_phys_addr_from_edid(output->cec, edid); drm_connector_update_edid_property(connector, edid); if (edid) { @@ -73,7 +73,7 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force) } if (status != connector_status_connected) - cec_notifier_phys_addr_invalidate(output->notifier); + cec_notifier_phys_addr_invalidate(output->cec); return status; } @@ -174,11 +174,18 @@ int tegra_output_probe(struct tegra_output *output) disable_irq(output->hpd_irq); } + output->cec = cec_notifier_get(output->dev); + if (!output->cec) + return -ENOMEM; + return 0; } void tegra_output_remove(struct tegra_output *output) { + if (output->cec) + cec_notifier_put(output->cec); + if (gpio_is_valid(output->hpd_gpio)) { free_irq(output->hpd_irq, output); gpio_free(output->hpd_gpio); From cd54fb96e568cf95bad6deb8b070e05643e80935 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 3 Jan 2019 15:23:15 +0100 Subject: [PATCH 175/539] drm/tegra: sor: Parse more data from HDA format The HDA format data passed to the SOR from the HDA codec contains more information than just the rate and number of channels. Parse all the fields and store them in an internal structure for subsequent use. While at it, also fix an off-by-one error in the number of channels. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 69 ++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index ef8692b7075a..7839223aa040 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -393,6 +393,13 @@ struct tegra_sor_ops { int (*remove)(struct tegra_sor *sor); }; +struct tegra_sor_audio { + unsigned int sample_rate; + unsigned int channels; + unsigned int bits; + bool pcm; +}; + struct tegra_sor { struct host1x_client client; struct tegra_output output; @@ -429,10 +436,7 @@ struct tegra_sor { struct delayed_work scdc; bool scdc_enabled; - struct { - unsigned int sample_rate; - unsigned int channels; - } audio; + struct tegra_sor_audio audio; }; struct tegra_sor_state { @@ -3195,22 +3199,58 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor) return 0; } -static void tegra_hda_parse_format(unsigned int format, unsigned int *rate, - unsigned int *channels) +static void tegra_hda_parse_format(unsigned int format, + struct tegra_sor_audio *audio) { - unsigned int mul, div; + unsigned int mul, div, bits, channels; + + if (format & AC_FMT_TYPE_NON_PCM) + audio->pcm = false; + else + audio->pcm = true; if (format & AC_FMT_BASE_44K) - *rate = 44100; + audio->sample_rate = 44100; else - *rate = 48000; + audio->sample_rate = 48000; mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT; div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT; - *rate = *rate * (mul + 1) / (div + 1); + audio->sample_rate = audio->sample_rate * (mul + 1) / (div + 1); - *channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT; + switch (format & AC_FMT_BITS_MASK) { + case AC_FMT_BITS_8: + audio->bits = 8; + break; + + case AC_FMT_BITS_16: + audio->bits = 16; + break; + + case AC_FMT_BITS_20: + audio->bits = 20; + break; + + case AC_FMT_BITS_24: + audio->bits = 24; + break; + + case AC_FMT_BITS_32: + audio->bits = 32; + break; + + default: + bits = (format & AC_FMT_BITS_MASK) >> AC_FMT_BITS_SHIFT; + WARN(1, "invalid number of bits: %#x\n", bits); + audio->bits = 8; + break; + } + + channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT; + + /* channels are encoded as n - 1 */ + audio->channels = channels + 1; } static irqreturn_t tegra_sor_irq(int irq, void *data) @@ -3225,14 +3265,11 @@ static irqreturn_t tegra_sor_irq(int irq, void *data) value = tegra_sor_readl(sor, SOR_AUDIO_HDA_CODEC_SCRATCH0); if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) { - unsigned int format, sample_rate, channels; + unsigned int format; format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK; - tegra_hda_parse_format(format, &sample_rate, &channels); - - sor->audio.sample_rate = sample_rate; - sor->audio.channels = channels; + tegra_hda_parse_format(format, &sor->audio); tegra_sor_hdmi_audio_enable(sor); } else { From fad7b80643100d16b157b4367a6d2c7f8b19f812 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 3 Jan 2019 15:23:16 +0100 Subject: [PATCH 176/539] drm/tegra: hda: Extract HDA format parsing code This code can be reused for HDMI, so extract it into a reusable function. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/Makefile | 1 + drivers/gpu/drm/tegra/hda.c | 63 ++++++++++++++++++++++++++ drivers/gpu/drm/tegra/hda.h | 20 +++++++++ drivers/gpu/drm/tegra/sor.c | 80 ++++------------------------------ 4 files changed, 93 insertions(+), 71 deletions(-) create mode 100644 drivers/gpu/drm/tegra/hda.c create mode 100644 drivers/gpu/drm/tegra/hda.h diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index 2e0d6213f6bc..33c463e8d49f 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -10,6 +10,7 @@ tegra-drm-y := \ dc.o \ output.o \ rgb.o \ + hda.o \ hdmi.o \ mipi-phy.o \ dsi.o \ diff --git a/drivers/gpu/drm/tegra/hda.c b/drivers/gpu/drm/tegra/hda.c new file mode 100644 index 000000000000..94245a18a043 --- /dev/null +++ b/drivers/gpu/drm/tegra/hda.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2019 NVIDIA Corporation + */ + +#include + +#include + +#include "hda.h" + +void tegra_hda_parse_format(unsigned int format, struct tegra_hda_format *fmt) +{ + unsigned int mul, div, bits, channels; + + if (format & AC_FMT_TYPE_NON_PCM) + fmt->pcm = false; + else + fmt->pcm = true; + + if (format & AC_FMT_BASE_44K) + fmt->sample_rate = 44100; + else + fmt->sample_rate = 48000; + + mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT; + div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT; + + fmt->sample_rate *= (mul + 1) / (div + 1); + + switch (format & AC_FMT_BITS_MASK) { + case AC_FMT_BITS_8: + fmt->bits = 8; + break; + + case AC_FMT_BITS_16: + fmt->bits = 16; + break; + + case AC_FMT_BITS_20: + fmt->bits = 20; + break; + + case AC_FMT_BITS_24: + fmt->bits = 24; + break; + + case AC_FMT_BITS_32: + fmt->bits = 32; + break; + + default: + bits = (format & AC_FMT_BITS_MASK) >> AC_FMT_BITS_SHIFT; + WARN(1, "invalid number of bits: %#x\n", bits); + fmt->bits = 8; + break; + } + + channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT; + + /* channels are encoded as n - 1 */ + fmt->channels = channels + 1; +} diff --git a/drivers/gpu/drm/tegra/hda.h b/drivers/gpu/drm/tegra/hda.h new file mode 100644 index 000000000000..77269955a4f2 --- /dev/null +++ b/drivers/gpu/drm/tegra/hda.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2019 NVIDIA Corporation + */ + +#ifndef DRM_TEGRA_HDA_H +#define DRM_TEGRA_HDA_H 1 + +#include + +struct tegra_hda_format { + unsigned int sample_rate; + unsigned int channels; + unsigned int bits; + bool pcm; +}; + +void tegra_hda_parse_format(unsigned int format, struct tegra_hda_format *fmt); + +#endif diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 7839223aa040..79602debf231 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -19,8 +19,6 @@ #include -#include - #include #include #include @@ -28,6 +26,7 @@ #include "dc.h" #include "drm.h" +#include "hda.h" #include "sor.h" #include "trace.h" @@ -393,13 +392,6 @@ struct tegra_sor_ops { int (*remove)(struct tegra_sor *sor); }; -struct tegra_sor_audio { - unsigned int sample_rate; - unsigned int channels; - unsigned int bits; - bool pcm; -}; - struct tegra_sor { struct host1x_client client; struct tegra_output output; @@ -436,7 +428,7 @@ struct tegra_sor { struct delayed_work scdc; bool scdc_enabled; - struct tegra_sor_audio audio; + struct tegra_hda_format format; }; struct tegra_sor_state { @@ -2189,7 +2181,7 @@ static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor) return err; } - frame.channels = sor->audio.channels; + frame.channels = sor->format.channels; err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer)); if (err < 0) { @@ -2218,7 +2210,7 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor) value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA); /* inject null samples */ - if (sor->audio.channels != 2) + if (sor->format.channels != 2) value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL; else value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL; @@ -2249,7 +2241,7 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor) value = SOR_HDMI_AUDIO_N_RESET | SOR_HDMI_AUDIO_N_LOOKUP; tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_N); - value = (24000 * 4096) / (128 * sor->audio.sample_rate / 1000); + value = (24000 * 4096) / (128 * sor->format.sample_rate / 1000); tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0320); tegra_sor_writel(sor, 4096, SOR_AUDIO_NVAL_0320); @@ -2262,15 +2254,15 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor) tegra_sor_writel(sor, 20000, SOR_AUDIO_AVAL_1764); tegra_sor_writel(sor, 18816, SOR_AUDIO_NVAL_1764); - value = (24000 * 6144) / (128 * sor->audio.sample_rate / 1000); + value = (24000 * 6144) / (128 * sor->format.sample_rate / 1000); tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0480); tegra_sor_writel(sor, 6144, SOR_AUDIO_NVAL_0480); - value = (24000 * 12288) / (128 * sor->audio.sample_rate / 1000); + value = (24000 * 12288) / (128 * sor->format.sample_rate / 1000); tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0960); tegra_sor_writel(sor, 12288, SOR_AUDIO_NVAL_0960); - value = (24000 * 24576) / (128 * sor->audio.sample_rate / 1000); + value = (24000 * 24576) / (128 * sor->format.sample_rate / 1000); tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_1920); tegra_sor_writel(sor, 24576, SOR_AUDIO_NVAL_1920); @@ -3199,60 +3191,6 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor) return 0; } -static void tegra_hda_parse_format(unsigned int format, - struct tegra_sor_audio *audio) -{ - unsigned int mul, div, bits, channels; - - if (format & AC_FMT_TYPE_NON_PCM) - audio->pcm = false; - else - audio->pcm = true; - - if (format & AC_FMT_BASE_44K) - audio->sample_rate = 44100; - else - audio->sample_rate = 48000; - - mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT; - div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT; - - audio->sample_rate = audio->sample_rate * (mul + 1) / (div + 1); - - switch (format & AC_FMT_BITS_MASK) { - case AC_FMT_BITS_8: - audio->bits = 8; - break; - - case AC_FMT_BITS_16: - audio->bits = 16; - break; - - case AC_FMT_BITS_20: - audio->bits = 20; - break; - - case AC_FMT_BITS_24: - audio->bits = 24; - break; - - case AC_FMT_BITS_32: - audio->bits = 32; - break; - - default: - bits = (format & AC_FMT_BITS_MASK) >> AC_FMT_BITS_SHIFT; - WARN(1, "invalid number of bits: %#x\n", bits); - audio->bits = 8; - break; - } - - channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT; - - /* channels are encoded as n - 1 */ - audio->channels = channels + 1; -} - static irqreturn_t tegra_sor_irq(int irq, void *data) { struct tegra_sor *sor = data; @@ -3269,7 +3207,7 @@ static irqreturn_t tegra_sor_irq(int irq, void *data) format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK; - tegra_hda_parse_format(format, &sor->audio); + tegra_hda_parse_format(format, &sor->format); tegra_sor_hdmi_audio_enable(sor); } else { From e3c702dcc7b047a5187a5ad132bfdf3850cc33e1 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 3 Jan 2019 15:23:17 +0100 Subject: [PATCH 177/539] drm/tegra: hdmi: Reuse common HDA format parser Eliminate some duplicate code by reusing the HDA format parser already used by the SOR. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hdmi.c | 43 +++++++----------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index d19973945614..8fdff801c6f2 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -20,8 +20,7 @@ #include #include -#include - +#include "hda.h" #include "hdmi.h" #include "drm.h" #include "dc.h" @@ -69,8 +68,7 @@ struct tegra_hdmi { const struct tegra_hdmi_config *config; unsigned int audio_source; - unsigned int audio_sample_rate; - unsigned int audio_channels; + struct tegra_hda_format format; unsigned int pixel_clock; bool stereo; @@ -508,7 +506,7 @@ static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value) unsigned int i; for (i = 0; i < ARRAY_SIZE(regs); i++) { - if (regs[i].sample_rate == hdmi->audio_sample_rate) { + if (regs[i].sample_rate == hdmi->format.sample_rate) { tegra_hdmi_writel(hdmi, value, regs[i].offset); break; } @@ -562,7 +560,7 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi) * play back system startup sounds early. It is possibly not * needed on Linux at all. */ - if (hdmi->audio_channels == 2) + if (hdmi->format.channels == 2) value = SOR_AUDIO_CNTRL0_INJECT_NULLSMPL; else value = 0; @@ -593,12 +591,12 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi) tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_SPARE0); } - config = tegra_hdmi_get_audio_config(hdmi->audio_sample_rate, + config = tegra_hdmi_get_audio_config(hdmi->format.sample_rate, hdmi->pixel_clock); if (!config) { dev_err(hdmi->dev, "cannot set audio to %u Hz at %u Hz pixel clock\n", - hdmi->audio_sample_rate, hdmi->pixel_clock); + hdmi->format.sample_rate, hdmi->pixel_clock); return -EINVAL; } @@ -785,7 +783,7 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi) return; } - frame.channels = hdmi->audio_channels; + frame.channels = hdmi->format.channels; err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer)); if (err < 0) { @@ -1587,24 +1585,6 @@ static const struct of_device_id tegra_hdmi_of_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_hdmi_of_match); -static void hda_format_parse(unsigned int format, unsigned int *rate, - unsigned int *channels) -{ - unsigned int mul, div; - - if (format & AC_FMT_BASE_44K) - *rate = 44100; - else - *rate = 48000; - - mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT; - div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT; - - *rate = *rate * (mul + 1) / (div + 1); - - *channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT; -} - static irqreturn_t tegra_hdmi_irq(int irq, void *data) { struct tegra_hdmi *hdmi = data; @@ -1621,14 +1601,9 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data) value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0); if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) { - unsigned int sample_rate, channels; - format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK; - hda_format_parse(format, &sample_rate, &channels); - - hdmi->audio_sample_rate = sample_rate; - hdmi->audio_channels = channels; + tegra_hda_parse_format(format, &hdmi->format); err = tegra_hdmi_setup_audio(hdmi); if (err < 0) { @@ -1662,8 +1637,6 @@ static int tegra_hdmi_probe(struct platform_device *pdev) hdmi->dev = &pdev->dev; hdmi->audio_source = AUTO; - hdmi->audio_sample_rate = 48000; - hdmi->audio_channels = 2; hdmi->stereo = false; hdmi->dvi = false; From db5adf4d6dced4e3326ce369fe0c213c968095f4 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Wed, 14 Dec 2016 18:20:39 +0100 Subject: [PATCH 178/539] drm/tegra: hdmi: Fix audio to work with any pixel clock rate The audio setting implementation was limited to a few specific pixel clocks. This prevented HDMI audio from working on several test devices as they need a pixel clock that is not supported by this implementation. Fix this by implementing the algorithm provided in the TRM using fixed point arithmetic. This allows the driver to cope with any sane pixel clock rate. Signed-off-by: Alban Bedel [treding@nvidia.com: fix uninitialized variable warning] Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hdmi.c | 155 ++++++++++++----------------------- 1 file changed, 52 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 8fdff801c6f2..3ce551911548 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -115,68 +116,11 @@ static inline void tegra_hdmi_writel(struct tegra_hdmi *hdmi, u32 value, } struct tegra_hdmi_audio_config { - unsigned int pclk; unsigned int n; unsigned int cts; unsigned int aval; }; -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_32k[] = { - { 25200000, 4096, 25200, 24000 }, - { 27000000, 4096, 27000, 24000 }, - { 74250000, 4096, 74250, 24000 }, - { 148500000, 4096, 148500, 24000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_44_1k[] = { - { 25200000, 5880, 26250, 25000 }, - { 27000000, 5880, 28125, 25000 }, - { 74250000, 4704, 61875, 20000 }, - { 148500000, 4704, 123750, 20000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_48k[] = { - { 25200000, 6144, 25200, 24000 }, - { 27000000, 6144, 27000, 24000 }, - { 74250000, 6144, 74250, 24000 }, - { 148500000, 6144, 148500, 24000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_88_2k[] = { - { 25200000, 11760, 26250, 25000 }, - { 27000000, 11760, 28125, 25000 }, - { 74250000, 9408, 61875, 20000 }, - { 148500000, 9408, 123750, 20000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_96k[] = { - { 25200000, 12288, 25200, 24000 }, - { 27000000, 12288, 27000, 24000 }, - { 74250000, 12288, 74250, 24000 }, - { 148500000, 12288, 148500, 24000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_176_4k[] = { - { 25200000, 23520, 26250, 25000 }, - { 27000000, 23520, 28125, 25000 }, - { 74250000, 18816, 61875, 20000 }, - { 148500000, 18816, 123750, 20000 }, - { 0, 0, 0, 0 }, -}; - -static const struct tegra_hdmi_audio_config tegra_hdmi_audio_192k[] = { - { 25200000, 24576, 25200, 24000 }, - { 27000000, 24576, 27000, 24000 }, - { 74250000, 24576, 74250, 24000 }, - { 148500000, 24576, 148500, 24000 }, - { 0, 0, 0, 0 }, -}; - static const struct tmds_config tegra20_tmds_config[] = { { /* slow pixel clock modes */ .pclk = 27000000, @@ -414,52 +358,53 @@ static const struct tmds_config tegra124_tmds_config[] = { }, }; -static const struct tegra_hdmi_audio_config * -tegra_hdmi_get_audio_config(unsigned int sample_rate, unsigned int pclk) +static int +tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pix_clock, + struct tegra_hdmi_audio_config *config) { - const struct tegra_hdmi_audio_config *table; + const unsigned int afreq = 128 * audio_freq; + const unsigned int min_n = afreq / 1500; + const unsigned int max_n = afreq / 300; + const unsigned int ideal_n = afreq / 1000; + int64_t min_err = (uint64_t)-1 >> 1; + unsigned int min_delta = -1; + int n; - switch (sample_rate) { - case 32000: - table = tegra_hdmi_audio_32k; - break; + memset(config, 0, sizeof(*config)); + config->n = -1; - case 44100: - table = tegra_hdmi_audio_44_1k; - break; + for (n = min_n; n <= max_n; n++) { + uint64_t cts_f, aval_f; + unsigned int delta; + int64_t cts, err; - case 48000: - table = tegra_hdmi_audio_48k; - break; + /* compute aval in 48.16 fixed point */ + aval_f = ((int64_t)24000000 << 16) * n; + do_div(aval_f, afreq); + /* It should round without any rest */ + if (aval_f & 0xFFFF) + continue; - case 88200: - table = tegra_hdmi_audio_88_2k; - break; + /* Compute cts in 48.16 fixed point */ + cts_f = ((int64_t)pix_clock << 16) * n; + do_div(cts_f, afreq); + /* Round it to the nearest integer */ + cts = (cts_f & ~0xFFFF) + ((cts_f & BIT(15)) << 1); - case 96000: - table = tegra_hdmi_audio_96k; - break; + delta = abs(n - ideal_n); - case 176400: - table = tegra_hdmi_audio_176_4k; - break; - - case 192000: - table = tegra_hdmi_audio_192k; - break; - - default: - return NULL; + /* Compute the absolute error */ + err = abs((int64_t)cts_f - cts); + if (err < min_err || (err == min_err && delta < min_delta)) { + config->n = n; + config->cts = cts >> 16; + config->aval = aval_f >> 16; + min_delta = delta; + min_err = err; + } } - while (table->pclk) { - if (table->pclk == pclk) - return table; - - table++; - } - - return NULL; + return config->n != -1 ? 0 : -EINVAL; } static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi) @@ -515,8 +460,9 @@ static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value) static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi) { - const struct tegra_hdmi_audio_config *config; + struct tegra_hdmi_audio_config config; u32 source, value; + int err; switch (hdmi->audio_source) { case HDA: @@ -591,25 +537,28 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi) tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_SPARE0); } - config = tegra_hdmi_get_audio_config(hdmi->format.sample_rate, - hdmi->pixel_clock); - if (!config) { + err = tegra_hdmi_get_audio_config(hdmi->format.sample_rate, + hdmi->pixel_clock, &config); + if (err < 0) { dev_err(hdmi->dev, "cannot set audio to %u Hz at %u Hz pixel clock\n", hdmi->format.sample_rate, hdmi->pixel_clock); - return -EINVAL; + return err; } + dev_dbg(hdmi->dev, "audio: pixclk=%u, n=%u, cts=%u, aval=%u\n", + hdmi->pixel_clock, config.n, config.cts, config.aval); + tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_HDMI_ACR_CTRL); value = AUDIO_N_RESETF | AUDIO_N_GENERATE_ALTERNATE | - AUDIO_N_VALUE(config->n - 1); + AUDIO_N_VALUE(config.n - 1); tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N); - tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE, + tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config.n) | ACR_ENABLE, HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH); - tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config->cts), + tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config.cts), HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW); value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1); @@ -620,7 +569,7 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi) tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N); if (hdmi->config->has_hda) - tegra_hdmi_write_aval(hdmi, config->aval); + tegra_hdmi_write_aval(hdmi, config.aval); tegra_hdmi_setup_audio_fs_tables(hdmi); From 18bb2bccb5492fb5c36908191b8af77e54c58814 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Jan 2019 21:04:01 +0000 Subject: [PATCH 179/539] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more consistent if called concurrently, and only do the wedging and reporting once, curtailing any possible race where we start unwedging in the middle of a wedge. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190114210408.4561-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 34 ++++++++++++++----- drivers/gpu/drm/i915/i915_gpu_error.h | 4 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 565b2fa1607d..5c6089777fde 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3187,10 +3187,15 @@ static void nop_submit_request(struct i915_request *request) void i915_gem_set_wedged(struct drm_i915_private *i915) { + struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; - GEM_TRACE("start\n"); + mutex_lock(&error->wedge_mutex); + if (test_bit(I915_WEDGED, &error->flags)) { + mutex_unlock(&error->wedge_mutex); + return; + } if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { struct drm_printer p = drm_debug_printer(__func__); @@ -3199,8 +3204,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) intel_engine_dump(engine, &p, "%s\n", engine->name); } - if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) - goto out; + GEM_TRACE("start\n"); /* * First, stop submission to hw, but do not yet complete requests by @@ -3236,23 +3240,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) intel_engine_wakeup(engine); } -out: - GEM_TRACE("end\n"); + smp_mb__before_atomic(); + set_bit(I915_WEDGED, &error->flags); - wake_up_all(&i915->gpu_error.reset_queue); + GEM_TRACE("end\n"); + mutex_unlock(&error->wedge_mutex); + + wake_up_all(&error->reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) { + struct i915_gpu_error *error = &i915->gpu_error; struct i915_timeline *tl; + bool ret = false; lockdep_assert_held(&i915->drm.struct_mutex); - if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) + + if (!test_bit(I915_WEDGED, &error->flags)) return true; if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ return false; + mutex_lock(&error->wedge_mutex); + GEM_TRACE("start\n"); /* @@ -3286,7 +3298,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ if (dma_fence_default_wait(&rq->fence, true, MAX_SCHEDULE_TIMEOUT) < 0) - return false; + goto unlock; } i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); @@ -3309,8 +3321,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); + ret = true; +unlock: + mutex_unlock(&i915->gpu_error.wedge_mutex); - return true; + return ret; } static void @@ -5706,6 +5721,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); + mutex_init(&dev_priv->gpu_error.wedge_mutex); atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 6d9f45468ac1..604291f7762d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -271,8 +271,8 @@ struct i915_gpu_error { #define I915_RESET_BACKOFF 0 #define I915_RESET_HANDOFF 1 #define I915_RESET_MODESET 2 +#define I915_RESET_ENGINE 3 #define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; @@ -283,6 +283,8 @@ struct i915_gpu_error { /** Reason for the current *global* reset */ const char *reason; + struct mutex wedge_mutex; /* serialises wedging/unwedging */ + /** * Waitqueue to signal when a hang is detected. Used to for waiters * to release the struct_mutex for the reset to procede. diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 082809569681..3cda66292e76 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void) init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); + mutex_init(&i915->gpu_error.wedge_mutex); i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) From 9f58892ea9962002399132fd3f40c6a273f8d9e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Jan 2019 15:33:04 +0000 Subject: [PATCH 180/539] drm/i915: Pull all the reset functionality together into i915_reset.c Currently the code to reset the GPU and our state is spread widely across a few files. Pull the logic together into a common file. Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190116153304.787-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 + drivers/gpu/drm/i915/i915_drv.c | 206 +-- drivers/gpu/drm/i915/i915_drv.h | 31 - drivers/gpu/drm/i915/i915_gem.c | 446 +----- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/i915_irq.c | 238 --- drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/i915_reset.c | 1389 +++++++++++++++++ drivers/gpu/drm/i915/i915_reset.h | 56 + drivers/gpu/drm/i915/intel_display.c | 15 +- drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_guc.h | 3 + drivers/gpu/drm/i915/intel_hangcheck.c | 1 + drivers/gpu/drm/i915/intel_uc.c | 1 + drivers/gpu/drm/i915/intel_uncore.c | 556 ------- drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 + .../drm/i915/selftests/intel_workarounds.c | 1 + 18 files changed, 1482 insertions(+), 1471 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_reset.c create mode 100644 drivers/gpu/drm/i915/i915_reset.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c34bee16730d..611115ed00db 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -40,9 +40,10 @@ i915-y := i915_drv.o \ i915_mm.o \ i915_params.o \ i915_pci.o \ + i915_reset.o \ i915_suspend.o \ - i915_syncmap.o \ i915_sw_fence.o \ + i915_syncmap.o \ i915_sysfs.o \ intel_csr.o \ intel_device_info.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 24e2d52efa8e..ece72e0e41bc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,8 @@ #include "intel_drv.h" #include "intel_guc_submission.h" +#include "i915_reset.h" + static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { return to_i915(node->minor->dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index dafbbfadd1ad..f462a4d28af4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -48,6 +48,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_pmu.h" +#include "i915_reset.h" #include "i915_query.h" #include "i915_vgpu.h" #include "intel_drv.h" @@ -2205,211 +2206,6 @@ static int i915_resume_switcheroo(struct drm_device *dev) return i915_drm_resume(dev); } -/** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset - * @stalled_mask: mask of the stalled engines with the guilty requests - * @reason: user error message for why we are resetting - * - * Reset the chip. Useful if a hang is detected. Marks the device as wedged - * on failure. - * - * Caller must hold the struct_mutex. - * - * Procedure is fairly simple: - * - reset the chip using the reset reg - * - re-init context state - * - re-init hardware status page - * - re-init ring buffer - * - re-init interrupt state - * - re-init display - */ -void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, - const char *reason) -{ - struct i915_gpu_error *error = &i915->gpu_error; - int ret; - int i; - - GEM_TRACE("flags=%lx\n", error->flags); - - might_sleep(); - lockdep_assert_held(&i915->drm.struct_mutex); - assert_rpm_wakelock_held(i915); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - - if (!test_bit(I915_RESET_HANDOFF, &error->flags)) - return; - - /* Clear any previous failed attempts at recovery. Time to try again. */ - if (!i915_gem_unset_wedged(i915)) - goto wakeup; - - if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; - - ret = i915_gem_reset_prepare(i915); - if (ret) { - dev_err(i915->drm.dev, "GPU recovery failed\n"); - goto taint; - } - - if (!intel_has_gpu_reset(i915)) { - if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); - else - DRM_DEBUG_DRIVER("GPU reset disabled\n"); - goto error; - } - - for (i = 0; i < 3; i++) { - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret == 0) - break; - - msleep(100); - } - if (ret) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); - goto taint; - } - - /* Ok, now get things going again... */ - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - ret = i915_ggtt_enable_hw(i915); - if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", - ret); - goto error; - } - - i915_gem_reset(i915, stalled_mask); - intel_overlay_reset(i915); - - /* - * Next we need to restore the context, but we don't use those - * yet either... - * - * Ring buffer needs to be re-initialized in the KMS case, or if X - * was running at the time of the reset (i.e. we weren't VT - * switched away). - */ - ret = i915_gem_init_hw(i915); - if (ret) { - DRM_ERROR("Failed to initialise HW following reset (%d)\n", - ret); - goto error; - } - - i915_queue_hangcheck(i915); - -finish: - i915_gem_reset_finish(i915); -wakeup: - clear_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_bit(&error->flags, I915_RESET_HANDOFF); - return; - -taint: - /* - * History tells us that if we cannot reset the GPU now, we - * never will. This then impacts everything that is run - * subsequently. On failing the reset, we mark the driver - * as wedged, preventing further execution on the GPU. - * We also want to go one step further and add a taint to the - * kernel so that any subsequent faults can be traced back to - * this failure. This is important for CI, where if the - * GPU/driver fails we would like to reboot and restart testing - * rather than continue on into oblivion. For everyone else, - * the system should still plod along, but they have been warned! - */ - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -error: - i915_gem_set_wedged(i915); - i915_retire_requests(i915); - goto finish; -} - -static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - return intel_gpu_reset(dev_priv, intel_engine_flag(engine)); -} - -/** - * i915_reset_engine - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no dev_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. - * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) -{ - struct i915_gpu_error *error = &engine->i915->gpu_error; - struct i915_request *active_request; - int ret; - - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - - active_request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR_OR_NULL(active_request)) { - /* Either the previous reset failed, or we pardon the reset. */ - ret = PTR_ERR(active_request); - goto out; - } - - if (msg) - dev_notice(engine->i915->drm.dev, - "Resetting %s for %s\n", engine->name, msg); - error->reset_engine_count[engine->id]++; - - if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); - else - ret = intel_guc_reset_engine(&engine->i915->guc, engine); - if (ret) { - /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - engine->i915->guc.execbuf_client ? "GuC " : "", - engine->name, ret); - goto out; - } - - /* - * The request that caused the hang is stuck on elsp, we know the - * active request and can drop it, adjust head to skip the offending - * request to resume executing remaining requests in the queue. - */ - i915_gem_reset_engine(engine, active_request, true); - - /* - * The engine and its registers (and workarounds in case of render) - * have been reset to their default values. Follow the init_ring - * process to program RING_MODE, HWSP and re-enable submission. - */ - ret = engine->init_hw(engine); - if (ret) - goto out; - -out: - intel_engine_cancel_stop_cs(engine); - i915_gem_reset_finish_engine(engine); - return ret; -} - static int i915_pm_prepare(struct device *kdev) { struct pci_dev *pdev = to_pci_dev(kdev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index da055a86db4d..310d9e1e1620 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2615,19 +2615,7 @@ extern const struct dev_pm_ops i915_pm_ops; extern int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent); extern void i915_driver_unload(struct drm_device *dev); -extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); -extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -extern void i915_reset(struct drm_i915_private *i915, - unsigned int stalled_mask, - const char *reason); -extern int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); - -extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); -extern int intel_reset_guc(struct drm_i915_private *dev_priv); -extern int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); @@ -2670,20 +2658,11 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -__printf(4, 5) -void i915_handle_error(struct drm_i915_private *dev_priv, - u32 engine_mask, - unsigned long flags, - const char *fmt, ...); -#define I915_ERROR_CAPTURE BIT(0) - extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); -void i915_clear_error_registers(struct drm_i915_private *dev_priv); - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; @@ -3048,18 +3027,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return READ_ONCE(error->reset_engine_count[engine->id]); } -struct i915_request * -i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); -int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); -void i915_gem_reset(struct drm_i915_private *dev_priv, - unsigned int stalled_mask); -void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); -void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); -void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5c6089777fde..7185a5b4a5ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -27,15 +27,6 @@ #include #include -#include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_vgpu.h" -#include "i915_trace.h" -#include "intel_drv.h" -#include "intel_frontbuffer.h" -#include "intel_mocs.h" -#include "intel_workarounds.h" -#include "i915_gemfs.h" #include #include #include @@ -46,6 +37,18 @@ #include #include +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_gemfs.h" +#include "i915_reset.h" +#include "i915_trace.h" +#include "i915_vgpu.h" + +#include "intel_drv.h" +#include "intel_frontbuffer.h" +#include "intel_mocs.h" +#include "intel_workarounds.h" + static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -2873,61 +2876,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) -{ - unsigned int score; - unsigned long prev_hang; - - if (i915_gem_context_is_banned(ctx)) - score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; - - prev_hang = xchg(&file_priv->hang_timestamp, jiffies); - if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) - score += I915_CLIENT_SCORE_HANG_FAST; - - if (score) { - atomic_add(score, &file_priv->ban_score); - - DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", - ctx->name, score, - atomic_read(&file_priv->ban_score)); - } -} - -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) -{ - unsigned int score; - bool banned, bannable; - - atomic_inc(&ctx->guilty_count); - - bannable = i915_gem_context_is_bannable(ctx); - score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); - banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; - - /* Cool contexts don't accumulate client ban score */ - if (!bannable) - return; - - if (banned) { - DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", - ctx->name, atomic_read(&ctx->guilty_count), - score); - i915_gem_context_set_banned(ctx); - } - - if (!IS_ERR_OR_NULL(ctx->file_priv)) - i915_gem_client_mark_guilty(ctx->file_priv, ctx); -} - -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) -{ - atomic_inc(&ctx->active_count); -} - struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -2958,376 +2906,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) return active; } -/* - * Ensure irq handler finishes, and not run again. - * Also return the active request so that we only search for it once. - */ -struct i915_request * -i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) -{ - struct i915_request *request; - - /* - * During the reset sequence, we must prevent the engine from - * entering RC6. As the context state is undefined until we restart - * the engine, if it does enter RC6 during the reset, the state - * written to the powercontext is undefined and so we may lose - * GPU state upon resume, i.e. fail to restart after a reset. - */ - intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - request = engine->reset.prepare(engine); - if (request && request->fence.error == -EIO) - request = ERR_PTR(-EIO); /* Previous reset failed! */ - - return request; -} - -int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - struct i915_request *request; - enum intel_engine_id id; - int err = 0; - - for_each_engine(engine, dev_priv, id) { - request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR(request)) { - err = PTR_ERR(request); - continue; - } - - engine->hangcheck.active_request = request; - } - - i915_gem_revoke_fences(dev_priv); - intel_uc_sanitize(dev_priv); - - return err; -} - -static void engine_skip_context(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct i915_gem_context *hung_ctx = request->gem_context; - struct i915_timeline *timeline = request->timeline; - unsigned long flags; - - GEM_BUG_ON(timeline == &engine->timeline); - - spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock(&timeline->lock); - - list_for_each_entry_continue(request, &engine->timeline.requests, link) - if (request->gem_context == hung_ctx) - i915_request_skip(request, -EIO); - - list_for_each_entry(request, &timeline->requests, link) - i915_request_skip(request, -EIO); - - spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -i915_gem_reset_request(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled) -{ - /* The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. - */ - - if (i915_request_completed(request)) { - GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", - engine->name, request->global_seqno, - request->fence.context, request->fence.seqno, - intel_engine_get_seqno(engine)); - stalled = false; - } - - if (stalled) { - i915_gem_context_mark_guilty(request->gem_context); - i915_request_skip(request, -EIO); - - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(request->gem_context)) - engine_skip_context(request); - } else { - /* - * Since this is not the hung engine, it may have advanced - * since the hang declaration. Double check by refinding - * the active request at the time of the reset. - */ - request = i915_gem_find_active_request(engine); - if (request) { - unsigned long flags; - - i915_gem_context_mark_innocent(request->gem_context); - dma_fence_set_error(&request->fence, -EAGAIN); - - /* Rewind the engine to replay the incomplete rq */ - spin_lock_irqsave(&engine->timeline.lock, flags); - request = list_prev_entry(request, link); - if (&request->link == &engine->timeline.requests) - request = NULL; - spin_unlock_irqrestore(&engine->timeline.lock, flags); - } - } - - return request; -} - -void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request, - bool stalled) -{ - if (request) - request = i915_gem_reset_request(engine, request, stalled); - - /* Setup the CS to resume from the breadcrumb of the hung request */ - engine->reset.reset(engine, request); -} - -void i915_gem_reset(struct drm_i915_private *dev_priv, - unsigned int stalled_mask) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - i915_retire_requests(dev_priv); - - for_each_engine(engine, dev_priv, id) { - struct intel_context *ce; - - i915_gem_reset_engine(engine, - engine->hangcheck.active_request, - stalled_mask & ENGINE_MASK(id)); - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - * - * More mysteriously, if we leave the engine idle after a reset, - * the next userspace batch may hang, with what appears to be - * an incoherent read by the CS (presumably stale TLB). An - * empty request appears sufficient to paper over the glitch. - */ - if (intel_engine_is_idle(engine)) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, - dev_priv->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - } - - i915_gem_restore_fences(dev_priv); -} - -void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) -{ - engine->reset.finish(engine); - - intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); -} - -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - for_each_engine(engine, dev_priv, id) { - engine->hangcheck.active_request = NULL; - i915_gem_reset_finish_engine(engine); - } -} - -static void nop_submit_request(struct i915_request *request) -{ - unsigned long flags; - - GEM_TRACE("%s fence %llx:%d -> -EIO\n", - request->engine->name, - request->fence.context, request->fence.seqno); - dma_fence_set_error(&request->fence, -EIO); - - spin_lock_irqsave(&request->engine->timeline.lock, flags); - __i915_request_submit(request); - intel_engine_write_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline.lock, flags); -} - -void i915_gem_set_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - mutex_lock(&error->wedge_mutex); - if (test_bit(I915_WEDGED, &error->flags)) { - mutex_unlock(&error->wedge_mutex); - return; - } - - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { - struct drm_printer p = drm_debug_printer(__func__); - - for_each_engine(engine, i915, id) - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - - GEM_TRACE("start\n"); - - /* - * First, stop submission to hw, but do not yet complete requests by - * rolling the global seqno forward (since this would complete requests - * for which we haven't set the fence error to EIO yet). - */ - for_each_engine(engine, i915, id) - i915_gem_reset_prepare_engine(engine); - - /* Even if the GPU reset fails, it should still stop the engines */ - if (INTEL_GEN(i915) >= 5) - intel_gpu_reset(i915, ALL_ENGINES); - - for_each_engine(engine, i915, id) { - engine->submit_request = nop_submit_request; - engine->schedule = NULL; - } - i915->caps.scheduler = 0; - - /* - * Make sure no request can slip through without getting completed by - * either this call here to intel_engine_write_global_seqno, or the one - * in nop_submit_request. - */ - synchronize_rcu(); - - /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) - engine->cancel_requests(engine); - - for_each_engine(engine, i915, id) { - i915_gem_reset_finish_engine(engine); - intel_engine_wakeup(engine); - } - - smp_mb__before_atomic(); - set_bit(I915_WEDGED, &error->flags); - - GEM_TRACE("end\n"); - mutex_unlock(&error->wedge_mutex); - - wake_up_all(&error->reset_queue); -} - -bool i915_gem_unset_wedged(struct drm_i915_private *i915) -{ - struct i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; - bool ret = false; - - lockdep_assert_held(&i915->drm.struct_mutex); - - if (!test_bit(I915_WEDGED, &error->flags)) - return true; - - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ - return false; - - mutex_lock(&error->wedge_mutex); - - GEM_TRACE("start\n"); - - /* - * Before unwedging, make sure that all pending operations - * are flushed and errored out - we may have requests waiting upon - * third party fences. We marked all inflight requests as EIO, and - * every execbuf since returned EIO, for consistency we want all - * the currently pending requests to also be marked as EIO, which - * is done inside our nop_submit_request - and so we must wait. - * - * No more can be submitted until we reset the wedged bit. - */ - list_for_each_entry(tl, &i915->gt.timelines, link) { - struct i915_request *rq; - - rq = i915_gem_active_peek(&tl->last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; - - /* - * We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - goto unlock; - } - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); - - intel_engines_sanitize(i915, false); - - /* - * Undo nop_submit_request. We prevent all new i915 requests from - * being queued (by disallowing execbuf whilst wedged) so having - * waited for all active requests above, we know the system is idle - * and do not have to worry about a thread being inside - * engine->submit_request() as we swap over. So unlike installing - * the nop_submit_request on reset, we can do this from normal - * context and do not require stop_machine(). - */ - intel_engines_reset_default_submission(i915); - i915_gem_contexts_lost(i915); - - GEM_TRACE("end\n"); - - smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); - ret = true; -unlock: - mutex_unlock(&i915->gpu_error.wedge_mutex); - - return ret; -} - static void i915_gem_retire_work_handler(struct work_struct *work) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d24628f184e4..9081e3bc5a59 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -37,6 +37,7 @@ #include "i915_drv.h" #include "i915_vgpu.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 94187e68d39a..1c6cf024a509 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2930,46 +2930,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const char *name; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - dev_err(w->i915->drm.dev, - "%s timed out, cancelling all in-flight rendering.\n", - w->name); - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) -{ - w->i915 = i915; - w->name = name; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __fini_wedge((W))) - static u32 gen11_gt_engine_identity(struct drm_i915_private * const i915, const unsigned int bank, const unsigned int bit) @@ -3180,204 +3140,6 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -static void i915_reset_device(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *reason) -{ - struct i915_gpu_error *error = &dev_priv->gpu_error; - struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; - char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; - char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; - char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct wedge_me w; - - kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { - intel_prepare_reset(dev_priv); - - error->reason = reason; - error->stalled_mask = engine_mask; - - /* Signal that locked waiters should reset the GPU */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_all(&error->wait_queue); - - /* Wait for anyone holding the lock to wakeup, without - * blocking indefinitely on struct_mutex. - */ - do { - if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, engine_mask, reason); - mutex_unlock(&dev_priv->drm.struct_mutex); - } - } while (wait_on_bit_timeout(&error->flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE, - 1)); - - error->stalled_mask = 0; - error->reason = NULL; - - intel_finish_reset(dev_priv); - } - - if (!test_bit(I915_WEDGED, &error->flags)) - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); -} - -void i915_clear_error_registers(struct drm_i915_private *dev_priv) -{ - u32 eir; - - if (!IS_GEN(dev_priv, 2)) - I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); - - if (INTEL_GEN(dev_priv) < 4) - I915_WRITE(IPEIR, I915_READ(IPEIR)); - else - I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); - - I915_WRITE(EIR, I915_READ(EIR)); - eir = I915_READ(EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT); - } - - if (INTEL_GEN(dev_priv) >= 8) { - I915_WRITE(GEN8_RING_FAULT_REG, - I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID); - POSTING_READ(GEN8_RING_FAULT_REG); - } else if (INTEL_GEN(dev_priv) >= 6) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) { - I915_WRITE(RING_FAULT_REG(engine), - I915_READ(RING_FAULT_REG(engine)) & - ~RING_FAULT_VALID); - } - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); - } -} - -/** - * i915_handle_error - handle a gpu error - * @dev_priv: i915 device private - * @engine_mask: mask representing engines that are hung - * @flags: control flags - * @fmt: Error message format string - * - * Do some basic checking of register state at error time and - * dump it to the syslog. Also call i915_capture_error_state() to make - * sure we get a record and make it available in debugfs. Fire a uevent - * so userspace knows something bad happened (should trigger collection - * of a ring dump etc.). - */ -void i915_handle_error(struct drm_i915_private *dev_priv, - u32 engine_mask, - unsigned long flags, - const char *fmt, ...) -{ - struct intel_engine_cs *engine; - intel_wakeref_t wakeref; - unsigned int tmp; - char error_msg[80]; - char *msg = NULL; - - if (fmt) { - va_list args; - - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); - - msg = error_msg; - } - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugfs, so get an RPM reference. - */ - wakeref = intel_runtime_pm_get(dev_priv); - - engine_mask &= INTEL_INFO(dev_priv)->ring_mask; - - if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(dev_priv, engine_mask, msg); - i915_clear_error_registers(dev_priv); - } - - /* - * Try engine reset when available. We fall back to full reset if - * single reset fails. - */ - if (intel_has_reset_engine(dev_priv) && - !i915_terminally_wedged(&dev_priv->gpu_error)) { - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags)) - continue; - - if (i915_reset_engine(engine, msg) == 0) - engine_mask &= ~intel_engine_flag(engine); - - clear_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags); - wake_up_bit(&dev_priv->gpu_error.flags, - I915_RESET_ENGINE + engine->id); - } - } - - if (!engine_mask) - goto out; - - /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) { - wait_event(dev_priv->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, - &dev_priv->gpu_error.flags)); - goto out; - } - - /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, dev_priv, tmp) { - while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags)) - wait_on_bit(&dev_priv->gpu_error.flags, - I915_RESET_ENGINE + engine->id, - TASK_UNINTERRUPTIBLE); - } - - i915_reset_device(dev_priv, engine_mask, msg); - - for_each_engine(engine, dev_priv, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &dev_priv->gpu_error.flags); - } - - clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.reset_queue); - -out: - intel_runtime_pm_put(dev_priv, wakeref); -} - /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f3c3593362ec..33eb9df0dd0e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include #include "i915_drv.h" +#include "i915_reset.h" static const char *i915_fence_get_driver_name(struct dma_fence *fence) { diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c new file mode 100644 index 000000000000..342d9ee42601 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -0,0 +1,1389 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "i915_gpu_error.h" +#include "i915_reset.h" + +#include "intel_guc.h" + +static void engine_skip_context(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *hung_ctx = rq->gem_context; + struct i915_timeline *timeline = rq->timeline; + unsigned long flags; + + GEM_BUG_ON(timeline == &engine->timeline); + + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock(&timeline->lock); + + list_for_each_entry_continue(rq, &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); + + list_for_each_entry(rq, &timeline->requests, link) + i915_request_skip(rq, -EIO); + + spin_unlock(&timeline->lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void client_mark_guilty(struct drm_i915_file_private *file_priv, + const struct i915_gem_context *ctx) +{ + unsigned int score; + unsigned long prev_hang; + + if (i915_gem_context_is_banned(ctx)) + score = I915_CLIENT_SCORE_CONTEXT_BAN; + else + score = 0; + + prev_hang = xchg(&file_priv->hang_timestamp, jiffies); + if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) + score += I915_CLIENT_SCORE_HANG_FAST; + + if (score) { + atomic_add(score, &file_priv->ban_score); + + DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", + ctx->name, score, + atomic_read(&file_priv->ban_score)); + } +} + +static void context_mark_guilty(struct i915_gem_context *ctx) +{ + unsigned int score; + bool banned, bannable; + + atomic_inc(&ctx->guilty_count); + + bannable = i915_gem_context_is_bannable(ctx); + score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); + banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; + + /* Cool contexts don't accumulate client ban score */ + if (!bannable) + return; + + if (banned) { + DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", + ctx->name, atomic_read(&ctx->guilty_count), + score); + i915_gem_context_set_banned(ctx); + } + + if (!IS_ERR_OR_NULL(ctx->file_priv)) + client_mark_guilty(ctx->file_priv, ctx); +} + +static void context_mark_innocent(struct i915_gem_context *ctx) +{ + atomic_inc(&ctx->active_count); +} + +static void gen3_stop_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + + if (intel_engine_stop_cs(engine)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); + + I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); + POSTING_READ_FW(RING_HEAD(base)); /* paranoia */ + + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + POSTING_READ_FW(RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + I915_WRITE_FW(RING_CTL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); +} + +static void i915_stop_engines(struct drm_i915_private *i915, + unsigned int engine_mask) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (INTEL_GEN(i915) < 3) + return; + + for_each_engine_masked(engine, i915, engine_mask, id) + gen3_stop_engine(engine); +} + +static bool i915_in_reset(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return gdrst & GRDOM_RESET_STATUS; +} + +static int i915_do_reset(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + int err; + + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + usleep_range(50, 200); + err = wait_for(i915_in_reset(pdev), 500); + + /* Clear the reset request. */ + pci_write_config_byte(pdev, I915_GDRST, 0); + usleep_range(50, 200); + if (!err) + err = wait_for(!i915_in_reset(pdev), 500); + + return err; +} + +static bool g4x_reset_complete(struct pci_dev *pdev) +{ + u8 gdrst; + + pci_read_config_byte(pdev, I915_GDRST, &gdrst); + return (gdrst & GRDOM_RESET_ENABLE) == 0; +} + +static int g33_do_reset(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = i915->drm.pdev; + + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + return wait_for(g4x_reset_complete(pdev), 500); +} + +static int g4x_do_reset(struct drm_i915_private *dev_priv, + unsigned int engine_mask, + unsigned int retry) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + int ret; + + /* WaVcpClkGateDisableForMediaReset:ctg,elk */ + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ(VDECCLK_GATE_D); + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_MEDIA | GRDOM_RESET_ENABLE); + ret = wait_for(g4x_reset_complete(pdev), 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); + ret = wait_for(g4x_reset_complete(pdev), 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + +out: + pci_write_config_byte(pdev, I915_GDRST, 0); + + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ(VDECCLK_GATE_D); + + return ret; +} + +static int ironlake_do_reset(struct drm_i915_private *dev_priv, + unsigned int engine_mask, + unsigned int retry) +{ + int ret; + + I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = intel_wait_for_register(dev_priv, + ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, + 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } + + I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = intel_wait_for_register(dev_priv, + ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, + 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } + +out: + I915_WRITE(ILK_GDSR, 0); + POSTING_READ(ILK_GDSR); + return ret; +} + +/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ +static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, + u32 hw_domain_mask) +{ + int err; + + /* + * GEN6_GDRST is not in the gt power well, no need to check + * for fifo space for the write or forcewake the chip for + * the read + */ + I915_WRITE_FW(GEN6_GDRST, hw_domain_mask); + + /* Wait for the device to ack the reset requests */ + err = __intel_wait_for_register_fw(dev_priv, + GEN6_GDRST, hw_domain_mask, 0, + 500, 0, + NULL); + if (err) + DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + + return err; +} + +static int gen6_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN6_GRDOM_RENDER, + [BCS] = GEN6_GRDOM_BLT, + [VCS] = GEN6_GRDOM_MEDIA, + [VCS2] = GEN8_GRDOM_MEDIA2, + [VECS] = GEN6_GRDOM_VECS, + }; + u32 hw_mask; + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN6_GRDOM_FULL; + } else { + unsigned int tmp; + + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) + hw_mask |= hw_engine_mask[engine->id]; + } + + return gen6_hw_domain_reset(i915, hw_mask); +} + +static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; + u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; + i915_reg_t sfc_usage; + u32 sfc_usage_bit; + u32 sfc_reset_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return 0; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); + sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + + sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); + sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; + + sfc_usage = GEN11_VECS_SFC_USAGE(engine); + sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; + sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); + break; + + default: + return 0; + } + + /* + * Tell the engine that a software reset is going to happen. The engine + * will then try to force lock the SFC (if currently locked, it will + * remain so until we tell the engine it is safe to unlock; if currently + * unlocked, it will ignore this and all new lock requests). If SFC + * ends up being locked to the engine we want to reset, we have to reset + * it as well (we will unlock it once the reset sequence is completed). + */ + I915_WRITE_FW(sfc_forced_lock, + I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit); + + if (__intel_wait_for_register_fw(dev_priv, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL)) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return 0; + } + + if (I915_READ_FW(sfc_usage) & sfc_usage_bit) + return sfc_reset_bit; + + return 0; +} + +static void gen11_unlock_sfc(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + i915_reg_t sfc_forced_lock; + u32 sfc_forced_lock_bit; + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + if ((BIT(engine->instance) & vdbox_sfc_access) == 0) + return; + + sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; + break; + + case VIDEO_ENHANCEMENT_CLASS: + sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); + sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; + break; + + default: + return; + } + + I915_WRITE_FW(sfc_forced_lock, + I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit); +} + +static int gen11_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN11_GRDOM_RENDER, + [BCS] = GEN11_GRDOM_BLT, + [VCS] = GEN11_GRDOM_MEDIA, + [VCS2] = GEN11_GRDOM_MEDIA2, + [VCS3] = GEN11_GRDOM_MEDIA3, + [VCS4] = GEN11_GRDOM_MEDIA4, + [VECS] = GEN11_GRDOM_VECS, + [VECS2] = GEN11_GRDOM_VECS2, + }; + struct intel_engine_cs *engine; + unsigned int tmp; + u32 hw_mask; + int ret; + + BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN11_GRDOM_FULL; + } else { + hw_mask = 0; + for_each_engine_masked(engine, i915, engine_mask, tmp) { + hw_mask |= hw_engine_mask[engine->id]; + hw_mask |= gen11_lock_sfc(i915, engine); + } + } + + ret = gen6_hw_domain_reset(i915, hw_mask); + + if (engine_mask != ALL_ENGINES) + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen11_unlock_sfc(i915, engine); + + return ret; +} + +static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), + _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); + + ret = __intel_wait_for_register_fw(dev_priv, + RING_RESET_CTL(engine->mmio_base), + RESET_CTL_READY_TO_RESET, + RESET_CTL_READY_TO_RESET, + 700, 0, + NULL); + if (ret) + DRM_ERROR("%s: reset request timeout\n", engine->name); + + return ret; +} + +static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), + _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); +} + +static int gen8_reset_engines(struct drm_i915_private *i915, + unsigned int engine_mask, + unsigned int retry) +{ + struct intel_engine_cs *engine; + const bool reset_non_ready = retry >= 1; + unsigned int tmp; + int ret; + + for_each_engine_masked(engine, i915, engine_mask, tmp) { + ret = gen8_engine_reset_prepare(engine); + if (ret && !reset_non_ready) + goto skip_reset; + + /* + * If this is not the first failed attempt to prepare, + * we decide to proceed anyway. + * + * By doing so we risk context corruption and with + * some gens (kbl), possible system hang if reset + * happens during active bb execution. + * + * We rather take context corruption instead of + * failed reset with a wedged driver/gpu. And + * active bb execution case should be covered by + * i915_stop_engines we have before the reset. + */ + } + + if (INTEL_GEN(i915) >= 11) + ret = gen11_reset_engines(i915, engine_mask, retry); + else + ret = gen6_reset_engines(i915, engine_mask, retry); + +skip_reset: + for_each_engine_masked(engine, i915, engine_mask, tmp) + gen8_engine_reset_cancel(engine); + + return ret; +} + +typedef int (*reset_func)(struct drm_i915_private *, + unsigned int engine_mask, + unsigned int retry); + +static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +{ + if (!i915_modparams.reset) + return NULL; + + if (INTEL_GEN(i915) >= 8) + return gen8_reset_engines; + else if (INTEL_GEN(i915) >= 6) + return gen6_reset_engines; + else if (INTEL_GEN(i915) >= 5) + return ironlake_do_reset; + else if (IS_G4X(i915)) + return g4x_do_reset; + else if (IS_G33(i915) || IS_PINEVIEW(i915)) + return g33_do_reset; + else if (INTEL_GEN(i915) >= 3) + return i915_do_reset; + else + return NULL; +} + +int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) +{ + reset_func reset = intel_get_gpu_reset(i915); + int retry; + int ret; + + /* + * We want to perform per-engine reset from atomic context (e.g. + * softirq), which imposes the constraint that we cannot sleep. + * However, experience suggests that spending a bit of time waiting + * for a reset helps in various cases, so for a full-device reset + * we apply the opposite rule and wait if we want to. As we should + * always follow up a failed per-engine reset with a full device reset, + * being a little faster, stricter and more error prone for the + * atomic case seems an acceptable compromise. + * + * Unfortunately this leads to a bimodal routine, when the goal was + * to have a single reset function that worked for resetting any + * number of engines simultaneously. + */ + might_sleep_if(engine_mask == ALL_ENGINES); + + /* + * If the power well sleeps during the reset, the reset + * request may be dropped and never completes (causing -EIO). + */ + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + for (retry = 0; retry < 3; retry++) { + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + i915_stop_engines(i915, engine_mask); + + ret = -ENODEV; + if (reset) { + GEM_TRACE("engine_mask=%x\n", engine_mask); + ret = reset(i915, engine_mask, retry); + } + if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) + break; + + cond_resched(); + } + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + + return ret; +} + +bool intel_has_gpu_reset(struct drm_i915_private *i915) +{ + return intel_get_gpu_reset(i915); +} + +bool intel_has_reset_engine(struct drm_i915_private *i915) +{ + return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; +} + +int intel_reset_guc(struct drm_i915_private *i915) +{ + u32 guc_domain = + INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + int ret; + + GEM_BUG_ON(!HAS_GUC(i915)); + + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(i915, guc_domain); + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + + return ret; +} + +/* + * Ensure irq handler finishes, and not run again. + * Also return the active request so that we only search for it once. + */ +static struct i915_request * +reset_prepare_engine(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + rq = engine->reset.prepare(engine); + if (rq && rq->fence.error == -EIO) + rq = ERR_PTR(-EIO); /* Previous reset failed! */ + + return rq; +} + +static int reset_prepare(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + struct i915_request *rq; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, i915, id) { + rq = reset_prepare_engine(engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + continue; + } + + engine->hangcheck.active_request = rq; + } + + i915_gem_revoke_fences(i915); + intel_uc_sanitize(i915); + + return err; +} + +/* Returns the request if it was guilty of the hang */ +static struct i915_request * +reset_request(struct intel_engine_cs *engine, + struct i915_request *rq, + bool stalled) +{ + /* + * The guilty request will get skipped on a hung engine. + * + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. + * + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. + */ + + if (i915_request_completed(rq)) { + GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", + engine->name, rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(engine)); + stalled = false; + } + + if (stalled) { + context_mark_guilty(rq->gem_context); + i915_request_skip(rq, -EIO); + + /* If this context is now banned, skip all pending requests. */ + if (i915_gem_context_is_banned(rq->gem_context)) + engine_skip_context(rq); + } else { + /* + * Since this is not the hung engine, it may have advanced + * since the hang declaration. Double check by refinding + * the active request at the time of the reset. + */ + rq = i915_gem_find_active_request(engine); + if (rq) { + unsigned long flags; + + context_mark_innocent(rq->gem_context); + dma_fence_set_error(&rq->fence, -EAGAIN); + + /* Rewind the engine to replay the incomplete rq */ + spin_lock_irqsave(&engine->timeline.lock, flags); + rq = list_prev_entry(rq, link); + if (&rq->link == &engine->timeline.requests) + rq = NULL; + spin_unlock_irqrestore(&engine->timeline.lock, flags); + } + } + + return rq; +} + +static void reset_engine(struct intel_engine_cs *engine, + struct i915_request *rq, + bool stalled) +{ + if (rq) + rq = reset_request(engine, rq, stalled); + + /* Setup the CS to resume from the breadcrumb of the hung request */ + engine->reset.reset(engine, rq); +} + +static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + i915_retire_requests(i915); + + for_each_engine(engine, i915, id) { + struct intel_context *ce; + + reset_engine(engine, + engine->hangcheck.active_request, + stalled_mask & ENGINE_MASK(id)); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); + + /* + * Ostensibily, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. + * + * More mysteriously, if we leave the engine idle after a reset, + * the next userspace batch may hang, with what appears to be + * an incoherent read by the CS (presumably stale TLB). An + * empty request appears sufficient to paper over the glitch. + */ + if (intel_engine_is_idle(engine)) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (!IS_ERR(rq)) + i915_request_add(rq); + } + } + + i915_gem_restore_fences(i915); +} + +static void reset_finish_engine(struct intel_engine_cs *engine) +{ + engine->reset.finish(engine); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +} + +static void reset_finish(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + engine->hangcheck.active_request = NULL; + reset_finish_engine(engine); + } +} + +static void nop_submit_request(struct i915_request *request) +{ + unsigned long flags; + + GEM_TRACE("%s fence %llx:%lld -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); + dma_fence_set_error(&request->fence, -EIO); + + spin_lock_irqsave(&request->engine->timeline.lock, flags); + __i915_request_submit(request); + intel_engine_write_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); +} + +void i915_gem_set_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&error->wedge_mutex); + if (test_bit(I915_WEDGED, &error->flags)) { + mutex_unlock(&error->wedge_mutex); + return; + } + + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + struct drm_printer p = drm_debug_printer(__func__); + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + + GEM_TRACE("start\n"); + + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); + + /* Even if the GPU reset fails, it should still stop the engines */ + if (INTEL_GEN(i915) >= 5) + intel_gpu_reset(i915, ALL_ENGINES); + + for_each_engine(engine, i915, id) { + engine->submit_request = nop_submit_request; + engine->schedule = NULL; + } + i915->caps.scheduler = 0; + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_write_global_seqno, or the one + * in nop_submit_request. + */ + synchronize_rcu(); + + /* Mark all executing requests as skipped */ + for_each_engine(engine, i915, id) + engine->cancel_requests(engine); + + for_each_engine(engine, i915, id) { + reset_finish_engine(engine); + intel_engine_wakeup(engine); + } + + smp_mb__before_atomic(); + set_bit(I915_WEDGED, &error->flags); + + GEM_TRACE("end\n"); + mutex_unlock(&error->wedge_mutex); + + wake_up_all(&error->reset_queue); +} + +bool i915_gem_unset_wedged(struct drm_i915_private *i915) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct i915_timeline *tl; + bool ret = false; + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (!test_bit(I915_WEDGED, &error->flags)) + return true; + + if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + return false; + + mutex_lock(&error->wedge_mutex); + + GEM_TRACE("start\n"); + + /* + * Before unwedging, make sure that all pending operations + * are flushed and errored out - we may have requests waiting upon + * third party fences. We marked all inflight requests as EIO, and + * every execbuf since returned EIO, for consistency we want all + * the currently pending requests to also be marked as EIO, which + * is done inside our nop_submit_request - and so we must wait. + * + * No more can be submitted until we reset the wedged bit. + */ + list_for_each_entry(tl, &i915->gt.timelines, link) { + struct i915_request *rq; + + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; + + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + goto unlock; + } + i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); + + intel_engines_sanitize(i915, false); + + /* + * Undo nop_submit_request. We prevent all new i915 requests from + * being queued (by disallowing execbuf whilst wedged) so having + * waited for all active requests above, we know the system is idle + * and do not have to worry about a thread being inside + * engine->submit_request() as we swap over. So unlike installing + * the nop_submit_request on reset, we can do this from normal + * context and do not require stop_machine(). + */ + intel_engines_reset_default_submission(i915); + i915_gem_contexts_lost(i915); + + GEM_TRACE("end\n"); + + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ + clear_bit(I915_WEDGED, &i915->gpu_error.flags); + ret = true; +unlock: + mutex_unlock(&i915->gpu_error.wedge_mutex); + + return ret; +} + +/** + * i915_reset - reset chip after a hang + * @i915: #drm_i915_private to reset + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting + * + * Reset the chip. Useful if a hang is detected. Marks the device as wedged + * on failure. + * + * Caller must hold the struct_mutex. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + int ret; + int i; + + GEM_TRACE("flags=%lx\n", error->flags); + + might_sleep(); + lockdep_assert_held(&i915->drm.struct_mutex); + assert_rpm_wakelock_held(i915); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); + + if (!test_bit(I915_RESET_HANDOFF, &error->flags)) + return; + + /* Clear any previous failed attempts at recovery. Time to try again. */ + if (!i915_gem_unset_wedged(i915)) + goto wakeup; + + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); + error->reset_count++; + + ret = reset_prepare(i915); + if (ret) { + dev_err(i915->drm.dev, "GPU recovery failed\n"); + goto taint; + } + + if (!intel_has_gpu_reset(i915)) { + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); + goto error; + } + + for (i = 0; i < 3; i++) { + ret = intel_gpu_reset(i915, ALL_ENGINES); + if (ret == 0) + break; + + msleep(100); + } + if (ret) { + dev_err(i915->drm.dev, "Failed to reset chip\n"); + goto taint; + } + + /* Ok, now get things going again... */ + + /* + * Everything depends on having the GTT running, so we need to start + * there. + */ + ret = i915_ggtt_enable_hw(i915); + if (ret) { + DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", + ret); + goto error; + } + + gt_reset(i915, stalled_mask); + intel_overlay_reset(i915); + + /* + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + ret = i915_gem_init_hw(i915); + if (ret) { + DRM_ERROR("Failed to initialise HW following reset (%d)\n", + ret); + goto error; + } + + i915_queue_hangcheck(i915); + +finish: + reset_finish(i915); +wakeup: + clear_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_bit(&error->flags, I915_RESET_HANDOFF); + return; + +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +error: + i915_gem_set_wedged(i915); + i915_retire_requests(i915); + goto finish; +} + +static inline int intel_gt_reset_engine(struct drm_i915_private *i915, + struct intel_engine_cs *engine) +{ + return intel_gpu_reset(i915, intel_engine_flag(engine)); +} + +/** + * i915_reset_engine - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no dev_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +{ + struct i915_gpu_error *error = &engine->i915->gpu_error; + struct i915_request *active_request; + int ret; + + GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + + active_request = reset_prepare_engine(engine); + if (IS_ERR_OR_NULL(active_request)) { + /* Either the previous reset failed, or we pardon the reset. */ + ret = PTR_ERR(active_request); + goto out; + } + + if (msg) + dev_notice(engine->i915->drm.dev, + "Resetting %s for %s\n", engine->name, msg); + error->reset_engine_count[engine->id]++; + + if (!engine->i915->guc.execbuf_client) + ret = intel_gt_reset_engine(engine->i915, engine); + else + ret = intel_guc_reset_engine(&engine->i915->guc, engine); + if (ret) { + /* If we fail here, we expect to fallback to a global reset */ + DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", + engine->i915->guc.execbuf_client ? "GuC " : "", + engine->name, ret); + goto out; + } + + /* + * The request that caused the hang is stuck on elsp, we know the + * active request and can drop it, adjust head to skip the offending + * request to resume executing remaining requests in the queue. + */ + reset_engine(engine, active_request, true); + + /* + * The engine and its registers (and workarounds in case of render) + * have been reset to their default values. Follow the init_ring + * process to program RING_MODE, HWSP and re-enable submission. + */ + ret = engine->init_hw(engine); + if (ret) + goto out; + +out: + intel_engine_cancel_stop_cs(engine); + reset_finish_engine(engine); + return ret; +} + +static void i915_reset_device(struct drm_i915_private *i915, + u32 engine_mask, + const char *reason) +{ + struct i915_gpu_error *error = &i915->gpu_error; + struct kobject *kobj = &i915->drm.primary->kdev->kobj; + char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; + char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; + char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; + struct i915_wedge_me w; + + kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + + /* Use a watchdog to ensure that our reset completes */ + i915_wedge_on_timeout(&w, i915, 5 * HZ) { + intel_prepare_reset(i915); + + error->reason = reason; + error->stalled_mask = engine_mask; + + /* Signal that locked waiters should reset the GPU */ + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_all(&error->wait_queue); + + /* + * Wait for anyone holding the lock to wakeup, without + * blocking indefinitely on struct_mutex. + */ + do { + if (mutex_trylock(&i915->drm.struct_mutex)) { + i915_reset(i915, engine_mask, reason); + mutex_unlock(&i915->drm.struct_mutex); + } + } while (wait_on_bit_timeout(&error->flags, + I915_RESET_HANDOFF, + TASK_UNINTERRUPTIBLE, + 1)); + + error->stalled_mask = 0; + error->reason = NULL; + + intel_finish_reset(i915); + } + + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); +} + +void i915_clear_error_registers(struct drm_i915_private *dev_priv) +{ + u32 eir; + + if (!IS_GEN(dev_priv, 2)) + I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); + + if (INTEL_GEN(dev_priv) < 4) + I915_WRITE(IPEIR, I915_READ(IPEIR)); + else + I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); + + I915_WRITE(EIR, I915_READ(EIR)); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT); + } + + if (INTEL_GEN(dev_priv) >= 8) { + I915_WRITE(GEN8_RING_FAULT_REG, + I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID); + POSTING_READ(GEN8_RING_FAULT_REG); + } else if (INTEL_GEN(dev_priv) >= 6) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + I915_WRITE(RING_FAULT_REG(engine), + I915_READ(RING_FAULT_REG(engine)) & + ~RING_FAULT_VALID); + } + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + } +} + +/** + * i915_handle_error - handle a gpu error + * @i915: i915 device private + * @engine_mask: mask representing engines that are hung + * @flags: control flags + * @fmt: Error message format string + * + * Do some basic checking of register state at error time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). + */ +void i915_handle_error(struct drm_i915_private *i915, + u32 engine_mask, + unsigned long flags, + const char *fmt, ...) +{ + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + unsigned int tmp; + char error_msg[80]; + char *msg = NULL; + + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } + + /* + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugfs, so get an RPM reference. + */ + wakeref = intel_runtime_pm_get(i915); + + engine_mask &= INTEL_INFO(i915)->ring_mask; + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(i915, engine_mask, msg); + i915_clear_error_registers(i915); + } + + /* + * Try engine reset when available. We fall back to full reset if + * single reset fails. + */ + if (intel_has_reset_engine(i915) && + !i915_terminally_wedged(&i915->gpu_error)) { + for_each_engine_masked(engine, i915, engine_mask, tmp) { + BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags)) + continue; + + if (i915_reset_engine(engine, msg) == 0) + engine_mask &= ~intel_engine_flag(engine); + + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + wake_up_bit(&i915->gpu_error.flags, + I915_RESET_ENGINE + engine->id); + } + } + + if (!engine_mask) + goto out; + + /* Full reset needs the mutex, stop any other user trying to do so. */ + if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) { + wait_event(i915->gpu_error.reset_queue, + !test_bit(I915_RESET_BACKOFF, + &i915->gpu_error.flags)); + goto out; + } + + /* Prevent any other reset-engine attempt. */ + for_each_engine(engine, i915, tmp) { + while (test_and_set_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags)) + wait_on_bit(&i915->gpu_error.flags, + I915_RESET_ENGINE + engine->id, + TASK_UNINTERRUPTIBLE); + } + + i915_reset_device(i915, engine_mask, msg); + + for_each_engine(engine, i915, tmp) { + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + } + + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); + +out: + intel_runtime_pm_put(i915, wakeref); +} + +static void i915_wedge_me(struct work_struct *work) +{ + struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + + dev_err(w->i915->drm.dev, + "%s timed out, cancelling all in-flight rendering.\n", + w->name); + i915_gem_set_wedged(w->i915); +} + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name) +{ + w->i915 = i915; + w->name = name; + + INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +void __i915_fini_wedge(struct i915_wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h new file mode 100644 index 000000000000..b6a519bde67d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2008-2018 Intel Corporation + */ + +#ifndef I915_RESET_H +#define I915_RESET_H + +#include +#include + +struct drm_i915_private; +struct intel_engine_cs; +struct intel_guc; + +__printf(4, 5) +void i915_handle_error(struct drm_i915_private *i915, + u32 engine_mask, + unsigned long flags, + const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) + +void i915_clear_error_registers(struct drm_i915_private *i915); + +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason); +int i915_reset_engine(struct intel_engine_cs *engine, + const char *reason); + +bool intel_has_gpu_reset(struct drm_i915_private *i915); +bool intel_has_reset_engine(struct drm_i915_private *i915); + +int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask); + +int intel_reset_guc(struct drm_i915_private *i915); + +struct i915_wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const char *name; +}; + +void __i915_init_wedge(struct i915_wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const char *name); +void __i915_fini_wedge(struct i915_wedge_me *w); + +#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ + (W)->i915; \ + __i915_fini_wedge((W))) + +#endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c6b3b69aaeac..8d6d7ae311f4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -31,13 +31,7 @@ #include #include #include -#include "intel_drv.h" -#include "intel_frontbuffer.h" #include -#include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "intel_dsi.h" -#include "i915_trace.h" #include #include #include @@ -48,6 +42,15 @@ #include #include +#include "intel_drv.h" +#include "intel_dsi.h" +#include "intel_frontbuffer.h" + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_reset.h" +#include "i915_trace.h" + /* Primary plane formats for gen <= 3 */ static const uint32_t i8xx_primary_formats[] = { DRM_FORMAT_C8, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 200218cb157f..eed0da03ff5e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,6 +25,7 @@ #include #include "i915_drv.h" +#include "i915_reset.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 0f1c4f9ebfd8..744220296653 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -192,4 +192,7 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine); + #endif diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 51e9efec5116..7dc11fcb13de 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -23,6 +23,7 @@ */ #include "i915_drv.h" +#include "i915_reset.h" static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 731b82afe636..e711eb3268bc 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -26,6 +26,7 @@ #include "intel_guc_submission.h" #include "intel_guc.h" #include "i915_drv.h" +#include "i915_reset.h" static void guc_free_load_err_log(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 681ea532585e..e88f0252d77e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1715,372 +1715,6 @@ int i915_reg_read_ioctl(struct drm_device *dev, return ret; } -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - const u32 base = engine->mmio_base; - - if (intel_engine_stop_cs(engine)) - DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); - - I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); - POSTING_READ_FW(RING_HEAD(base)); /* paranoia */ - - I915_WRITE_FW(RING_HEAD(base), 0); - I915_WRITE_FW(RING_TAIL(base), 0); - POSTING_READ_FW(RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - I915_WRITE_FW(RING_CTL(base), 0); - - /* Check acts as a post */ - if (I915_READ_FW(RING_HEAD(base)) != 0) - DRM_DEBUG_DRIVER("%s: ring head not parked\n", - engine->name); -} - -static void i915_stop_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (INTEL_GEN(dev_priv) < 3) - return; - - for_each_engine_masked(engine, dev_priv, engine_mask, id) - gen3_stop_engine(engine); -} - -static bool i915_in_reset(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return gdrst & GRDOM_RESET_STATUS; -} - -static int i915_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - int err; - - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); - - /* Clear the reset request. */ - pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); - if (!err) - err = wait_for(!i915_in_reset(pdev), 500); - - return err; -} - -static bool g4x_reset_complete(struct pci_dev *pdev) -{ - u8 gdrst; - - pci_read_config_byte(pdev, I915_GDRST, &gdrst); - return (gdrst & GRDOM_RESET_ENABLE) == 0; -} - -static int g33_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); -} - -static int g4x_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - int ret; - - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - -out: - pci_write_config_byte(pdev, I915_GDRST, 0); - - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); - - return ret; -} - -static int ironlake_do_reset(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - int ret; - - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); - goto out; - } - -out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); - return ret; -} - -/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, - u32 hw_domain_mask) -{ - int err; - - /* GEN6_GDRST is not in the gt power well, no need to check - * for fifo space for the write or forcewake the chip for - * the read - */ - __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); - - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(dev_priv, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); - if (err) - DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", - hw_domain_mask); - - return err; -} - -/** - * gen6_reset_engines - reset individual engines - * @dev_priv: i915 device - * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset - * @retry: the count of of previous attempts to reset. - * - * This function will reset the individual engines that are set in engine_mask. - * If you provide ALL_ENGINES as mask, full global domain reset will be issued. - * - * Note: It is responsibility of the caller to handle the difference between - * asking full domain reset versus reset for all available individual engines. - * - * Returns 0 on success, nonzero on error. - */ -static int gen6_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN6_GRDOM_RENDER, - [BCS] = GEN6_GRDOM_BLT, - [VCS] = GEN6_GRDOM_MEDIA, - [VCS2] = GEN8_GRDOM_MEDIA2, - [VECS] = GEN6_GRDOM_VECS, - }; - u32 hw_mask; - - if (engine_mask == ALL_ENGINES) { - hw_mask = GEN6_GRDOM_FULL; - } else { - unsigned int tmp; - - hw_mask = 0; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - hw_mask |= hw_engine_mask[engine->id]; - } - - return gen6_hw_domain_reset(dev_priv, hw_mask); -} - -static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; - u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; - i915_reg_t sfc_usage; - u32 sfc_usage_bit; - u32 sfc_reset_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return 0; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine); - sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance); - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - - sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine); - sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT; - - sfc_usage = GEN11_VECS_SFC_USAGE(engine); - sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT; - sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance); - break; - - default: - return 0; - } - - /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). - */ - I915_WRITE_FW(sfc_forced_lock, - I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit); - - if (__intel_wait_for_register_fw(dev_priv, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); - return 0; - } - - if (I915_READ_FW(sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; - - return 0; -} - -static void gen11_unlock_sfc(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; - i915_reg_t sfc_forced_lock; - u32 sfc_forced_lock_bit; - - switch (engine->class) { - case VIDEO_DECODE_CLASS: - if ((BIT(engine->instance) & vdbox_sfc_access) == 0) - return; - - sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT; - break; - - case VIDEO_ENHANCEMENT_CLASS: - sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine); - sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT; - break; - - default: - return; - } - - I915_WRITE_FW(sfc_forced_lock, - I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit); -} - -/** - * gen11_reset_engines - reset individual engines - * @dev_priv: i915 device - * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset - * - * This function will reset the individual engines that are set in engine_mask. - * If you provide ALL_ENGINES as mask, full global domain reset will be issued. - * - * Note: It is responsibility of the caller to handle the difference between - * asking full domain reset versus reset for all available individual engines. - * - * Returns 0 on success, nonzero on error. - */ -static int gen11_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask) -{ - const u32 hw_engine_mask[I915_NUM_ENGINES] = { - [RCS] = GEN11_GRDOM_RENDER, - [BCS] = GEN11_GRDOM_BLT, - [VCS] = GEN11_GRDOM_MEDIA, - [VCS2] = GEN11_GRDOM_MEDIA2, - [VCS3] = GEN11_GRDOM_MEDIA3, - [VCS4] = GEN11_GRDOM_MEDIA4, - [VECS] = GEN11_GRDOM_VECS, - [VECS2] = GEN11_GRDOM_VECS2, - }; - struct intel_engine_cs *engine; - unsigned int tmp; - u32 hw_mask; - int ret; - - BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); - - if (engine_mask == ALL_ENGINES) { - hw_mask = GEN11_GRDOM_FULL; - } else { - hw_mask = 0; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(dev_priv, engine); - } - } - - ret = gen6_hw_domain_reset(dev_priv, hw_mask); - - if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen11_unlock_sfc(dev_priv, engine); - - return ret; -} - /** * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device @@ -2191,196 +1825,6 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, return ret; } -static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), - _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); - - ret = __intel_wait_for_register_fw(dev_priv, - RING_RESET_CTL(engine->mmio_base), - RESET_CTL_READY_TO_RESET, - RESET_CTL_READY_TO_RESET, - 700, 0, - NULL); - if (ret) - DRM_ERROR("%s: reset request timeout\n", engine->name); - - return ret; -} - -static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), - _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); -} - -static int reset_engines(struct drm_i915_private *i915, - unsigned int engine_mask, - unsigned int retry) -{ - if (INTEL_GEN(i915) >= 11) - return gen11_reset_engines(i915, engine_mask); - else - return gen6_reset_engines(i915, engine_mask, retry); -} - -static int gen8_reset_engines(struct drm_i915_private *dev_priv, - unsigned int engine_mask, - unsigned int retry) -{ - struct intel_engine_cs *engine; - const bool reset_non_ready = retry >= 1; - unsigned int tmp; - int ret; - - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - ret = gen8_engine_reset_prepare(engine); - if (ret && !reset_non_ready) - goto skip_reset; - - /* - * If this is not the first failed attempt to prepare, - * we decide to proceed anyway. - * - * By doing so we risk context corruption and with - * some gens (kbl), possible system hang if reset - * happens during active bb execution. - * - * We rather take context corruption instead of - * failed reset with a wedged driver/gpu. And - * active bb execution case should be covered by - * i915_stop_engines we have before the reset. - */ - } - - ret = reset_engines(dev_priv, engine_mask, retry); - -skip_reset: - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen8_engine_reset_cancel(engine); - - return ret; -} - -typedef int (*reset_func)(struct drm_i915_private *, - unsigned int engine_mask, unsigned int retry); - -static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) -{ - if (!i915_modparams.reset) - return NULL; - - if (INTEL_GEN(dev_priv) >= 8) - return gen8_reset_engines; - else if (INTEL_GEN(dev_priv) >= 6) - return gen6_reset_engines; - else if (IS_GEN(dev_priv, 5)) - return ironlake_do_reset; - else if (IS_G4X(dev_priv)) - return g4x_do_reset; - else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) - return g33_do_reset; - else if (INTEL_GEN(dev_priv) >= 3) - return i915_do_reset; - else - return NULL; -} - -int intel_gpu_reset(struct drm_i915_private *dev_priv, - const unsigned int engine_mask) -{ - reset_func reset = intel_get_gpu_reset(dev_priv); - unsigned int retry; - int ret; - - GEM_BUG_ON(!engine_mask); - - /* - * We want to perform per-engine reset from atomic context (e.g. - * softirq), which imposes the constraint that we cannot sleep. - * However, experience suggests that spending a bit of time waiting - * for a reset helps in various cases, so for a full-device reset - * we apply the opposite rule and wait if we want to. As we should - * always follow up a failed per-engine reset with a full device reset, - * being a little faster, stricter and more error prone for the - * atomic case seems an acceptable compromise. - * - * Unfortunately this leads to a bimodal routine, when the goal was - * to have a single reset function that worked for resetting any - * number of engines simultaneously. - */ - might_sleep_if(engine_mask == ALL_ENGINES); - - /* - * If the power well sleeps during the reset, the reset - * request may be dropped and never completes (causing -EIO). - */ - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - for (retry = 0; retry < 3; retry++) { - - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - i915_stop_engines(dev_priv, engine_mask); - - ret = -ENODEV; - if (reset) { - ret = reset(dev_priv, engine_mask, retry); - GEM_TRACE("engine_mask=%x, ret=%d, retry=%d\n", - engine_mask, ret, retry); - } - if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) - break; - - cond_resched(); - } - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - -bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) -{ - return intel_get_gpu_reset(dev_priv) != NULL; -} - -bool intel_has_reset_engine(struct drm_i915_private *dev_priv) -{ - return (INTEL_INFO(dev_priv)->has_reset_engine && - i915_modparams.reset >= 2); -} - -int intel_reset_guc(struct drm_i915_private *dev_priv) -{ - u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC : - GEN9_GRDOM_GUC; - int ret; - - GEM_BUG_ON(!HAS_GUC(dev_priv)); - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(dev_priv, guc_domain); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - return ret; -} - bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv) { return check_for_unclaimed_mmio(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index e6073cd4719c..2b2ecd76c2ac 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -4,6 +4,8 @@ * Copyright © 2018 Intel Corporation */ +#include "../i915_reset.h" + #include "../i915_selftest.h" #include "igt_flush_test.h" #include "igt_spinner.h" diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 9009d7b8b136..a8cac56be835 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -5,6 +5,7 @@ */ #include "../i915_selftest.h" +#include "../i915_reset.h" #include "igt_flush_test.h" #include "igt_reset.h" From 739f3abdbfcf8c950bb35eb08530cee489e1a967 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:19 +0200 Subject: [PATCH 181/539] drm/i915: small isolated c99 types to kernel types switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch fixes sprinkled on top of the changed lines. Reviewed-by: Chris Wilson Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/14ed72e7f04c9340a057855c5950b54811f8a477.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 14 +++++++------- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 8 ++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 10 +++++----- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- drivers/gpu/drm/i915/intel_atomic.c | 4 ++-- drivers/gpu/drm/i915/intel_atomic_plane.c | 4 ++-- drivers/gpu/drm/i915/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/intel_dpio_phy.c | 18 +++++++++--------- drivers/gpu/drm/i915/intel_engine_cs.c | 12 ++++++------ drivers/gpu/drm/i915/intel_fbc.c | 2 +- drivers/gpu/drm/i915/intel_fifo_underrun.c | 12 ++++++------ drivers/gpu/drm/i915/intel_hdcp.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_pipe_crc.c | 18 +++++++++--------- drivers/gpu/drm/i915/intel_psr.c | 6 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 20 ++++++++++---------- 18 files changed, 72 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7185a5b4a5ca..b359390ba22c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -713,8 +713,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) static int i915_gem_create(struct drm_file *file, struct drm_i915_private *dev_priv, - uint64_t size, - uint32_t *handle_p) + u64 size, + u32 *handle_p) { struct drm_i915_gem_object *obj; int ret; @@ -1573,8 +1573,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_set_domain *args = data; struct drm_i915_gem_object *obj; - uint32_t read_domains = args->read_domains; - uint32_t write_domain = args->write_domain; + u32 read_domains = args->read_domains; + u32 write_domain = args->write_domain; int err; /* Only handle setting domains to types used by the CPU. */ @@ -1756,7 +1756,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (IS_ERR((void *)addr)) return addr; - args->addr_ptr = (uint64_t) addr; + args->addr_ptr = (u64)addr; return 0; } @@ -2158,8 +2158,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, - uint32_t handle, - uint64_t *offset) + u32 handle, + u64 *offset) { struct drm_i915_gem_object *obj; int ret; diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index f7947d89cf45..46e259661294 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -555,8 +555,8 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) { - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { /* @@ -579,7 +579,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } } else { - uint32_t dimm_c0, dimm_c1; + u32 dimm_c0, dimm_c1; dimm_c0 = I915_READ(MAD_DIMM_C0); dimm_c1 = I915_READ(MAD_DIMM_C1); dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; @@ -611,7 +611,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } else if (IS_MOBILE(dev_priv) || IS_I915G(dev_priv) || IS_I945G(dev_priv)) { - uint32_t dcc; + u32 dcc; /* On 9xx chipsets, channel interleave by the CPU is * determined by DCC. For single-channel, neither the CPU diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5eaf586c4d48..1f8e80e31b49 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1082,7 +1082,7 @@ i915_error_object_create(struct drm_i915_private *i915, /* The error capture is special as tries to run underneath the normal * locking rules - so we use the raw version of the i915_gem_active lookup. */ -static inline uint32_t +static inline u32 __active_get_seqno(struct i915_gem_active *active) { struct i915_request *request; @@ -1153,11 +1153,11 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * * It's only a small step better than a random number in its current form. */ -static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - int *engine_id) +static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, + struct i915_gpu_state *error, + int *engine_id) { - uint32_t error_code = 0; + u32 error_code = 0; int i; /* IPEHR would be an ideal way to detect errors, as it's the gross diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index faff6cf1aaa1..727118301f91 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3021,7 +3021,7 @@ static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) (addr >= 0x182300 && addr <= 0x1823A4); } -static uint32_t mask_reg_value(u32 reg, u32 val) +static u32 mask_reg_value(u32 reg, u32 val) { /* HALF_SLICE_CHICKEN2 is programmed with a the * WaDisableSTUnitPowerOptimization workaround. Make sure the value diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fad5a9e8b44d..9a1340cfda6c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -117,14 +117,14 @@ */ typedef struct { - uint32_t reg; + u32 reg; } i915_reg_t; #define _MMIO(r) ((const i915_reg_t){ .reg = (r) }) #define INVALID_MMIO_REG _MMIO(0) -static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg) +static inline u32 i915_mmio_reg_offset(i915_reg_t reg) { return reg.reg; } diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index d8dbc9980281..16263add3cdd 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -46,7 +46,7 @@ int intel_digital_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -78,7 +78,7 @@ int intel_digital_connector_atomic_get_property(struct drm_connector *connector, int intel_digital_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val) + u64 val) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 683a75dad4fb..9a2fdc77ebcb 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -311,7 +311,7 @@ int intel_plane_atomic_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n", property->base.id, property->name); @@ -334,7 +334,7 @@ int intel_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, - uint64_t val) + u64 val) { DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n", property->base.id, property->name); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 778c887108b7..909b9f555458 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -239,7 +239,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; - uint32_t temp; + u32 temp; /* MST encoders are bound to a crtc, not to a connector, * force the mapping here for get_hw_state. diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 3c7f10d17658..95cb8b154f87 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -413,7 +413,7 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, } if (phy_info->rcomp_phy != -1) { - uint32_t grc_code; + u32 grc_code; bxt_phy_wait_grc_done(dev_priv, phy_info->rcomp_phy); @@ -445,7 +445,7 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - uint32_t val; + u32 val; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -515,7 +515,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - uint32_t mask; + u32 mask; bool ok; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -567,8 +567,8 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, #undef _CHK } -uint8_t -bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) +u8 +bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count) { switch (lane_count) { case 1: @@ -585,7 +585,7 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) } void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, - uint8_t lane_lat_optim_mask) + u8 lane_lat_optim_mask) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; @@ -610,7 +610,7 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, } } -uint8_t +u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -618,7 +618,7 @@ bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) enum dpio_phy phy; enum dpio_channel ch; int lane; - uint8_t mask; + u8 mask; bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); @@ -739,7 +739,7 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW0(ch)); if (reset) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index eed0da03ff5e..cc6379422bc0 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -800,15 +800,15 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) return mcr_s_ss_select; } -static inline uint32_t +static inline u32 read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { - uint32_t mcr_slice_subslice_mask; - uint32_t mcr_slice_subslice_select; - uint32_t default_mcr_s_ss_select; - uint32_t mcr; - uint32_t ret; + u32 mcr_slice_subslice_mask; + u32 mcr_slice_subslice_select; + u32 default_mcr_s_ss_select; + u32 mcr; + u32 ret; enum forcewake_domains fw_domains; if (INTEL_GEN(dev_priv) >= 11) { diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index ccd5e110a19c..ec72be4b7a7b 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -594,7 +594,7 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, } static bool pixel_format_is_valid(struct drm_i915_private *dev_priv, - uint32_t pixel_format) + u32 pixel_format) { switch (pixel_format) { case DRM_FORMAT_XRGB8888: diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 9b39975c8389..3b9285130ef5 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -127,8 +127,8 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t bit = (pipe == PIPE_A) ? DE_PIPEA_FIFO_UNDERRUN : - DE_PIPEB_FIFO_UNDERRUN; + u32 bit = (pipe == PIPE_A) ? + DE_PIPEA_FIFO_UNDERRUN : DE_PIPEB_FIFO_UNDERRUN; if (enable) ilk_enable_display_irq(dev_priv, bit); @@ -140,7 +140,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t err_int = I915_READ(GEN7_ERR_INT); + u32 err_int = I915_READ(GEN7_ERR_INT); lockdep_assert_held(&dev_priv->irq_lock); @@ -193,8 +193,8 @@ static void ibx_set_fifo_underrun_reporting(struct drm_device *dev, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t bit = (pch_transcoder == PIPE_A) ? - SDE_TRANSA_FIFO_UNDER : SDE_TRANSB_FIFO_UNDER; + u32 bit = (pch_transcoder == PIPE_A) ? + SDE_TRANSA_FIFO_UNDER : SDE_TRANSB_FIFO_UNDER; if (enable) ibx_enable_display_interrupt(dev_priv, bit); @@ -206,7 +206,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pch_transcoder = crtc->pipe; - uint32_t serr_int = I915_READ(SERR_INT); + u32 serr_int = I915_READ(SERR_INT); lockdep_assert_held(&dev_priv->irq_lock); diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 3fcb3b775948..ce7ba3a9c000 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -838,8 +838,8 @@ void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state) { - uint64_t old_cp = old_state->content_protection; - uint64_t new_cp = new_state->content_protection; + u64 old_cp = old_state->content_protection; + u64 new_cp = new_state->content_protection; struct drm_crtc_state *crtc_state; if (!new_state->crtc) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a62ad80fdf97..f0fa0f767eb6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2608,7 +2608,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, { struct drm_i915_gem_object *ctx_obj; struct i915_vma *vma; - uint32_t context_size; + u32 context_size; struct intel_ring *ring; struct i915_timeline *timeline; int ret; diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 56d614b02302..a8554dc4f196 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -44,7 +44,7 @@ static const char * const pipe_crc_sources[] = { }; static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PIPE; @@ -120,7 +120,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { bool need_stable_symbols = false; @@ -165,7 +165,7 @@ static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, * - DisplayPort scrambling: used for EMI reduction */ if (need_stable_symbols) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); tmp |= DC_BALANCE_RESET_VLV; switch (pipe) { @@ -190,7 +190,7 @@ static int vlv_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { bool need_stable_symbols = false; @@ -244,7 +244,7 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, * - DisplayPort scrambling: used for EMI reduction */ if (need_stable_symbols) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); WARN_ON(!IS_G4X(dev_priv)); @@ -265,7 +265,7 @@ static int i9xx_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static void vlv_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); switch (pipe) { case PIPE_A: @@ -289,7 +289,7 @@ static void vlv_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, static void g4x_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t tmp = I915_READ(PORT_DFT2_G4X); + u32 tmp = I915_READ(PORT_DFT2_G4X); if (pipe == PIPE_A) tmp &= ~PIPE_A_SCRAMBLE_RESET; @@ -304,7 +304,7 @@ static void g4x_undo_pipe_scramble_reset(struct drm_i915_private *dev_priv, } static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, - uint32_t *val) + u32 *val) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PIPE; @@ -392,7 +392,7 @@ unlock: static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val, + u32 *val, bool set_wa) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 0f6b2b4702e3..8dbf26c212cc 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -230,7 +230,7 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) { - uint8_t dprx = 0; + u8 dprx = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, &dprx) != 1) @@ -240,7 +240,7 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) { - uint8_t alpm_caps = 0; + u8 alpm_caps = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps) != 1) @@ -384,7 +384,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 aux_clock_divider, aux_ctl; int i; - static const uint8_t aux_msg[] = { + static const u8 aux_msg[] = { [0] = DP_AUX_NATIVE_WRITE << 4, [1] = DP_SET_POWER >> 8, [2] = DP_SET_POWER & 0xff, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3c1366c58cf3..616f6bbb18ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -28,7 +28,7 @@ struct i915_sched_attr; * workarounds! */ #define CACHELINE_BYTES 64 -#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) +#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) struct intel_hw_status_page { struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 79f00610860b..a017a4232c0f 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -903,10 +903,10 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) * back on and register state is restored. This is guaranteed by the MMIO write * to DC_STATE_EN blocking until the state is restored. */ -static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) +static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) { - uint32_t val; - uint32_t mask; + u32 val; + u32 mask; if (WARN_ON_ONCE(state & ~dev_priv->csr.allowed_dc_mask)) state &= dev_priv->csr.allowed_dc_mask; @@ -1538,7 +1538,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, { enum dpio_phy phy; enum pipe pipe; - uint32_t tmp; + u32 tmp; WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC && power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D); @@ -3328,10 +3328,10 @@ sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, return 1; } -static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, - int enable_dc) +static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, + int enable_dc) { - uint32_t mask; + u32 mask; int requested_dc; int max_dc; @@ -3596,7 +3596,7 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; val = MBUS_ABOX_BT_CREDIT_POOL1(16) | MBUS_ABOX_BT_CREDIT_POOL2(16) | @@ -3907,7 +3907,7 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * current lane status. */ if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) { - uint32_t status = I915_READ(DPLL(PIPE_A)); + u32 status = I915_READ(DPLL(PIPE_A)); unsigned int mask; mask = status & DPLL_PORTB_READY_MASK; @@ -3938,7 +3938,7 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) } if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) { - uint32_t status = I915_READ(DPIO_PHY_STATUS); + u32 status = I915_READ(DPIO_PHY_STATUS); unsigned int mask; mask = status & DPLL_PORTD_READY_MASK; From c7cc52167541f4c8f97aad58d86d93886ba057ce Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:20 +0200 Subject: [PATCH 182/539] drm/i915/crt: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4deb1838b288e027b6483e7ebd6b7b365d0ef979.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 081c333f30d2..c2e799a5e63e 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -631,19 +631,19 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) } static enum drm_connector_status -intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) +intel_crt_load_detect(struct intel_crt *crt, u32 pipe) { struct drm_device *dev = crt->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t save_bclrpat; - uint32_t save_vtotal; - uint32_t vtotal, vactive; - uint32_t vsample; - uint32_t vblank, vblank_start, vblank_end; - uint32_t dsl; + u32 save_bclrpat; + u32 save_vtotal; + u32 vtotal, vactive; + u32 vsample; + u32 vblank, vblank_start, vblank_end; + u32 dsl; i915_reg_t bclrpat_reg, vtotal_reg, vblank_reg, vsync_reg, pipeconf_reg, pipe_dsl_reg; - uint8_t st00; + u8 st00; enum drm_connector_status status; DRM_DEBUG_KMS("starting load-detect on CRT\n"); @@ -669,7 +669,7 @@ intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) I915_WRITE(bclrpat_reg, 0x500050); if (!IS_GEN(dev_priv, 2)) { - uint32_t pipeconf = I915_READ(pipeconf_reg); + u32 pipeconf = I915_READ(pipeconf_reg); I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER); POSTING_READ(pipeconf_reg); /* Wait for next Vblank to substitue @@ -690,8 +690,8 @@ intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) * Yes, this will flicker */ if (vblank_start <= vactive && vblank_end >= vtotal) { - uint32_t vsync = I915_READ(vsync_reg); - uint32_t vsync_start = (vsync & 0xffff) + 1; + u32 vsync = I915_READ(vsync_reg); + u32 vsync_start = (vsync & 0xffff) + 1; vblank_start = vsync_start; I915_WRITE(vblank_reg, From 977dcc06c3e9d42318afab68a39dfaf0895cea31 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:22 +0200 Subject: [PATCH 183/539] drm/i915/lspcon: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/0c2d399bfb8fd9f90c7899eaaa0a9cab82f0d68d.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_lspcon.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 96a8d9524b0c..f08a9b4058f0 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -288,12 +288,12 @@ static bool lspcon_parade_fw_ready(struct drm_dp_aux *aux) } static bool _lspcon_parade_write_infoframe_blocks(struct drm_dp_aux *aux, - uint8_t *avi_buf) + u8 *avi_buf) { u8 avi_if_ctrl; u8 block_count = 0; u8 *data; - uint16_t reg; + u16 reg; ssize_t ret; while (block_count < 4) { @@ -335,10 +335,10 @@ static bool _lspcon_parade_write_infoframe_blocks(struct drm_dp_aux *aux, } static bool _lspcon_write_avi_infoframe_parade(struct drm_dp_aux *aux, - const uint8_t *frame, + const u8 *frame, ssize_t len) { - uint8_t avi_if[LSPCON_PARADE_AVI_IF_DATA_SIZE] = {1, }; + u8 avi_if[LSPCON_PARADE_AVI_IF_DATA_SIZE] = {1, }; /* * Parade's frames contains 32 bytes of data, divided @@ -367,13 +367,13 @@ static bool _lspcon_write_avi_infoframe_parade(struct drm_dp_aux *aux, } static bool _lspcon_write_avi_infoframe_mca(struct drm_dp_aux *aux, - const uint8_t *buffer, ssize_t len) + const u8 *buffer, ssize_t len) { int ret; - uint32_t val = 0; - uint32_t retry; - uint16_t reg; - const uint8_t *data = buffer; + u32 val = 0; + u32 retry; + u16 reg; + const u8 *data = buffer; reg = LSPCON_MCA_AVI_IF_WRITE_OFFSET; while (val < len) { @@ -459,7 +459,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, { ssize_t ret; union hdmi_infoframe frame; - uint8_t buf[VIDEO_DIP_DATA_SIZE]; + u8 buf[VIDEO_DIP_DATA_SIZE]; struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_lspcon *lspcon = &dig_port->lspcon; struct intel_dp *intel_dp = &dig_port->dp; From e5315213ecd26dbb24fb1ffa2200b3c94c7d32cb Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:23 +0200 Subject: [PATCH 184/539] drm/i915/debugfs: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4d71ed8a432b4121516049334512d35623c8acaa.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ece72e0e41bc..cdf7730a79df 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2251,7 +2251,7 @@ static void i915_guc_client_info(struct seq_file *m, { struct intel_engine_cs *engine; enum intel_engine_id id; - uint64_t tot = 0; + u64 tot = 0; seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", client->priority, client->stage_id, client->proc_desc_offset); @@ -3646,7 +3646,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) } DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type); -static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) +static void wm_latency_show(struct seq_file *m, const u16 wm[8]) { struct drm_i915_private *dev_priv = m->private; struct drm_device *dev = &dev_priv->drm; @@ -3689,7 +3689,7 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) static int pri_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3704,7 +3704,7 @@ static int pri_wm_latency_show(struct seq_file *m, void *data) static int spr_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3719,7 +3719,7 @@ static int spr_wm_latency_show(struct seq_file *m, void *data) static int cur_wm_latency_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - const uint16_t *latencies; + const u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3762,12 +3762,12 @@ static int cur_wm_latency_open(struct inode *inode, struct file *file) } static ssize_t wm_latency_write(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp, uint16_t wm[8]) + size_t len, loff_t *offp, u16 wm[8]) { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; struct drm_device *dev = &dev_priv->drm; - uint16_t new[8] = { 0 }; + u16 new[8] = { 0 }; int num_levels; int level; int ret; @@ -3812,7 +3812,7 @@ static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3827,7 +3827,7 @@ static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -3842,7 +3842,7 @@ static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf, { struct seq_file *m = file->private_data; struct drm_i915_private *dev_priv = m->private; - uint16_t *latencies; + u16 *latencies; if (INTEL_GEN(dev_priv) >= 9) latencies = dev_priv->wm.skl_latency; @@ -4860,7 +4860,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) struct drm_connector *connector = m->private; struct intel_dp *intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); - uint8_t buf[16]; + u8 buf[16]; ssize_t err; int i; From a9c287c94e7980762be9b0d13fde5eff5a3afda2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:24 +0200 Subject: [PATCH 185/539] drm/i915/irq: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Chris Wilson Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/841f4eac1c52f4ed3efe2ac9e343d6640c03b774.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 82 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1c6cf024a509..1abfc3fa76ad 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -223,10 +223,10 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); /* For display hotplug interrupt */ static inline void i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits) + u32 mask, + u32 bits) { - uint32_t val; + u32 val; lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); @@ -250,8 +250,8 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, * version is also available. */ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits) + u32 mask, + u32 bits) { spin_lock_irq(&dev_priv->irq_lock); i915_hotplug_interrupt_update_locked(dev_priv, mask, bits); @@ -300,10 +300,10 @@ static bool gen11_reset_one_iir(struct drm_i915_private * const i915, * @enabled_irq_mask: mask of interrupt bits to enable */ void ilk_update_display_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -330,8 +330,8 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, * @enabled_irq_mask: mask of interrupt bits to enable */ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { lockdep_assert_held(&dev_priv->irq_lock); @@ -345,13 +345,13 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, I915_WRITE(GTIMR, dev_priv->gt_irq_mask); } -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) { ilk_update_gt_irq(dev_priv, mask, mask); POSTING_READ_FW(GTIMR); } -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask) { ilk_update_gt_irq(dev_priv, mask, 0); } @@ -390,10 +390,10 @@ static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv) * @enabled_irq_mask: mask of interrupt bits to enable */ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -577,11 +577,11 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv) * @enabled_irq_mask: mask of interrupt bits to enable */ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; - uint32_t old_val; + u32 new_val; + u32 old_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -611,10 +611,10 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, */ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t new_val; + u32 new_val; lockdep_assert_held(&dev_priv->irq_lock); @@ -641,10 +641,10 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, * @enabled_irq_mask: mask of interrupt bits to enable */ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) + u32 interrupt_mask, + u32 enabled_irq_mask) { - uint32_t sdeimr = I915_READ(SDEIMR); + u32 sdeimr = I915_READ(SDEIMR); sdeimr &= ~interrupt_mask; sdeimr |= (~enabled_irq_mask & interrupt_mask); @@ -1368,8 +1368,8 @@ static void ivybridge_parity_work(struct work_struct *work) container_of(work, typeof(*dev_priv), l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[6]; - uint32_t misccpctl; - uint8_t slice = 0; + u32 misccpctl; + u8 slice = 0; /* We must turn off DOP level clock gating to access the L3 registers. * In order to prevent a get/put style interface, acquire struct mutex @@ -1730,13 +1730,13 @@ static void dp_aux_irq_handler(struct drm_i915_private *dev_priv) #if defined(CONFIG_DEBUG_FS) static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t crc0, uint32_t crc1, - uint32_t crc2, uint32_t crc3, - uint32_t crc4) + u32 crc0, u32 crc1, + u32 crc2, u32 crc3, + u32 crc4) { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - uint32_t crcs[5]; + u32 crcs[5]; spin_lock(&pipe_crc->lock); /* @@ -1768,9 +1768,9 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, static inline void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t crc0, uint32_t crc1, - uint32_t crc2, uint32_t crc3, - uint32_t crc4) {} + u32 crc0, u32 crc1, + u32 crc2, u32 crc3, + u32 crc4) {} #endif @@ -1796,7 +1796,7 @@ static void ivb_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { - uint32_t res1, res2; + u32 res1, res2; if (INTEL_GEN(dev_priv) >= 3) res1 = I915_READ(PIPE_CRC_RES_RES1_I915(pipe)); @@ -3172,7 +3172,7 @@ static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; - uint32_t bit = INTEL_GEN(dev_priv) >= 7 ? + u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3234,7 +3234,7 @@ static void ironlake_disable_vblank(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; - uint32_t bit = INTEL_GEN(dev_priv) >= 7 ? + u32 bit = INTEL_GEN(dev_priv) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3452,7 +3452,7 @@ static void gen11_irq_reset(struct drm_device *dev) void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask) { - uint32_t extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; + u32 extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); @@ -3921,7 +3921,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev) static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) { /* These are interrupts we'll toggle with the ring mask register */ - uint32_t gt_interrupts[] = { + u32 gt_interrupts[] = { GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | @@ -3949,8 +3949,8 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) { - uint32_t de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE; - uint32_t de_pipe_enables; + u32 de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE; + u32 de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; From cbe974fb964ec8f830512be0ff34b58242f321bc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:25 +0200 Subject: [PATCH 186/539] drm/i915/cdclk: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/b56d250007a5d85d15038962548abb3e1818480a.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 73cb7250118e..15ba950dee00 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -218,7 +218,7 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) }; const unsigned int *vco_table; unsigned int vco; - uint8_t tmp = 0; + u8 tmp = 0; /* FIXME other chipsets? */ if (IS_GM45(dev_priv)) @@ -249,13 +249,13 @@ static void g33_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; + static const u8 div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const u8 div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const u8 div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const u8 div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const u8 *div_table; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -330,12 +330,12 @@ static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; + static const u8 div_3200[] = { 16, 10, 8 }; + static const u8 div_4000[] = { 20, 12, 10 }; + static const u8 div_5333[] = { 24, 16, 14 }; + const u8 *div_table; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -375,7 +375,7 @@ static void gm45_get_cdclk(struct drm_i915_private *dev_priv, { struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int cdclk_sel; - uint16_t tmp = 0; + u16 tmp = 0; cdclk_state->vco = intel_hpll_vco(dev_priv); @@ -403,8 +403,8 @@ static void gm45_get_cdclk(struct drm_i915_private *dev_priv, static void hsw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + u32 lcpll = I915_READ(LCPLL_CTL); + u32 freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) cdclk_state->cdclk = 800000; @@ -672,8 +672,8 @@ static u8 bdw_calc_voltage_level(int cdclk) static void bdw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + u32 lcpll = I915_READ(LCPLL_CTL); + u32 freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) cdclk_state->cdclk = 800000; @@ -700,7 +700,7 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - uint32_t val; + u32 val; int ret; if (WARN((I915_READ(LCPLL_CTL) & @@ -1083,7 +1083,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) { - uint32_t cdctl, expected; + u32 cdctl, expected; /* * check if the pre-os initialized the display @@ -2690,7 +2690,7 @@ static int vlv_hrawclk(struct drm_i915_private *dev_priv) static int g4x_hrawclk(struct drm_i915_private *dev_priv) { - uint32_t clkcfg; + u32 clkcfg; /* hrawclock is 1/4 the FSB frequency */ clkcfg = I915_READ(CLKCFG); From 990290d124d5556fedcaae98f0b2f456fc009f4b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:26 +0200 Subject: [PATCH 187/539] drm/i915/dpll_mgr: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch/whitespace fixes sprinkled on top of the changed lines. Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/b73aefabb757acf59896bd77dbb20c2e343c6e6d.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 145 +++++++++++++------------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 51 +++++---- 2 files changed, 98 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 04870e960537..606f54dde086 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -346,7 +346,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; - uint32_t val; + u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS); @@ -490,7 +490,7 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); @@ -500,7 +500,7 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - uint32_t val; + u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); @@ -513,7 +513,7 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; - uint32_t val; + u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS); @@ -533,7 +533,7 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state) { intel_wakeref_t wakeref; - uint32_t val; + u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS); @@ -639,11 +639,12 @@ static unsigned hsw_wrpll_get_budget_for_freq(int clock) return budget; } -static void hsw_wrpll_update_rnp(uint64_t freq2k, unsigned budget, - unsigned r2, unsigned n2, unsigned p, +static void hsw_wrpll_update_rnp(u64 freq2k, unsigned int budget, + unsigned int r2, unsigned int n2, + unsigned int p, struct hsw_wrpll_rnp *best) { - uint64_t a, b, c, d, diff, diff_best; + u64 a, b, c, d, diff, diff_best; /* No best (r,n,p) yet */ if (best->p == 0) { @@ -702,7 +703,7 @@ static void hsw_ddi_calculate_wrpll(int clock /* in Hz */, unsigned *r2_out, unsigned *n2_out, unsigned *p_out) { - uint64_t freq2k; + u64 freq2k; unsigned p, n2, r2; struct hsw_wrpll_rnp best = { 0, 0, 0 }; unsigned budget; @@ -768,7 +769,7 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, struct intel_crtc_state *crtc_state) { struct intel_shared_dpll *pll; - uint32_t val; + u32 val; unsigned int p, n2, r2; hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); @@ -930,7 +931,7 @@ static void skl_ddi_pll_write_ctrl1(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; val = I915_READ(DPLL_CTRL1); @@ -995,7 +996,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - uint32_t val; + u32 val; const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; @@ -1035,7 +1036,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, const struct skl_dpll_regs *regs = skl_dpll_regs; const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; - uint32_t val; + u32 val; bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -1062,9 +1063,9 @@ out: } struct skl_wrpll_context { - uint64_t min_deviation; /* current minimal deviation */ - uint64_t central_freq; /* chosen central freq */ - uint64_t dco_freq; /* chosen dco freq */ + u64 min_deviation; /* current minimal deviation */ + u64 central_freq; /* chosen central freq */ + u64 dco_freq; /* chosen dco freq */ unsigned int p; /* chosen divider */ }; @@ -1080,11 +1081,11 @@ static void skl_wrpll_context_init(struct skl_wrpll_context *ctx) #define SKL_DCO_MAX_NDEVIATION 600 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, - uint64_t central_freq, - uint64_t dco_freq, + u64 central_freq, + u64 dco_freq, unsigned int divider) { - uint64_t deviation; + u64 deviation; deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), central_freq); @@ -1158,21 +1159,21 @@ static void skl_wrpll_get_multipliers(unsigned int p, } struct skl_wrpll_params { - uint32_t dco_fraction; - uint32_t dco_integer; - uint32_t qdiv_ratio; - uint32_t qdiv_mode; - uint32_t kdiv; - uint32_t pdiv; - uint32_t central_freq; + u32 dco_fraction; + u32 dco_integer; + u32 qdiv_ratio; + u32 qdiv_mode; + u32 kdiv; + u32 pdiv; + u32 central_freq; }; static void skl_wrpll_params_populate(struct skl_wrpll_params *params, - uint64_t afe_clock, - uint64_t central_freq, - uint32_t p0, uint32_t p1, uint32_t p2) + u64 afe_clock, + u64 central_freq, + u32 p0, u32 p1, u32 p2) { - uint64_t dco_freq; + u64 dco_freq; switch (central_freq) { case 9600000000ULL: @@ -1238,10 +1239,10 @@ static bool skl_ddi_calculate_wrpll(int clock /* in Hz */, struct skl_wrpll_params *wrpll_params) { - uint64_t afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ - uint64_t dco_central_freq[3] = {8400000000ULL, - 9000000000ULL, - 9600000000ULL}; + u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 dco_central_freq[3] = { 8400000000ULL, + 9000000000ULL, + 9600000000ULL }; static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 48, 52, 54, 56, 60, 64, 66, 68, @@ -1265,7 +1266,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { for (i = 0; i < dividers[d].n_dividers; i++) { unsigned int p = dividers[d].list[i]; - uint64_t dco_freq = p * afe_clock; + u64 dco_freq = p * afe_clock; skl_wrpll_try_divider(&ctx, dco_central_freq[dco], @@ -1311,7 +1312,7 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, int clock) { - uint32_t ctrl1, cfgcr1, cfgcr2; + u32 ctrl1, cfgcr1, cfgcr2; struct skl_wrpll_params wrpll_params = { 0, }; /* @@ -1348,7 +1349,7 @@ static bool skl_ddi_dp_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { - uint32_t ctrl1; + u32 ctrl1; /* * See comment in intel_dpll_hw_state to understand why we always use 0 @@ -1450,7 +1451,7 @@ static const struct intel_shared_dpll_funcs skl_ddi_dpll0_funcs = { static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - uint32_t temp; + u32 temp; enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ enum dpio_phy phy; enum dpio_channel ch; @@ -1571,7 +1572,7 @@ static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ - uint32_t temp; + u32 temp; temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); temp &= ~PORT_PLL_ENABLE; @@ -1597,7 +1598,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, intel_wakeref_t wakeref; enum dpio_phy phy; enum dpio_channel ch; - uint32_t val; + u32 val; bool ret; bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); @@ -1669,12 +1670,12 @@ out: /* bxt clock parameters */ struct bxt_clk_div { int clock; - uint32_t p1; - uint32_t p2; - uint32_t m2_int; - uint32_t m2_frac; + u32 p1; + u32 p2; + u32 m2_int; + u32 m2_frac; bool m2_frac_en; - uint32_t n; + u32 n; int vco; }; @@ -1741,8 +1742,8 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { int vco = clk_div->vco; - uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; - uint32_t lanestagger; + u32 prop_coef, int_coef, gain_ctl, targ_cnt; + u32 lanestagger; if (vco >= 6200000 && vco <= 6700000) { prop_coef = 4; @@ -1891,7 +1892,7 @@ static void intel_ddi_pll_init(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); if (INTEL_GEN(dev_priv) < 9) { - uint32_t val = I915_READ(LCPLL_CTL); + u32 val = I915_READ(LCPLL_CTL); /* * The LCPLL register should be turned on by the BIOS. For now @@ -1977,7 +1978,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; /* 1. Enable DPLL power in DPLL_ENABLE. */ val = I915_READ(CNL_DPLL_ENABLE(id)); @@ -2052,7 +2053,7 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const enum intel_dpll_id id = pll->info->id; - uint32_t val; + u32 val; /* * 1. Configure DPCLKA_CFGCR0 to turn off the clock for the DDI. @@ -2110,7 +2111,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; - uint32_t val; + u32 val; bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2246,7 +2247,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; - uint32_t ref_clock; + u32 ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2292,7 +2293,7 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, int clock) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t cfgcr0, cfgcr1; + u32 cfgcr0, cfgcr1; struct skl_wrpll_params wrpll_params = { 0, }; cfgcr0 = DPLL_CFGCR0_HDMI_MODE; @@ -2321,7 +2322,7 @@ static bool cnl_ddi_dp_set_dpll_hw_state(int clock, struct intel_dpll_hw_state *dpll_hw_state) { - uint32_t cfgcr0; + u32 cfgcr0; cfgcr0 = DPLL_CFGCR0_SSC_ENABLE; @@ -2538,7 +2539,7 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, struct intel_dpll_hw_state *pll_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - uint32_t cfgcr0, cfgcr1; + u32 cfgcr0, cfgcr1; struct skl_wrpll_params pll_params = { 0 }; bool ret; @@ -2568,10 +2569,10 @@ static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, } int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id) + u32 pll_id) { - uint32_t cfgcr0, cfgcr1; - uint32_t pdiv, kdiv, qdiv_mode, qdiv_ratio, dco_integer, dco_fraction; + u32 cfgcr0, cfgcr1; + u32 pdiv, kdiv, qdiv_mode, qdiv_ratio, dco_integer, dco_fraction; const struct skl_wrpll_params *params; int index, n_entries, link_clock; @@ -2654,10 +2655,10 @@ bool intel_dpll_is_combophy(enum intel_dpll_id id) } static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, - uint32_t *target_dco_khz, + u32 *target_dco_khz, struct intel_dpll_hw_state *state) { - uint32_t dco_min_freq, dco_max_freq; + u32 dco_min_freq, dco_max_freq; int div1_vals[] = {7, 5, 3, 2}; unsigned int i; int div2; @@ -2733,12 +2734,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int refclk_khz = dev_priv->cdclk.hw.ref; - uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; - uint32_t iref_ndiv, iref_trim, iref_pulse_w; - uint32_t prop_coeff, int_coeff; - uint32_t tdc_targetcnt, feedfwgain; - uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; - uint64_t tmp; + u32 dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + u32 iref_ndiv, iref_trim, iref_pulse_w; + u32 prop_coeff, int_coeff; + u32 tdc_targetcnt, feedfwgain; + u64 ssc_stepsize, ssc_steplen, ssc_steplog; + u64 tmp; bool use_ssc = false; bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); @@ -2761,7 +2762,7 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } m2div_rem = dco_khz % (refclk_khz * m1div); - tmp = (uint64_t)m2div_rem * (1 << 22); + tmp = (u64)m2div_rem * (1 << 22); do_div(tmp, refclk_khz * m1div); m2div_frac = tmp; @@ -2820,11 +2821,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } if (use_ssc) { - tmp = (uint64_t)dco_khz * 47 * 32; + tmp = (u64)dco_khz * 47 * 32; do_div(tmp, refclk_khz * m1div * 10000); ssc_stepsize = tmp; - tmp = (uint64_t)dco_khz * 1000; + tmp = (u64)dco_khz * 1000; ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); } else { ssc_stepsize = 0; @@ -2974,7 +2975,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, intel_wakeref_t wakeref; bool ret = false; enum port port; - uint32_t val; + u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS); @@ -3101,7 +3102,7 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); - uint32_t val; + u32 val; val = I915_READ(enable_reg); val |= PLL_POWER_ENABLE; @@ -3142,7 +3143,7 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, { const enum intel_dpll_id id = pll->info->id; i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); - uint32_t val; + u32 val; /* The first steps are done by intel_ddi_post_disable(). */ diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index a033d8f06d4a..e96e79413b54 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -138,14 +138,14 @@ enum intel_dpll_id { struct intel_dpll_hw_state { /* i9xx, pch plls */ - uint32_t dpll; - uint32_t dpll_md; - uint32_t fp0; - uint32_t fp1; + u32 dpll; + u32 dpll_md; + u32 fp0; + u32 fp1; /* hsw, bdw */ - uint32_t wrpll; - uint32_t spll; + u32 wrpll; + u32 spll; /* skl */ /* @@ -154,34 +154,33 @@ struct intel_dpll_hw_state { * the register. This allows us to easily compare the state to share * the DPLL. */ - uint32_t ctrl1; + u32 ctrl1; /* HDMI only, 0 when used for DP */ - uint32_t cfgcr1, cfgcr2; + u32 cfgcr1, cfgcr2; /* cnl */ - uint32_t cfgcr0; + u32 cfgcr0; /* CNL also uses cfgcr1 */ /* bxt */ - uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, - pcsdw12; + u32 ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; /* * ICL uses the following, already defined: - * uint32_t cfgcr0, cfgcr1; + * u32 cfgcr0, cfgcr1; */ - uint32_t mg_refclkin_ctl; - uint32_t mg_clktop2_coreclkctl1; - uint32_t mg_clktop2_hsclkctl; - uint32_t mg_pll_div0; - uint32_t mg_pll_div1; - uint32_t mg_pll_lf; - uint32_t mg_pll_frac_lock; - uint32_t mg_pll_ssc; - uint32_t mg_pll_bias; - uint32_t mg_pll_tdc_coldst_bias; - uint32_t mg_pll_bias_mask; - uint32_t mg_pll_tdc_coldst_bias_mask; + u32 mg_refclkin_ctl; + u32 mg_clktop2_coreclkctl1; + u32 mg_clktop2_hsclkctl; + u32 mg_pll_div0; + u32 mg_pll_div1; + u32 mg_pll_lf; + u32 mg_pll_frac_lock; + u32 mg_pll_ssc; + u32 mg_pll_bias; + u32 mg_pll_tdc_coldst_bias; + u32 mg_pll_bias_mask; + u32 mg_pll_tdc_coldst_bias_mask; }; /** @@ -280,7 +279,7 @@ struct dpll_info { * Inform the state checker that the DPLL is kept enabled even if * not in use by any CRTC. */ - uint32_t flags; + u32 flags; }; /** @@ -343,7 +342,7 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id); + u32 pll_id); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); bool intel_dpll_is_combophy(enum intel_dpll_id id); From 830de4220a27735f8893830cbb84621f23b031b3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:27 +0200 Subject: [PATCH 188/539] drm/i915/dp: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch/whitespace fixes sprinkled on top of the changed lines. Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/3c030a12b4313eec512ce2b7a953cff439d8af67.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 142 +++++++++--------- drivers/gpu/drm/i915/intel_dp_link_training.c | 32 ++-- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index df4292bb1a4f..808ccdae15b8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -429,7 +429,7 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) } static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, - uint8_t lane_count) + u8 lane_count) { /* * FIXME: we need to synchronize the current link parameters with @@ -449,7 +449,7 @@ static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, int link_rate, - uint8_t lane_count) + u8 lane_count) { const struct drm_display_mode *fixed_mode = intel_dp->attached_connector->panel.fixed_mode; @@ -464,7 +464,7 @@ static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, } int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count) + int link_rate, u8 lane_count) { int index; @@ -572,19 +572,19 @@ intel_dp_mode_valid(struct drm_connector *connector, return MODE_OK; } -uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes) +u32 intel_dp_pack_aux(const u8 *src, int src_bytes) { - int i; - uint32_t v = 0; + int i; + u32 v = 0; if (src_bytes > 4) src_bytes = 4; for (i = 0; i < src_bytes; i++) - v |= ((uint32_t) src[i]) << ((3-i) * 8); + v |= ((u32)src[i]) << ((3 - i) * 8); return v; } -static void intel_dp_unpack_aux(uint32_t src, uint8_t *dst, int dst_bytes) +static void intel_dp_unpack_aux(u32 src, u8 *dst, int dst_bytes) { int i; if (dst_bytes > 4) @@ -643,7 +643,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) bool pll_enabled, release_cl_override = false; enum dpio_phy phy = DPIO_PHY(pipe); enum dpio_channel ch = vlv_pipe_to_channel(pipe); - uint32_t DP; + u32 DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, "skipping pipe %c power sequencer kick due to port %c being active\n", @@ -1051,12 +1051,12 @@ intel_dp_check_edp(struct intel_dp *intel_dp) } } -static uint32_t +static u32 intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); - uint32_t status; + u32 status; bool done; #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) @@ -1069,7 +1069,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) return status; } -static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -1083,7 +1083,7 @@ static uint32_t g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } -static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -1102,7 +1102,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } -static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -1119,7 +1119,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return ilk_get_aux_clock_divider(intel_dp, index); } -static uint32_t skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) +static u32 skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { /* * SKL doesn't need us to program the AUX clock divider (Hardware will @@ -1129,14 +1129,14 @@ static uint32_t skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return index ? 0 : 1; } -static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, - int send_bytes, - uint32_t aux_clock_divider) +static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, + int send_bytes, + u32 aux_clock_divider) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - uint32_t precharge, timeout; + u32 precharge, timeout; if (IS_GEN(dev_priv, 6)) precharge = 3; @@ -1159,12 +1159,12 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT); } -static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, - int send_bytes, - uint32_t unused) +static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, + int send_bytes, + u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - uint32_t ret; + u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | DP_AUX_CH_CTL_DONE | @@ -1184,19 +1184,19 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, static int intel_dp_aux_xfer(struct intel_dp *intel_dp, - const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size, + const u8 *send, int send_bytes, + u8 *recv, int recv_size, u32 aux_send_ctl_flags) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); i915_reg_t ch_ctl, ch_data[5]; - uint32_t aux_clock_divider; + u32 aux_clock_divider; intel_wakeref_t wakeref; int i, ret, recv_bytes; int try, clock = 0; - uint32_t status; + u32 status; bool vdd; ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); @@ -1369,7 +1369,7 @@ static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct intel_dp *intel_dp = container_of(aux, struct intel_dp, aux); - uint8_t txbuf[20], rxbuf[20]; + u8 txbuf[20], rxbuf[20]; size_t txsize, rxsize; int ret; @@ -1702,7 +1702,7 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) } void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, - uint8_t *link_bw, uint8_t *rate_select) + u8 *link_bw, u8 *rate_select) { /* eDP 1.4 rate select method. */ if (intel_dp->use_rate_select) { @@ -2217,7 +2217,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, } void intel_dp_set_link_params(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count, + int link_rate, u8 lane_count, bool link_mst) { intel_dp->link_trained = false; @@ -3177,20 +3177,20 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, static void _intel_dp_set_link_train(struct intel_dp *intel_dp, - uint32_t *DP, - uint8_t dp_train_pat) + u32 *DP, + u8 dp_train_pat) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint8_t train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); + u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); if (dp_train_pat & train_pat_mask) DRM_DEBUG_KMS("Using DP training pattern TPS%d\n", dp_train_pat & train_pat_mask); if (HAS_DDI(dev_priv)) { - uint32_t temp = I915_READ(DP_TP_CTL(port)); + u32 temp = I915_READ(DP_TP_CTL(port)); if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) temp |= DP_TP_CTL_SCRAMBLE_DISABLE; @@ -3289,7 +3289,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - uint32_t dp_reg = I915_READ(intel_dp->output_reg); + u32 dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; intel_wakeref_t wakeref; @@ -3508,14 +3508,14 @@ static void chv_dp_post_pll_disable(struct intel_encoder *encoder, * link status information */ bool -intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]) +intel_dp_get_link_status(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATUS_SIZE]) { return drm_dp_dpcd_read(&intel_dp->aux, DP_LANE0_1_STATUS, link_status, DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE; } /* These are source-specific values. */ -uint8_t +u8 intel_dp_voltage_max(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -3534,8 +3534,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; } -uint8_t -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) +u8 +intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; @@ -3580,12 +3580,12 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) } } -static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) +static u32 vlv_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; unsigned long demph_reg_value, preemph_reg_value, uniqtranscale_reg_value; - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; switch (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) { case DP_TRAIN_PRE_EMPH_LEVEL_0: @@ -3666,12 +3666,12 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) return 0; } -static uint32_t chv_signal_levels(struct intel_dp *intel_dp) +static u32 chv_signal_levels(struct intel_dp *intel_dp) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; u32 deemph_reg_value, margin_reg_value; bool uniq_trans_scale = false; - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; switch (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) { case DP_TRAIN_PRE_EMPH_LEVEL_0: @@ -3749,10 +3749,10 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) return 0; } -static uint32_t -g4x_signal_levels(uint8_t train_set) +static u32 +g4x_signal_levels(u8 train_set) { - uint32_t signal_levels = 0; + u32 signal_levels = 0; switch (train_set & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_LEVEL_0: @@ -3788,8 +3788,8 @@ g4x_signal_levels(uint8_t train_set) } /* SNB CPU eDP voltage swing and pre-emphasis control */ -static uint32_t -snb_cpu_edp_signal_levels(uint8_t train_set) +static u32 +snb_cpu_edp_signal_levels(u8 train_set) { int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -3816,8 +3816,8 @@ snb_cpu_edp_signal_levels(uint8_t train_set) } /* IVB CPU eDP voltage swing and pre-emphasis control */ -static uint32_t -ivb_cpu_edp_signal_levels(uint8_t train_set) +static u32 +ivb_cpu_edp_signal_levels(u8 train_set) { int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -3852,8 +3852,8 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint32_t signal_levels, mask = 0; - uint8_t train_set = intel_dp->train_set[0]; + u32 signal_levels, mask = 0; + u8 train_set = intel_dp->train_set[0]; if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) { signal_levels = bxt_signal_levels(intel_dp); @@ -3892,7 +3892,7 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp) void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = @@ -3909,7 +3909,7 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); enum port port = intel_dig_port->base.port; - uint32_t val; + u32 val; if (!HAS_DDI(dev_priv)) return; @@ -3944,7 +3944,7 @@ intel_dp_link_down(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); enum port port = encoder->port; - uint32_t DP = intel_dp->DP; + u32 DP = intel_dp->DP; if (WARN_ON(HAS_DDI(dev_priv))) return; @@ -4285,7 +4285,7 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, uint8_t lane_count, +u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, int mode_clock, int mode_hdisplay) { u16 bits_per_pixel, max_bpp_small_joiner_ram; @@ -4352,7 +4352,7 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } /* Also take into account max slice width */ - min_slice_count = min_t(uint8_t, min_slice_count, + min_slice_count = min_t(u8, min_slice_count, DIV_ROUND_UP(mode_hdisplay, max_slice_width)); @@ -4370,11 +4370,11 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } -static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; int test_link_rate; - uint8_t test_lane_count, test_link_bw; + u8 test_lane_count, test_link_bw; /* (DP CTS 1.2) * 4.3.1.11 */ @@ -4407,10 +4407,10 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) return DP_TEST_ACK; } -static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_pattern; - uint8_t test_misc; + u8 test_pattern; + u8 test_misc; __be16 h_width, v_height; int status = 0; @@ -4468,9 +4468,9 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) return DP_TEST_ACK; } -static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; + u8 test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -4512,16 +4512,16 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) return test_result; } -static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) +static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + u8 test_result = DP_TEST_NAK; return test_result; } static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { - uint8_t response = DP_TEST_NAK; - uint8_t request = 0; + u8 response = DP_TEST_NAK; + u8 request = 0; int status; status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); @@ -4847,8 +4847,8 @@ static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); - uint8_t *dpcd = intel_dp->dpcd; - uint8_t type; + u8 *dpcd = intel_dp->dpcd; + u8 type; if (lspcon->active) lspcon_resume(lspcon); @@ -5630,7 +5630,7 @@ int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, .address = DP_AUX_HDCP_AKSV, .size = DRM_HDCP_KSV_LEN, }; - uint8_t txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; + u8 txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; ssize_t dpcd_ret; int ret; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 30be0e39bd5f..b59c87daa4f7 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -24,7 +24,7 @@ #include "intel_drv.h" static void -intel_dp_dump_link_status(const uint8_t link_status[DP_LINK_STATUS_SIZE]) +intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE]) { DRM_DEBUG_KMS("ln0_1:0x%x ln2_3:0x%x align:0x%x sink:0x%x adj_req0_1:0x%x adj_req2_3:0x%x", @@ -34,17 +34,17 @@ intel_dp_dump_link_status(const uint8_t link_status[DP_LINK_STATUS_SIZE]) static void intel_get_adjust_train(struct intel_dp *intel_dp, - const uint8_t link_status[DP_LINK_STATUS_SIZE]) + const u8 link_status[DP_LINK_STATUS_SIZE]) { - uint8_t v = 0; - uint8_t p = 0; + u8 v = 0; + u8 p = 0; int lane; - uint8_t voltage_max; - uint8_t preemph_max; + u8 voltage_max; + u8 preemph_max; for (lane = 0; lane < intel_dp->lane_count; lane++) { - uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane); - uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); + u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane); + u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); if (this_v > v) v = this_v; @@ -66,9 +66,9 @@ intel_get_adjust_train(struct intel_dp *intel_dp, static bool intel_dp_set_link_train(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { - uint8_t buf[sizeof(intel_dp->train_set) + 1]; + u8 buf[sizeof(intel_dp->train_set) + 1]; int ret, len; intel_dp_program_link_training_pattern(intel_dp, dp_train_pat); @@ -92,7 +92,7 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, static bool intel_dp_reset_link_train(struct intel_dp *intel_dp, - uint8_t dp_train_pat) + u8 dp_train_pat) { memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp_set_signal_levels(intel_dp); @@ -128,11 +128,11 @@ static bool intel_dp_link_max_vswing_reached(struct intel_dp *intel_dp) static bool intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) { - uint8_t voltage; + u8 voltage; int voltage_tries, cr_tries, max_cr_tries; bool max_vswing_reached = false; - uint8_t link_config[2]; - uint8_t link_bw, rate_select; + u8 link_config[2]; + u8 link_bw, rate_select; if (intel_dp->prepare_link_retrain) intel_dp->prepare_link_retrain(intel_dp); @@ -186,7 +186,7 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) voltage_tries = 1; for (cr_tries = 0; cr_tries < max_cr_tries; ++cr_tries) { - uint8_t link_status[DP_LINK_STATUS_SIZE]; + u8 link_status[DP_LINK_STATUS_SIZE]; drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd); @@ -282,7 +282,7 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp) { int tries; u32 training_pattern; - uint8_t link_status[DP_LINK_STATUS_SIZE]; + u8 link_status[DP_LINK_STATUS_SIZE]; bool channel_eq = false; training_pattern = intel_dp_training_pattern(intel_dp); From c4aa2eca319c99a4dd21d10ebfd462a2612175f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 16 Jan 2019 11:15:28 +0200 Subject: [PATCH 189/539] drm/i915/sprite: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/63fe4b9727b55727190e50e57427f513d204f000.1547629303.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_sprite.c | 60 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 87a06fcca284..b02d3d9809e3 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -321,8 +321,8 @@ skl_program_scaler(struct intel_plane *plane, &crtc_state->scaler_state.scalers[scaler_id]; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; int hscale, vscale; @@ -477,10 +477,10 @@ skl_program_plane(struct intel_plane *plane, u32 aux_stride = skl_plane_stride(plane_state, 1); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t x = plane_state->color_plane[color_plane].x; - uint32_t y = plane_state->color_plane[color_plane].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 x = plane_state->color_plane[color_plane].x; + u32 y = plane_state->color_plane[color_plane].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; struct intel_plane *linked = plane_state->linked_plane; const struct drm_framebuffer *fb = plane_state->base.fb; u8 alpha = plane_state->base.alpha >> 8; @@ -814,10 +814,10 @@ vlv_update_plane(struct intel_plane *plane, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; unsigned long irqflags; /* Sizes are 0 based */ @@ -976,12 +976,12 @@ ivb_update_plane(struct intel_plane *plane, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; /* Sizes are 0 based */ @@ -1152,12 +1152,12 @@ g4x_update_plane(struct intel_plane *plane, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->color_plane[0].x; - uint32_t y = plane_state->color_plane[0].y; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 crtc_w = drm_rect_width(&plane_state->base.dst); + u32 crtc_h = drm_rect_height(&plane_state->base.dst); + u32 x = plane_state->color_plane[0].x; + u32 y = plane_state->color_plane[0].y; + u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; + u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; /* Sizes are 0 based */ @@ -1706,7 +1706,7 @@ out: return ret; } -static const uint32_t g4x_plane_formats[] = { +static const u32 g4x_plane_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, @@ -1714,13 +1714,13 @@ static const uint32_t g4x_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint64_t i9xx_plane_format_modifiers[] = { +static const u64 i9xx_plane_format_modifiers[] = { I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; -static const uint32_t snb_plane_formats[] = { +static const u32 snb_plane_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, @@ -1729,7 +1729,7 @@ static const uint32_t snb_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t vlv_plane_formats[] = { +static const u32 vlv_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888, @@ -1743,7 +1743,7 @@ static const uint32_t vlv_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t skl_plane_formats[] = { +static const u32 skl_plane_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1758,7 +1758,7 @@ static const uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; -static const uint32_t skl_planar_formats[] = { +static const u32 skl_planar_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -1774,7 +1774,7 @@ static const uint32_t skl_planar_formats[] = { DRM_FORMAT_NV12, }; -static const uint64_t skl_plane_format_modifiers_noccs[] = { +static const u64 skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED, @@ -1782,7 +1782,7 @@ static const uint64_t skl_plane_format_modifiers_noccs[] = { DRM_FORMAT_MOD_INVALID }; -static const uint64_t skl_plane_format_modifiers_ccs[] = { +static const u64 skl_plane_format_modifiers_ccs[] = { I915_FORMAT_MOD_Yf_TILED_CCS, I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Yf_TILED, From 1b4bd5c4a663663e8dd4b7b1c5f8565626eb068b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Jan 2019 15:54:21 +0000 Subject: [PATCH 190/539] drm/i915: Limit the for_each_set_bit() to the valid range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let static analyzers (smatch) know that we are not going to wander off the end of the array by providing a tight upper bound: drivers/gpu/drm/i915/intel_display.c:9532 hsw_get_transcoder_state() error: buffer overflow 'dev_priv->__info.trans_offsets' 6 <= 31 References: 0716931a82b4 ("drm/i915/icl: fix transcoder state readout") Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Ville Syrjala Cc: Imre Deak Cc: Madhav Chauhan Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190116155421.7660-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8d6d7ae311f4..9a6fbce1cafc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9526,7 +9526,9 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, * XXX: Do intel_display_power_get_if_enabled before reading this (for * consistency and less surprising code; it's in always on power). */ - for_each_set_bit(panel_transcoder, &panel_transcoder_mask, 32) { + for_each_set_bit(panel_transcoder, + &panel_transcoder_mask, + ARRAY_SIZE(INTEL_INFO(dev_priv)->trans_offsets)) { enum pipe trans_pipe; tmp = I915_READ(TRANS_DDI_FUNC_CTL(panel_transcoder)); From 6ddbb12e3f54e491abd1660d34da255e462b0aa2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 17 Jan 2019 14:48:31 +0000 Subject: [PATCH 191/539] drm/i915: Fix wakeref cookie handling in debugfs/i915_forcewake_user To avoid a false positive of a leaked wakeref, we can store the cookie in file->private_data and use it in intel_runtime_pm_put. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190117144831.13156-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cdf7730a79df..0f100fd5ff2c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4413,7 +4413,7 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) if (INTEL_GEN(i915) < 6) return 0; - intel_runtime_pm_get(i915); + file->private_data = (void *)(uintptr_t)intel_runtime_pm_get(i915); intel_uncore_forcewake_user_get(i915); return 0; @@ -4427,7 +4427,8 @@ static int i915_forcewake_release(struct inode *inode, struct file *file) return 0; intel_uncore_forcewake_user_put(i915); - intel_runtime_pm_put_unchecked(i915); + intel_runtime_pm_put(i915, + (intel_wakeref_t)(uintptr_t)file->private_data); return 0; } From 1dfbea041ffd2293634b1a77650b195e58e7487a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jan 2019 23:31:26 +0000 Subject: [PATCH 192/539] drm/i915/breadcrumbs: Drop assertion that we've already enabled irqs The motivation for introducing the check that we only enable breadcrumb irqs if the device's irq was installed was once upon a time we waited during suspend after disabling interrupts (which was quite slow until the bug was discovered). Since then we have the notion of pinning the breadcrumb irq, broadening it from the sole purpose of user interrupt notification and waiting, and more importantly decoupling it from a very defined time period during which enabling the irq was expected. So stop insisting the irq is installed before we setup our IMR masks, if the IER isn't yet enabled, nothing will happen and we will timeout instead, revealing the lack of irq in the hang debug messages. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190117233126.30165-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 28 ++++++++++-------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4ed7105d7ff5..bfbff04c16aa 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -158,30 +158,24 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) static void irq_enable(struct intel_engine_cs *engine) { - /* - * FIXME: Ideally we want this on the API boundary, but for the - * sake of testing with mock breadcrumbs (no HW so unable to - * enable irqs) we place it deep within the bowels, at the point - * of no return. - */ - GEM_BUG_ON(!intel_irqs_enabled(engine->i915)); + if (!engine->irq_enable) + return; /* Caller disables interrupts */ - if (engine->irq_enable) { - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); - } + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { + if (!engine->irq_disable) + return; + /* Caller disables interrupts */ - if (engine->irq_disable) { - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); - } + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); } void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) From 8d714185951b04368f6695a8c01f5c9c332ac1d3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Jan 2019 23:05:12 +0000 Subject: [PATCH 193/539] drm/i915/selftests: Query the vm under test for hugepage support Since we have the ppgtt we want to test, we can ask it directly if it is suitable for the hugepage test we intend to undertake. v2: Not everyone has full-ppgtt Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190117230512.4789-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index a52450111802..a9a2fa35876f 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1449,7 +1449,7 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!HAS_FULL_48BIT_PPGTT(dev_priv)) { + if (!ppgtt || !i915_vm_is_48bit(&ppgtt->vm)) { pr_info("48b PPGTT not supported, skipping\n"); return 0; } From 874cf192964e05dc6f8103aa8bbcd090a7541312 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Tue, 15 Jan 2019 10:06:31 +0000 Subject: [PATCH 194/539] drm: arm/komeda: Remove IRQ parsing from initial series The initial series is only introducing the basic components and not implementing IRQ handling. Remove the left over code that touches IRQs until the proper implementation is introduced in a later series. Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/komeda_dev.c | 6 ------ drivers/gpu/drm/arm/display/komeda/komeda_dev.h | 3 --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 4 +--- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 84fdf707f210..0fe6954fbbf4 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -49,7 +49,6 @@ static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np) static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) { - struct platform_device *pdev = to_platform_device(dev); struct device_node *child, *np = dev->of_node; struct clk *clk; int ret; @@ -59,11 +58,6 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) return PTR_ERR(clk); mdev->mclk = clk; - mdev->irq = platform_get_irq(pdev, 0); - if (mdev->irq < 0) { - DRM_ERROR("could not get IRQ number.\n"); - return mdev->irq; - } for_each_available_child_of_node(np, child) { if (of_node_cmp(child->name, "pipeline") == 0) { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h index a0bf7050037a..0f77dead6a23 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h @@ -81,9 +81,6 @@ struct komeda_dev { /** @mck: HW main engine clk */ struct clk *mclk; - /** @irq: irq number */ - u32 irq; - int n_pipelines; struct komeda_pipeline *pipelines[KOMEDA_MAX_PIPELINES]; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index f41b20235130..3fc096d3883e 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -142,12 +142,10 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) err = drm_dev_register(drm, 0); if (err) - goto uninstall_irq; + goto cleanup_mode_config; return kms; -uninstall_irq: - drm_irq_uninstall(drm); cleanup_mode_config: drm_mode_config_cleanup(drm); free_kms: From 293f8c0f2bb4409705e4cbc7bc1b660c024e9be9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Jan 2019 11:22:25 +0000 Subject: [PATCH 195/539] drm/i915: Use b->irq_enable() as predicate for mock engine Since commit d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling") we have required a mechanism to avoid touching the interrupt hardware for breadcrumbs, superseding our mock interface for selftests. The residual problem (ideas welcome) is in probing the mock ring registers for ring_is_idle. Hmm, maybe we should just install mock handlers for i915->uncore.mmio__write and friends? Only problem being is that we would to truly mock some expected reads. :( References: d4ccceb05591 ("drm/i915/icl: Ringbuffer interrupt handling") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190118112225.13780-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 ++++------------- drivers/gpu/drm/i915/intel_engine_cs.c | 11 ++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 4 files changed, 8 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index bfbff04c16aa..4fad93fe3678 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -287,25 +287,16 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (b->irq_armed) return false; - /* The breadcrumb irq will be disarmed on the interrupt after the + /* + * The breadcrumb irq will be disarmed on the interrupt after the * waiters are signaled. This gives us a single interrupt window in * which we can add a new waiter and avoid the cost of re-enabling * the irq. */ b->irq_armed = true; - if (I915_SELFTEST_ONLY(b->mock)) { - /* For our mock objects we want to avoid interaction - * with the real hardware (which is not set up). So - * we simply pretend we have enabled the powerwell - * and the irq, and leave it up to the mock - * implementation to call intel_engine_wakeup() - * itself when it wants to simulate a user interrupt, - */ - return true; - } - - /* Since we are waiting on a request, the GPU should be busy + /* + * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. This is tracked * by i915->gt.awake, we can forgo holding our own wakref * for the interrupt as before i915->gt.awake is released (when diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index cc6379422bc0..639bcd4cf3e9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -917,6 +917,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine) intel_wakeref_t wakeref; bool idle = true; + if (I915_SELFTEST_ONLY(!engine->mmio_base)) + return true; + /* If the whole device is asleep, the engine must be idle */ wakeref = intel_runtime_pm_get_if_in_use(dev_priv); if (!wakeref) @@ -955,9 +958,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) return false; - if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) - return true; - /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; @@ -983,10 +983,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return false; /* Ring stopped? */ - if (!ring_is_idle(engine)) - return false; - - return true; + return ring_is_idle(engine); } bool intel_engines_are_idle(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 616f6bbb18ad..c3ef0f9bf321 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -398,7 +398,6 @@ struct intel_engine_cs { unsigned int irq_count; bool irq_armed : 1; - I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; struct { diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 50e1a0b1af7e..9fe5b2c8f8d4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -201,7 +201,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); - engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ /* fake hw queue */ spin_lock_init(&engine->hw_lock); From 71fc448c1aaf896f7859c46a7c0c33fbac411455 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Jan 2019 11:36:32 +0000 Subject: [PATCH 196/539] drm/i915/selftests: Make evict tolerant of foreign objects The evict selftests presumed that all objects in use had been allocated by itself. This is a dubious claim and so instead of asserting complete control over the object lists, take (temporary) ownership of them instead. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190118113632.7056-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_evict.c | 64 +++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 067e5dfa0a24..543d618c152b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -31,30 +31,63 @@ static int populate_ggtt(struct drm_i915_private *i915) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *on; + unsigned long expected_unbound, expected_bound; + unsigned long unbound, bound, count; u64 size; + int err; + expected_unbound = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { + i915_gem_object_get(obj); + expected_unbound++; + } + + expected_bound = 0; + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { + i915_gem_object_get(obj); + expected_bound++; + } + + count = 0; for (size = 0; size + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; size += I915_GTT_PAGE_SIZE) { struct i915_vma *vma; obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto cleanup; + } + + count++; } - if (!list_empty(&i915->mm.unbound_list)) { - size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) - size++; + unbound = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) + unbound++; + if (unbound != expected_unbound) { + pr_err("%s: Found %lu objects unbound, expected %lu!\n", + __func__, unbound, expected_unbound); + err = -EINVAL; + goto cleanup; + } - pr_err("Found %lld objects unbound!\n", size); - return -EINVAL; + bound = 0; + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) + bound++; + if (bound != expected_bound + count) { + pr_err("%s: Found %lu objects bound, expected %lu!\n", + __func__, bound, expected_bound + count); + err = -EINVAL; + goto cleanup; } if (list_empty(&i915->ggtt.vm.inactive_list)) { @@ -63,6 +96,15 @@ static int populate_ggtt(struct drm_i915_private *i915) } return 0; + +cleanup: + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) + i915_gem_object_put(obj); + + return err; } static void unpin_ggtt(struct drm_i915_private *i915) From bfb0a2cb2b2d8ff3620b8cd21fdaf00c28998e19 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 17 Jan 2019 14:14:00 +0200 Subject: [PATCH 197/539] drm/i915/dp: remove PANEL_POWER_OFF macro and its use It's superfluous. Reviewed-by: Mika Kuoppala Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/7987938a7950853ac3ee43c82fb9cbb0cd59a2fa.1547726792.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 - drivers/gpu/drm/i915/intel_dp.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9a1340cfda6c..93cbd057c07a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4687,7 +4687,6 @@ enum { #define EDP_FORCE_VDD (1 << 3) #define EDP_BLC_ENABLE (1 << 2) #define PANEL_POWER_RESET (1 << 1) -#define PANEL_POWER_OFF (0 << 0) #define PANEL_POWER_ON (1 << 0) #define _PP_ON_DELAYS 0x61208 diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 808ccdae15b8..f7d5314e3395 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1000,8 +1000,7 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, /* 0x1F write to PP_DIV_REG sets max cycle delay */ I915_WRITE(pp_div_reg, pp_div | 0x1F); - I915_WRITE(pp_ctrl_reg, - PANEL_UNLOCK_REGS | PANEL_POWER_OFF); + I915_WRITE(pp_ctrl_reg, PANEL_UNLOCK_REGS); msleep(intel_dp->panel_power_cycle_delay); } } From 209760b7f6eefce3a35cd5e134bd59155ec98888 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Jan 2019 19:08:05 +0000 Subject: [PATCH 198/539] drm/i915/selftests: Allocate mock ring/timeline per context To correctly simulate preemption between contexts, we need independent timelines along each context. Make it so. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190118190805.11792-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_engine.c | 90 ++++++++++---------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 9fe5b2c8f8d4..8b8d51af7d6a 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,36 @@ struct mock_ring { struct i915_timeline timeline; }; +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = PAGE_SIZE / 2; + struct mock_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + i915_timeline_init(engine->i915, &ring->timeline, engine->name); + + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; + + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); + + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); +} + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -80,6 +110,9 @@ static void mock_context_unpin(struct intel_context *ce) static void mock_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); + + if (ce->ring) + mock_ring_free(ce->ring); } static const struct intel_context_ops mock_context_ops = { @@ -93,13 +126,22 @@ mock_context_pin(struct intel_engine_cs *engine, { struct intel_context *ce = to_intel_context(ctx, engine); - if (!ce->pin_count++) { - i915_gem_context_get(ctx); - ce->ring = engine->buffer; - ce->ops = &mock_context_ops; + if (ce->pin_count++) + return ce; + + if (!ce->ring) { + ce->ring = mock_ring(engine); + if (!ce->ring) + goto err; } + ce->ops = &mock_context_ops; + i915_gem_context_get(ctx); return ce; + +err: + ce->pin_count = 0; + return ERR_PTR(-ENOMEM); } static int mock_request_alloc(struct i915_request *request) @@ -143,36 +185,6 @@ static void mock_submit_request(struct i915_request *request) spin_unlock_irq(&engine->hw_lock); } -static struct intel_ring *mock_ring(struct intel_engine_cs *engine) -{ - const unsigned long sz = PAGE_SIZE / 2; - struct mock_ring *ring; - - ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); - if (!ring) - return NULL; - - i915_timeline_init(engine->i915, &ring->timeline, engine->name); - - ring->base.size = sz; - ring->base.effective_size = sz; - ring->base.vaddr = (void *)(ring + 1); - ring->base.timeline = &ring->timeline; - - INIT_LIST_HEAD(&ring->base.request_list); - intel_ring_update_space(&ring->base); - - return &ring->base; -} - -static void mock_ring_free(struct intel_ring *base) -{ - struct mock_ring *ring = container_of(base, typeof(*ring), base); - - i915_timeline_fini(&ring->timeline); - kfree(ring); -} - struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id) @@ -207,17 +219,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) - goto err_breadcrumbs; - if (IS_ERR(intel_context_pin(i915->kernel_context, &engine->base))) - goto err_ring; + goto err_breadcrumbs; return &engine->base; -err_ring: - mock_ring_free(engine->base.buffer); err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); i915_timeline_fini(&engine->base.timeline); @@ -260,8 +266,6 @@ void mock_engine_free(struct intel_engine_cs *engine) __intel_context_unpin(engine->i915->kernel_context, engine); - mock_ring_free(engine->buffer); - intel_engine_fini_breadcrumbs(engine); i915_timeline_fini(&engine->timeline); From 129fe7516b233892eb6ded103b10c61d9a0a52cb Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:19 +0200 Subject: [PATCH 199/539] drm/i915/color: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_color.c | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 37fd9ddf762e..299eb7858adc 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -142,7 +142,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); int i, pipe = crtc->pipe; - uint16_t coeffs[9] = { 0, }; + u16 coeffs[9] = { 0, }; bool limited_color_range = false; /* @@ -171,7 +171,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) * hardware. */ for (i = 0; i < ARRAY_SIZE(coeffs); i++) { - uint64_t abs_coeff = ((1ULL << 63) - 1) & input[i]; + u64 abs_coeff = ((1ULL << 63) - 1) & input[i]; /* * Clamp input value to min/max supported by @@ -233,7 +233,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(PIPE_CSC_PREOFF_LO(pipe), 0); if (INTEL_GEN(dev_priv) > 6) { - uint16_t postoff = 0; + u16 postoff = 0; if (limited_color_range) postoff = (16 * (1 << 12) / 255) & 0x1fff; @@ -244,7 +244,7 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(PIPE_CSC_MODE(pipe), 0); } else { - uint32_t mode = CSC_MODE_YUV_TO_RGB; + u32 mode = CSC_MODE_YUV_TO_RGB; if (limited_color_range) mode |= CSC_BLACK_SCREEN_OFFSET; @@ -261,15 +261,15 @@ static void cherryview_load_csc_matrix(struct intel_crtc_state *crtc_state) struct drm_device *dev = crtc_state->base.crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); int pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t mode; + u32 mode; if (crtc_state->base.ctm) { struct drm_color_ctm *ctm = crtc_state->base.ctm->data; - uint16_t coeffs[9] = { 0, }; + u16 coeffs[9] = { 0, }; int i; for (i = 0; i < ARRAY_SIZE(coeffs); i++) { - uint64_t abs_coeff = + u64 abs_coeff = ((1ULL << 63) - 1) & ctm->matrix[i]; /* Round coefficient. */ @@ -331,7 +331,7 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, if (blob) { struct drm_color_lut *lut = blob->data; for (i = 0; i < 256; i++) { - uint32_t word = + u32 word = (drm_color_lut_extract(lut[i].red, 8) << 16) | (drm_color_lut_extract(lut[i].green, 8) << 8) | drm_color_lut_extract(lut[i].blue, 8); @@ -343,7 +343,7 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, } } else { for (i = 0; i < 256; i++) { - uint32_t word = (i << 16) | (i << 8) | i; + u32 word = (i << 16) | (i << 8) | i; if (HAS_GMCH_DISPLAY(dev_priv)) I915_WRITE(PALETTE(pipe, i), word); @@ -388,7 +388,7 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -397,7 +397,7 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) struct drm_color_lut *lut = crtc_state->base.degamma_lut->data; for (i = 0; i < lut_size; i++) { - uint32_t word = + u32 word = drm_color_lut_extract(lut[i].red, 10) << 20 | drm_color_lut_extract(lut[i].green, 10) << 10 | drm_color_lut_extract(lut[i].blue, 10); @@ -406,7 +406,7 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) } } else { for (i = 0; i < lut_size; i++) { - uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1); + u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); I915_WRITE(PREC_PAL_DATA(pipe), (v << 20) | (v << 10) | v); @@ -418,7 +418,7 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); @@ -431,7 +431,7 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) struct drm_color_lut *lut = crtc_state->base.gamma_lut->data; for (i = 0; i < lut_size; i++) { - uint32_t word = + u32 word = (drm_color_lut_extract(lut[i].red, 10) << 20) | (drm_color_lut_extract(lut[i].green, 10) << 10) | drm_color_lut_extract(lut[i].blue, 10); @@ -449,7 +449,7 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) drm_color_lut_extract(lut[i].blue, 16)); } else { for (i = 0; i < lut_size; i++) { - uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1); + u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); I915_WRITE(PREC_PAL_DATA(pipe), (v << 20) | (v << 10) | v); @@ -491,8 +491,8 @@ static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - const uint32_t lut_size = 33; - uint32_t i; + const u32 lut_size = 33; + u32 i; /* * When setting the auto-increment bit, the hardware seems to @@ -507,7 +507,7 @@ static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) * different values per channel, so this just loads a linear table. */ for (i = 0; i < lut_size; i++) { - uint32_t v = (i * (1 << 16)) / (lut_size - 1); + u32 v = (i * (1 << 16)) / (lut_size - 1); I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); } @@ -544,8 +544,8 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; struct drm_color_lut *lut; - uint32_t i, lut_size; - uint32_t word0, word1; + u32 i, lut_size; + u32 word0, word1; if (crtc_state_is_legacy_gamma(crtc_state)) { /* Turn off degamma/gamma on CGM block. */ From 5ce9a6497914b21ca995ebbc0322b287b28176e8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:20 +0200 Subject: [PATCH 200/539] drm/i915/pm: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch fixes sprinkled on top of the changed lines. Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 213 ++++++++++++++++---------------- 1 file changed, 105 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7613ae72df3d..8b63afa3a221 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -480,7 +480,7 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) int sprite0_start, sprite1_start; switch (pipe) { - uint32_t dsparb, dsparb2, dsparb3; + u32 dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); dsparb2 = I915_READ(DSPARB2); @@ -513,7 +513,7 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; @@ -529,7 +529,7 @@ static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, static int i830_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x1ff; @@ -546,7 +546,7 @@ static int i830_get_fifo_size(struct drm_i915_private *dev_priv, static int i845_get_fifo_size(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane) { - uint32_t dsparb = I915_READ(DSPARB); + u32 dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; @@ -667,9 +667,9 @@ static unsigned int intel_wm_method1(unsigned int pixel_rate, unsigned int cpp, unsigned int latency) { - uint64_t ret; + u64 ret; - ret = (uint64_t) pixel_rate * cpp * latency; + ret = (u64)pixel_rate * cpp * latency; ret = DIV_ROUND_UP_ULL(ret, 10000); return ret; @@ -1089,9 +1089,9 @@ static int g4x_fbc_fifo_size(int level) } } -static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int level) +static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -1188,9 +1188,9 @@ static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, return dirty; } -static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t pri_val); +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 pri_val); static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -1598,9 +1598,9 @@ static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv) } } -static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int level) +static u16 vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -1968,7 +1968,7 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, spin_lock(&dev_priv->uncore.lock); switch (crtc->pipe) { - uint32_t dsparb, dsparb2, dsparb3; + u32 dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ_FW(DSPARB); dsparb2 = I915_READ_FW(DSPARB2); @@ -2262,8 +2262,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); const struct intel_watermark_params *wm_info; - uint32_t fwater_lo; - uint32_t fwater_hi; + u32 fwater_lo; + u32 fwater_hi; int cwm, srwm = 1; int fifo_size; int planea_wm, planeb_wm; @@ -2406,7 +2406,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; - uint32_t fwater_lo; + u32 fwater_lo; int planea_wm; crtc = single_enabled_crtc(dev_priv); @@ -2455,8 +2455,7 @@ static unsigned int ilk_wm_method2(unsigned int pixel_rate, return ret; } -static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, - uint8_t cpp) +static u32 ilk_wm_fbc(u32 pri_val, u32 horiz_pixels, u8 cpp) { /* * Neither of these should be possible since this function shouldn't be @@ -2473,22 +2472,21 @@ static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, } struct ilk_wm_maximums { - uint16_t pri; - uint16_t spr; - uint16_t cur; - uint16_t fbc; + u16 pri; + u16 spr; + u16 cur; + u16 fbc; }; /* * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value, - bool is_lp) +static u32 ilk_compute_pri_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value, bool is_lp) { - uint32_t method1, method2; + u32 method1, method2; int cpp; if (mem_value == 0) @@ -2516,11 +2514,11 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value) +static u32 ilk_compute_spr_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value) { - uint32_t method1, method2; + u32 method1, method2; int cpp; if (mem_value == 0) @@ -2543,9 +2541,9 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, * For both WM_PIPE and WM_LP. * mem_value must be in 0.1us units. */ -static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t mem_value) +static u32 ilk_compute_cur_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 mem_value) { int cpp; @@ -2563,9 +2561,9 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, } /* Only for WM_LP. */ -static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *pstate, - uint32_t pri_val) +static u32 ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + u32 pri_val) { int cpp; @@ -2731,9 +2729,9 @@ static bool ilk_validate_wm_level(int level, DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", level, result->cur_val, max->cur); - result->pri_val = min_t(uint32_t, result->pri_val, max->pri); - result->spr_val = min_t(uint32_t, result->spr_val, max->spr); - result->cur_val = min_t(uint32_t, result->cur_val, max->cur); + result->pri_val = min_t(u32, result->pri_val, max->pri); + result->spr_val = min_t(u32, result->spr_val, max->spr); + result->cur_val = min_t(u32, result->cur_val, max->cur); result->enable = true; } @@ -2749,9 +2747,9 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, const struct intel_plane_state *curstate, struct intel_wm_level *result) { - uint16_t pri_latency = dev_priv->wm.pri_latency[level]; - uint16_t spr_latency = dev_priv->wm.spr_latency[level]; - uint16_t cur_latency = dev_priv->wm.cur_latency[level]; + u16 pri_latency = dev_priv->wm.pri_latency[level]; + u16 spr_latency = dev_priv->wm.spr_latency[level]; + u16 cur_latency = dev_priv->wm.cur_latency[level]; /* WM1+ latency values stored in 0.5us units */ if (level > 0) { @@ -2775,7 +2773,7 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, result->enable = true; } -static uint32_t +static u32 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) { const struct intel_atomic_state *intel_state = @@ -2804,10 +2802,10 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) } static void intel_read_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[8]) + u16 wm[8]) { if (INTEL_GEN(dev_priv) >= 9) { - uint32_t val; + u32 val; int ret, i; int level, max_level = ilk_wm_max_level(dev_priv); @@ -2891,7 +2889,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[0] += 1; } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - uint64_t sskpd = I915_READ64(MCH_SSKPD); + u64 sskpd = I915_READ64(MCH_SSKPD); wm[0] = (sskpd >> 56) & 0xFF; if (wm[0] == 0) @@ -2901,14 +2899,14 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[3] = (sskpd >> 20) & 0x1FF; wm[4] = (sskpd >> 32) & 0x1FF; } else if (INTEL_GEN(dev_priv) >= 6) { - uint32_t sskpd = I915_READ(MCH_SSKPD); + u32 sskpd = I915_READ(MCH_SSKPD); wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; } else if (INTEL_GEN(dev_priv) >= 5) { - uint32_t mltr = I915_READ(MLTR_ILK); + u32 mltr = I915_READ(MLTR_ILK); /* ILK primary LP0 latency is 700 ns */ wm[0] = 7; @@ -2920,7 +2918,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5]) + u16 wm[5]) { /* ILK sprite LP0 latency is 1300 ns */ if (IS_GEN(dev_priv, 5)) @@ -2928,7 +2926,7 @@ static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, } static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5]) + u16 wm[5]) { /* ILK cursor LP0 latency is 1300 ns */ if (IS_GEN(dev_priv, 5)) @@ -2950,7 +2948,7 @@ int ilk_wm_max_level(const struct drm_i915_private *dev_priv) static void intel_print_wm_latency(struct drm_i915_private *dev_priv, const char *name, - const uint16_t wm[8]) + const u16 wm[8]) { int level, max_level = ilk_wm_max_level(dev_priv); @@ -2979,7 +2977,7 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, } static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, - uint16_t wm[5], uint16_t min) + u16 wm[5], u16 min) { int level, max_level = ilk_wm_max_level(dev_priv); @@ -2988,7 +2986,7 @@ static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, wm[0] = max(wm[0], min); for (level = 1; level <= max_level; level++) - wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); + wm[level] = max_t(u16, wm[level], DIV_ROUND_UP(min, 5)); return true; } @@ -3535,7 +3533,7 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv, { struct ilk_wm_values *previous = &dev_priv->wm.hw; unsigned int dirty; - uint32_t val; + u32 val; dirty = ilk_compute_wm_dirty(dev_priv, previous, results); if (!dirty) @@ -4033,7 +4031,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { struct intel_plane *plane = to_intel_plane(pstate->base.plane); - uint32_t src_w, src_h, dst_w, dst_h; + u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4079,8 +4077,8 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) return pipe_downscale; if (crtc_state->pch_pfit.enabled) { - uint32_t src_w, src_h, dst_w, dst_h; - uint32_t pfit_size = crtc_state->pch_pfit.size; + u32 src_w, src_h, dst_w, dst_h; + u32 pfit_size = crtc_state->pch_pfit.size; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4113,7 +4111,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, const struct drm_plane_state *pstate; struct intel_plane_state *intel_pstate; int crtc_clock, dotclk; - uint32_t pipe_max_pixel_rate; + u32 pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); @@ -4169,8 +4167,8 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, { struct intel_plane *intel_plane = to_intel_plane(intel_pstate->base.plane); - uint32_t data_rate; - uint32_t width = 0, height = 0; + u32 data_rate; + u32 width = 0, height = 0; struct drm_framebuffer *fb; u32 format; uint_fixed_16_16_t down_scale_amount; @@ -4313,15 +4311,15 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; struct skl_plane_wm *wm; - uint16_t alloc_size, start = 0; - uint16_t total[I915_MAX_PLANES] = {}; - uint16_t uv_total[I915_MAX_PLANES] = {}; + u16 alloc_size, start = 0; + u16 total[I915_MAX_PLANES] = {}; + u16 uv_total[I915_MAX_PLANES] = {}; u64 total_data_rate; enum plane_id plane_id; int num_active; u64 plane_data_rate[I915_MAX_PLANES] = {}; u64 uv_plane_data_rate[I915_MAX_PLANES] = {}; - uint16_t blocks = 0; + u16 blocks = 0; int level; /* Clear the partitioning for disabled planes. */ @@ -4493,10 +4491,10 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. */ static uint_fixed_16_16_t -skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, - uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size) +skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate, + u8 cpp, u32 latency, u32 dbuf_block_size) { - uint32_t wm_intermediate_val; + u32 wm_intermediate_val; uint_fixed_16_16_t ret; if (latency == 0) @@ -4511,12 +4509,11 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, return ret; } -static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, - uint32_t pipe_htotal, - uint32_t latency, - uint_fixed_16_16_t plane_blocks_per_line) +static uint_fixed_16_16_t +skl_wm_method2(u32 pixel_rate, u32 pipe_htotal, u32 latency, + uint_fixed_16_16_t plane_blocks_per_line) { - uint32_t wm_intermediate_val; + u32 wm_intermediate_val; uint_fixed_16_16_t ret; if (latency == 0) @@ -4532,8 +4529,8 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, static uint_fixed_16_16_t intel_get_linetime_us(const struct intel_crtc_state *cstate) { - uint32_t pixel_rate; - uint32_t crtc_htotal; + u32 pixel_rate; + u32 crtc_htotal; uint_fixed_16_16_t linetime_us; if (!cstate->base.active) @@ -4550,11 +4547,11 @@ intel_get_linetime_us(const struct intel_crtc_state *cstate) return linetime_us; } -static uint32_t +static u32 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { - uint64_t adjusted_pixel_rate; + u64 adjusted_pixel_rate; uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... */ @@ -4581,7 +4578,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; - uint32_t interm_pbpl; + u32 interm_pbpl; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); @@ -4686,10 +4683,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, { struct drm_i915_private *dev_priv = to_i915(intel_pstate->base.plane->dev); - uint32_t latency = dev_priv->wm.skl_latency[level]; + u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - uint32_t res_blocks, res_lines; + u32 res_blocks, res_lines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); @@ -4795,13 +4792,13 @@ skl_compute_wm_levels(const struct intel_crtc_state *cstate, } } -static uint32_t +static u32 skl_compute_linetime_wm(const struct intel_crtc_state *cstate) { struct drm_atomic_state *state = cstate->base.state; struct drm_i915_private *dev_priv = to_i915(state->dev); uint_fixed_16_16_t linetime_us; - uint32_t linetime_wm; + u32 linetime_wm; linetime_us = intel_get_linetime_us(cstate); @@ -4824,9 +4821,9 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, { struct drm_device *dev = cstate->base.crtc->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - uint16_t trans_min, trans_y_tile_min; - const uint16_t trans_amount = 10; /* This is configurable amount */ - uint16_t wm0_sel_res_b, trans_offset_b, res_blocks; + u16 trans_min, trans_y_tile_min; + const u16 trans_amount = 10; /* This is configurable amount */ + u16 wm0_sel_res_b, trans_offset_b, res_blocks; /* Transition WM are not recommended by HW team for GEN9 */ if (INTEL_GEN(dev_priv) <= 9) @@ -4855,8 +4852,8 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, wm0_sel_res_b = wm->wm[0].plane_res_b - 1; if (wp->y_tiled) { - trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2, - wp->y_tile_minimum); + trans_y_tile_min = + (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum); res_blocks = max(wm0_sel_res_b, trans_y_tile_min) + trans_offset_b; } else { @@ -5030,7 +5027,7 @@ static void skl_write_wm_level(struct drm_i915_private *dev_priv, i915_reg_t reg, const struct skl_wm_level *level) { - uint32_t val = 0; + u32 val = 0; if (level->plane_en) { val |= PLANE_WM_EN; @@ -5161,12 +5158,12 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate, return 0; } -static uint32_t +static u32 pipes_modified(struct intel_atomic_state *state) { struct intel_crtc *crtc; struct intel_crtc_state *cstate; - uint32_t i, ret = 0; + u32 i, ret = 0; for_each_new_intel_crtc_in_state(state, crtc, cstate, i) ret |= drm_crtc_mask(&crtc->base); @@ -5267,7 +5264,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) const struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; - uint32_t realloc_pipes = pipes_modified(state); + u32 realloc_pipes = pipes_modified(state); int ret, i; /* @@ -5566,7 +5563,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -static inline void skl_wm_level_from_reg_val(uint32_t val, +static inline void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level) { level->plane_en = val & PLANE_WM_EN; @@ -5582,7 +5579,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, enum pipe pipe = crtc->pipe; int level, max_level; enum plane_id plane_id; - uint32_t val; + u32 val; max_level = ilk_wm_max_level(dev_priv); @@ -5694,7 +5691,7 @@ static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc) static void g4x_read_wm_values(struct drm_i915_private *dev_priv, struct g4x_wm_values *wm) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(DSPFW1); wm->sr.plane = _FW_WM(tmp, SR); @@ -5721,7 +5718,7 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { enum pipe pipe; - uint32_t tmp; + u32 tmp; for_each_pipe(dev_priv, pipe) { tmp = I915_READ(VLV_DDL(pipe)); @@ -8505,7 +8502,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; /* * Required for FBC @@ -8577,7 +8574,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { int pipe; - uint32_t val; + u32 val; /* * On Ibex Peak and Cougar Point, we need to disable clock @@ -8612,7 +8609,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(MCH_SSKPD); if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) @@ -8622,7 +8619,7 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); @@ -8716,7 +8713,7 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) { - uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); + u32 reg = I915_READ(GEN7_FF_THREAD_MODE); /* * WaVSThreadDispatchOverride:ivb,vlv @@ -8752,7 +8749,7 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) static void lpt_suspend_hw(struct drm_i915_private *dev_priv) { if (HAS_PCH_LPT_LP(dev_priv)) { - uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); + u32 val = I915_READ(SOUTH_DSPCLK_GATE_D); val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; I915_WRITE(SOUTH_DSPCLK_GATE_D, val); @@ -8990,7 +8987,7 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t snpcr; + u32 snpcr; I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); @@ -9199,7 +9196,7 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv) static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) { - uint32_t dspclk_gate; + u32 dspclk_gate; I915_WRITE(RENCLK_GATE_D1, 0); I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | @@ -9449,7 +9446,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv) static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) { - uint32_t flags = + u32 flags = I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; switch (flags) { @@ -9472,7 +9469,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) { - uint32_t flags = + u32 flags = I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; switch (flags) { From 3d0c5005d3f0c511307ec808919c85725c8d7644 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:21 +0200 Subject: [PATCH 201/539] drm/i915/ddi: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-4-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ce44744a5f9d..b0bb8dfc2ed5 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -974,7 +974,7 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port)); } -static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) +static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) { switch (pll->info->id) { case DPLL_ID_WRPLL1: @@ -995,8 +995,8 @@ static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } -static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { const struct intel_shared_dpll *pll = crtc_state->shared_dpll; int clock = crtc_state->port_clock; @@ -1243,8 +1243,8 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, enum intel_dpll_id pll_id) { i915_reg_t cfgcr1_reg, cfgcr2_reg; - uint32_t cfgcr1_val, cfgcr2_val; - uint32_t p0, p1, p2, dco_freq; + u32 cfgcr1_val, cfgcr2_val; + u32 p0, p1, p2, dco_freq; cfgcr1_reg = DPLL_CFGCR1(pll_id); cfgcr2_reg = DPLL_CFGCR2(pll_id); @@ -1305,8 +1305,8 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, enum intel_dpll_id pll_id) { - uint32_t cfgcr0, cfgcr1; - uint32_t p0, p1, p2, dco_freq, ref_clock; + u32 cfgcr0, cfgcr1; + u32 p0, p1, p2, dco_freq, ref_clock; if (INTEL_GEN(dev_priv) >= 11) { cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(pll_id)); @@ -1471,7 +1471,7 @@ static void icl_ddi_clock_get(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int link_clock = 0; - uint32_t pll_id; + u32 pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); if (intel_port_is_combophy(dev_priv, port)) { @@ -1496,7 +1496,7 @@ static void cnl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t cfgcr0; + u32 cfgcr0; enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1550,7 +1550,7 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t dpll_ctl1; + u32 dpll_ctl1; enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1739,7 +1739,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - uint32_t temp; + u32 temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1757,7 +1757,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = encoder->port; - uint32_t temp; + u32 temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ temp = TRANS_DDI_FUNC_ENABLE; @@ -1841,7 +1841,7 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); - uint32_t val = I915_READ(reg); + u32 val = I915_READ(reg); val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC); val |= TRANS_DDI_PORT_NONE; @@ -1863,7 +1863,7 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, intel_wakeref_t wakeref; enum pipe pipe = 0; int ret = 0; - uint32_t tmp; + u32 tmp; wakeref = intel_display_power_get_if_enabled(dev_priv, intel_encoder->power_domain); @@ -1896,7 +1896,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) enum transcoder cpu_transcoder; intel_wakeref_t wakeref; enum pipe pipe = 0; - uint32_t tmp; + u32 tmp; bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2132,7 +2132,7 @@ void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) } static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - enum port port, uint8_t iboost) + enum port port, u8 iboost) { u32 tmp; @@ -2151,7 +2151,7 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - uint8_t iboost; + u8 iboost; if (type == INTEL_OUTPUT_HDMI) iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; @@ -2665,7 +2665,7 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level); } -static uint32_t translate_signal_level(int signal_levels) +static u32 translate_signal_level(int signal_levels) { int i; @@ -2680,9 +2680,9 @@ static uint32_t translate_signal_level(int signal_levels) return 0; } -static uint32_t intel_ddi_dp_level(struct intel_dp *intel_dp) +static u32 intel_ddi_dp_level(struct intel_dp *intel_dp) { - uint8_t train_set = intel_dp->train_set[0]; + u8 train_set = intel_dp->train_set[0]; int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); @@ -2707,7 +2707,7 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) return 0; } -uint32_t ddi_signal_levels(struct intel_dp *intel_dp) +u32 ddi_signal_levels(struct intel_dp *intel_dp) { struct intel_digital_port *dport = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); @@ -2721,8 +2721,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) } static inline -uint32_t icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, - enum port port) +u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, + enum port port) { if (intel_port_is_combophy(dev_priv, port)) { return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(port); @@ -2857,7 +2857,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - uint32_t val; + u32 val; const struct intel_shared_dpll *pll = crtc_state->shared_dpll; if (WARN_ON(!pll)) @@ -3356,7 +3356,7 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - uint32_t val; + u32 val; /* * Bspec lists this as both step 13 (before DDI_BUF_CTL disable) @@ -3644,7 +3644,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->base.port; - uint32_t val; + u32 val; bool wait = false; if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { From 5a01892ae58f621b10dd85ec41321bdd18077418 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:22 +0200 Subject: [PATCH 202/539] drm/i915/csr: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch/whitepace fixes sprinkled on top of the changed lines. v2: more whitespace fixes (Ville, José) Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-5-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 68 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index ea5fb64d33dd..e8ac04c33e29 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -70,50 +70,50 @@ MODULE_FIRMWARE(BXT_CSR_PATH); struct intel_css_header { /* 0x09 for DMC */ - uint32_t module_type; + u32 module_type; /* Includes the DMC specific header in dwords */ - uint32_t header_len; + u32 header_len; /* always value would be 0x10000 */ - uint32_t header_ver; + u32 header_ver; /* Not used */ - uint32_t module_id; + u32 module_id; /* Not used */ - uint32_t module_vendor; + u32 module_vendor; /* in YYYYMMDD format */ - uint32_t date; + u32 date; /* Size in dwords (CSS_Headerlen + PackageHeaderLen + dmc FWsLen)/4 */ - uint32_t size; + u32 size; /* Not used */ - uint32_t key_size; + u32 key_size; /* Not used */ - uint32_t modulus_size; + u32 modulus_size; /* Not used */ - uint32_t exponent_size; + u32 exponent_size; /* Not used */ - uint32_t reserved1[12]; + u32 reserved1[12]; /* Major Minor */ - uint32_t version; + u32 version; /* Not used */ - uint32_t reserved2[8]; + u32 reserved2[8]; /* Not used */ - uint32_t kernel_header_info; + u32 kernel_header_info; } __packed; struct intel_fw_info { - uint16_t reserved1; + u16 reserved1; /* Stepping (A, B, C, ..., *). * is a wildcard */ char stepping; @@ -121,8 +121,8 @@ struct intel_fw_info { /* Sub-stepping (0, 1, ..., *). * is a wildcard */ char substepping; - uint32_t offset; - uint32_t reserved2; + u32 offset; + u32 reserved2; } __packed; struct intel_package_header { @@ -135,14 +135,14 @@ struct intel_package_header { unsigned char reserved[10]; /* Number of valid entries in the FWInfo array below */ - uint32_t num_entries; + u32 num_entries; struct intel_fw_info fw_info[20]; } __packed; struct intel_dmc_header { /* always value would be 0x40403E3E */ - uint32_t signature; + u32 signature; /* DMC binary header length */ unsigned char header_len; @@ -151,30 +151,30 @@ struct intel_dmc_header { unsigned char header_ver; /* Reserved */ - uint16_t dmcc_ver; + u16 dmcc_ver; /* Major, Minor */ - uint32_t project; + u32 project; /* Firmware program size (excluding header) in dwords */ - uint32_t fw_size; + u32 fw_size; /* Major Minor version */ - uint32_t fw_version; + u32 fw_version; /* Number of valid MMIO cycles present. */ - uint32_t mmio_count; + u32 mmio_count; /* MMIO address */ - uint32_t mmioaddr[8]; + u32 mmioaddr[8]; /* MMIO data */ - uint32_t mmiodata[8]; + u32 mmiodata[8]; /* FW filename */ unsigned char dfile[32]; - uint32_t reserved1[2]; + u32 reserved1[2]; } __packed; struct stepping_info { @@ -230,7 +230,7 @@ intel_get_stepping_info(struct drm_i915_private *dev_priv) static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) { - uint32_t val, mask; + u32 val, mask; mask = DC_STATE_DEBUG_MASK_MEMORY_UP; @@ -257,7 +257,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) void intel_csr_load_program(struct drm_i915_private *dev_priv) { u32 *payload = dev_priv->csr.dmc_payload; - uint32_t i, fw_size; + u32 i, fw_size; if (!HAS_CSR(dev_priv)) { DRM_ERROR("No CSR support available for this platform\n"); @@ -289,17 +289,17 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv) gen9_set_dc_state_debugmask(dev_priv); } -static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, - const struct firmware *fw) +static u32 *parse_csr_fw(struct drm_i915_private *dev_priv, + const struct firmware *fw) { struct intel_css_header *css_header; struct intel_package_header *package_header; struct intel_dmc_header *dmc_header; struct intel_csr *csr = &dev_priv->csr; const struct stepping_info *si = intel_get_stepping_info(dev_priv); - uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes; - uint32_t i; - uint32_t *dmc_payload; + u32 dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes; + u32 i; + u32 *dmc_payload; if (!fw) return NULL; From ba3f4d0ad346a0ff83133b84a178d2d5fb0a2b37 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:23 +0200 Subject: [PATCH 203/539] drm/i915/display: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-6-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 104 +++++++++++++-------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9a6fbce1cafc..e0169979c51e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -52,7 +52,7 @@ #include "i915_trace.h" /* Primary plane formats for gen <= 3 */ -static const uint32_t i8xx_primary_formats[] = { +static const u32 i8xx_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB1555, @@ -60,7 +60,7 @@ static const uint32_t i8xx_primary_formats[] = { }; /* Primary plane formats for gen >= 4 */ -static const uint32_t i965_primary_formats[] = { +static const u32 i965_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, @@ -69,18 +69,18 @@ static const uint32_t i965_primary_formats[] = { DRM_FORMAT_XBGR2101010, }; -static const uint64_t i9xx_format_modifiers[] = { +static const u64 i9xx_format_modifiers[] = { I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; /* Cursor formats */ -static const uint32_t intel_cursor_formats[] = { +static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, }; -static const uint64_t cursor_format_modifiers[] = { +static const u64 cursor_format_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; @@ -496,7 +496,7 @@ static int pnv_calc_dpll_params(int refclk, struct dpll *clock) return clock->dot; } -static uint32_t i9xx_dpll_compute_m(struct dpll *dpll) +static u32 i9xx_dpll_compute_m(struct dpll *dpll) { return 5 * (dpll->m1 + 2) + (dpll->m2 + 2); } @@ -531,8 +531,8 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) clock->p = clock->p1 * clock->p2; if (WARN_ON(clock->n == 0 || clock->p == 0)) return 0; - clock->vco = DIV_ROUND_CLOSEST_ULL((uint64_t)refclk * clock->m, - clock->n << 22); + clock->vco = DIV_ROUND_CLOSEST_ULL((u64)refclk * clock->m, + clock->n << 22); clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p); return clock->dot / 5; @@ -894,7 +894,7 @@ chv_find_best_dpll(const struct intel_limit *limit, struct drm_device *dev = crtc->base.dev; unsigned int best_error_ppm; struct dpll clock; - uint64_t m2; + u64 m2; int found = false; memset(best_clock, 0, sizeof(*best_clock)); @@ -916,7 +916,7 @@ chv_find_best_dpll(const struct intel_limit *limit, clock.p = clock.p1 * clock.p2; - m2 = DIV_ROUND_CLOSEST_ULL(((uint64_t)target * clock.p * + m2 = DIV_ROUND_CLOSEST_ULL(((u64)target * clock.p * clock.n) << 22, refclk * clock.m1); if (m2 > INT_MAX/clock.m1) @@ -1613,7 +1613,7 @@ static void ironlake_enable_pch_transcoder(const struct intel_crtc_state *crtc_s struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; - uint32_t val, pipeconf_val; + u32 val, pipeconf_val; /* Make sure PCH DPLL is enabled */ assert_shared_dpll_enabled(dev_priv, crtc_state->shared_dpll); @@ -1701,7 +1701,7 @@ static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { i915_reg_t reg; - uint32_t val; + u32 val; /* FDI relies on the transcoder */ assert_fdi_tx_disabled(dev_priv, pipe); @@ -2378,7 +2378,7 @@ static int intel_fb_offset_to_xy(int *x, int *y, return 0; } -static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) +static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) { switch (fb_modifier) { case I915_FORMAT_MOD_X_TILED: @@ -3510,7 +3510,7 @@ u32 skl_plane_stride(const struct intel_plane_state *plane_state, return stride / skl_plane_stride_mult(fb, color_plane, rotation); } -static u32 skl_plane_ctl_format(uint32_t pixel_format) +static u32 skl_plane_ctl_format(u32 pixel_format) { switch (pixel_format) { case DRM_FORMAT_C8: @@ -3580,7 +3580,7 @@ static u32 glk_plane_color_ctl_alpha(const struct intel_plane_state *plane_state } } -static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) +static u32 skl_plane_ctl_tiling(u64 fb_modifier) { switch (fb_modifier) { case DRM_FORMAT_MOD_LINEAR: @@ -4600,7 +4600,7 @@ static void ironlake_pch_transcoder_set_timings(const struct intel_crtc_state *c static void cpt_set_fdi_bc_bifurcation(struct drm_i915_private *dev_priv, bool enable) { - uint32_t temp; + u32 temp; temp = I915_READ(SOUTH_CHICKEN1); if (!!(temp & FDI_BC_BIFURCATION_SELECT) == enable) @@ -5723,7 +5723,7 @@ static void icl_pipe_mbus_enable(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = MBUS_DBOX_A_CREDIT(2); val |= MBUS_DBOX_BW_CREDIT(1); @@ -6627,9 +6627,9 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } -static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) { - uint32_t pixel_rate; + u32 pixel_rate; pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; @@ -6639,8 +6639,8 @@ static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) */ if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; + u64 pipe_w, pipe_h, pfit_w, pfit_h; + u32 pfit_size = pipe_config->pch_pfit.size; pipe_w = pipe_config->pipe_src_w; pipe_h = pipe_config->pipe_src_h; @@ -6655,7 +6655,7 @@ static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) if (WARN_ON(!pfit_w || !pfit_h)) return pixel_rate; - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pixel_rate = div_u64((u64)pixel_rate * pipe_w * pipe_h, pfit_w * pfit_h); } @@ -6751,7 +6751,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, } static void -intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) +intel_reduce_m_n_ratio(u32 *num, u32 *den) { while (*num > DATA_LINK_M_N_MASK || *den > DATA_LINK_M_N_MASK) { @@ -6761,7 +6761,7 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) } static void compute_m_n(unsigned int m, unsigned int n, - uint32_t *ret_m, uint32_t *ret_n, + u32 *ret_m, u32 *ret_n, bool constant_n) { /* @@ -6776,7 +6776,7 @@ static void compute_m_n(unsigned int m, unsigned int n, else *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); - *ret_m = div_u64((uint64_t) m * *ret_n, n); + *ret_m = div_u64((u64)m * *ret_n, n); intel_reduce_m_n_ratio(ret_m, ret_n); } @@ -6806,12 +6806,12 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } -static uint32_t pnv_dpll_compute_fp(struct dpll *dpll) +static u32 pnv_dpll_compute_fp(struct dpll *dpll) { return (1 << dpll->n) << 16 | dpll->m2; } -static uint32_t i9xx_dpll_compute_fp(struct dpll *dpll) +static u32 i9xx_dpll_compute_fp(struct dpll *dpll) { return dpll->n << 16 | dpll->m1 << 8 | dpll->m2; } @@ -7367,7 +7367,7 @@ static void intel_set_pipe_timings(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - uint32_t crtc_vtotal, crtc_vblank_end; + u32 crtc_vtotal, crtc_vblank_end; int vsyncshift = 0; /* We need to be careful not to changed the adjusted mode, for otherwise @@ -7442,7 +7442,7 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - uint32_t tmp; + u32 tmp; tmp = I915_READ(HTOTAL(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1; @@ -7513,7 +7513,7 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t pipeconf; + u32 pipeconf; pipeconf = 0; @@ -7758,7 +7758,7 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - uint32_t tmp; + u32 tmp; if (INTEL_GEN(dev_priv) <= 3 && (IS_I830(dev_priv) || !IS_MOBILE(dev_priv))) @@ -7974,7 +7974,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; - uint32_t tmp; + u32 tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); @@ -8254,7 +8254,7 @@ static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = I915_READ(SOUTH_CHICKEN2); tmp |= FDI_MPHY_IOSFSB_RESET_CTL; @@ -8276,7 +8276,7 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) /* WaMPhyProgramming:hsw */ static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv) { - uint32_t tmp; + u32 tmp; tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY); tmp &= ~(0xFF << 24); @@ -8357,7 +8357,7 @@ static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv) static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, bool with_spread, bool with_fdi) { - uint32_t reg, tmp; + u32 reg, tmp; if (WARN(with_fdi && !with_spread, "FDI requires downspread\n")) with_spread = true; @@ -8396,7 +8396,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, /* Sequence to disable CLKOUT_DP */ static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) { - uint32_t reg, tmp; + u32 reg, tmp; mutex_lock(&dev_priv->sb_lock); @@ -8421,7 +8421,7 @@ static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) #define BEND_IDX(steps) ((50 + (steps)) / 5) -static const uint16_t sscdivintphase[] = { +static const u16 sscdivintphase[] = { [BEND_IDX( 50)] = 0x3B23, [BEND_IDX( 45)] = 0x3B23, [BEND_IDX( 40)] = 0x3C23, @@ -8453,7 +8453,7 @@ static const uint16_t sscdivintphase[] = { */ static void lpt_bend_clkout_dp(struct drm_i915_private *dev_priv, int steps) { - uint32_t tmp; + u32 tmp; int idx = BEND_IDX(steps); if (WARN_ON(steps % 5 != 0)) @@ -8519,7 +8519,7 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - uint32_t val; + u32 val; val = 0; @@ -8866,7 +8866,7 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state; - uint32_t ps_ctrl = 0; + u32 ps_ctrl = 0; int id = -1; int i; @@ -9022,7 +9022,7 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t tmp; + u32 tmp; tmp = I915_READ(PF_CTL(crtc->pipe)); @@ -9048,7 +9048,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; - uint32_t tmp; + u32 tmp; bool ret; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); @@ -9176,7 +9176,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) I915_STATE_WARN(intel_irqs_enabled(dev_priv), "IRQs enabled\n"); } -static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) +static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) { if (IS_HASWELL(dev_priv)) return I915_READ(D_COMP_HSW); @@ -9184,7 +9184,7 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) return I915_READ(D_COMP_BDW); } -static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) +static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) { if (IS_HASWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); @@ -9209,7 +9209,7 @@ static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, bool switch_to_fclk, bool allow_power_down) { - uint32_t val; + u32 val; assert_can_disable_lcpll(dev_priv); @@ -9256,7 +9256,7 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, */ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; val = I915_READ(LCPLL_CTL); @@ -9331,7 +9331,7 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) */ void hsw_enable_pc8(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; DRM_DEBUG_KMS("Enabling package C8+\n"); @@ -9347,7 +9347,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv) void hsw_disable_pc8(struct drm_i915_private *dev_priv) { - uint32_t val; + u32 val; DRM_DEBUG_KMS("Disabling package C8+\n"); @@ -9469,7 +9469,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, struct intel_crtc_state *pipe_config) { enum intel_dpll_id id; - uint32_t ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); + u32 ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); switch (ddi_pll_sel) { case PORT_CLK_SEL_WRPLL1: @@ -9639,7 +9639,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum port port; - uint32_t tmp; + u32 tmp; tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); @@ -13752,8 +13752,8 @@ intel_legacy_cursor_update(struct drm_plane *plane, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, + u32 src_x, u32 src_y, + u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); From 143c335ad27f61c6d8d4401203ab8cf9221382ac Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:24 +0200 Subject: [PATCH 204/539] drm/i915/i915_drv.h: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Reviewed-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-7-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 158 ++++++++++++++++---------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 310d9e1e1620..03db011caa8e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -334,16 +334,16 @@ struct drm_i915_display_funcs { struct intel_csr { struct work_struct work; const char *fw_path; - uint32_t required_version; - uint32_t max_fw_size; /* bytes */ - uint32_t *dmc_payload; - uint32_t dmc_fw_size; /* dwords */ - uint32_t version; - uint32_t mmio_count; + u32 required_version; + u32 max_fw_size; /* bytes */ + u32 *dmc_payload; + u32 dmc_fw_size; /* dwords */ + u32 version; + u32 mmio_count; i915_reg_t mmioaddr[8]; - uint32_t mmiodata[8]; - uint32_t dc_state; - uint32_t allowed_dc_mask; + u32 mmiodata[8]; + u32 dc_state; + u32 allowed_dc_mask; intel_wakeref_t wakeref; }; @@ -400,7 +400,7 @@ struct intel_fbc { struct { unsigned int mode_flags; - uint32_t hsw_bdw_pixel_rate; + u32 hsw_bdw_pixel_rate; } crtc; struct { @@ -419,7 +419,7 @@ struct intel_fbc { int y; - uint16_t pixel_blend_mode; + u16 pixel_blend_mode; } plane; struct { @@ -559,7 +559,7 @@ struct i915_suspend_saved_registers { u32 saveSWF0[16]; u32 saveSWF1[16]; u32 saveSWF3[3]; - uint64_t saveFENCE[I915_MAX_NUM_FENCES]; + u64 saveFENCE[I915_MAX_NUM_FENCES]; u32 savePCH_PORT_HOTPLUG; u16 saveGCDGMBUS; }; @@ -906,9 +906,9 @@ struct i915_gem_mm { atomic_t bsd_engine_dispatch_index; /** Bit 6 swizzling required for X tiling */ - uint32_t bit_6_swizzle_x; + u32 bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ - uint32_t bit_6_swizzle_y; + u32 bit_6_swizzle_y; /* accounting, useful for userland debugging */ spinlock_t object_stat_lock; @@ -935,20 +935,20 @@ struct ddi_vbt_port_info { * populate this field. */ #define HDMI_LEVEL_SHIFT_UNKNOWN 0xff - uint8_t hdmi_level_shift; + u8 hdmi_level_shift; - uint8_t supports_dvi:1; - uint8_t supports_hdmi:1; - uint8_t supports_dp:1; - uint8_t supports_edp:1; - uint8_t supports_typec_usb:1; - uint8_t supports_tbt:1; + u8 supports_dvi:1; + u8 supports_hdmi:1; + u8 supports_dp:1; + u8 supports_edp:1; + u8 supports_typec_usb:1; + u8 supports_tbt:1; - uint8_t alternate_aux_channel; - uint8_t alternate_ddc_pin; + u8 alternate_aux_channel; + u8 alternate_ddc_pin; - uint8_t dp_boost_level; - uint8_t hdmi_boost_level; + u8 dp_boost_level; + u8 hdmi_boost_level; int dp_max_link_rate; /* 0 for not limited by VBT */ }; @@ -1039,41 +1039,41 @@ enum intel_ddb_partitioning { struct intel_wm_level { bool enable; - uint32_t pri_val; - uint32_t spr_val; - uint32_t cur_val; - uint32_t fbc_val; + u32 pri_val; + u32 spr_val; + u32 cur_val; + u32 fbc_val; }; struct ilk_wm_values { - uint32_t wm_pipe[3]; - uint32_t wm_lp[3]; - uint32_t wm_lp_spr[3]; - uint32_t wm_linetime[3]; + u32 wm_pipe[3]; + u32 wm_lp[3]; + u32 wm_lp_spr[3]; + u32 wm_linetime[3]; bool enable_fbc_wm; enum intel_ddb_partitioning partitioning; }; struct g4x_pipe_wm { - uint16_t plane[I915_MAX_PLANES]; - uint16_t fbc; + u16 plane[I915_MAX_PLANES]; + u16 fbc; }; struct g4x_sr_wm { - uint16_t plane; - uint16_t cursor; - uint16_t fbc; + u16 plane; + u16 cursor; + u16 fbc; }; struct vlv_wm_ddl_values { - uint8_t plane[I915_MAX_PLANES]; + u8 plane[I915_MAX_PLANES]; }; struct vlv_wm_values { struct g4x_pipe_wm pipe[3]; struct g4x_sr_wm sr; struct vlv_wm_ddl_values ddl[3]; - uint8_t level; + u8 level; bool cxsr; }; @@ -1087,10 +1087,10 @@ struct g4x_wm_values { }; struct skl_ddb_entry { - uint16_t start, end; /* in number of blocks, 'end' is exclusive */ + u16 start, end; /* in number of blocks, 'end' is exclusive */ }; -static inline uint16_t skl_ddb_entry_size(const struct skl_ddb_entry *entry) +static inline u16 skl_ddb_entry_size(const struct skl_ddb_entry *entry) { return entry->end - entry->start; } @@ -1114,8 +1114,8 @@ struct skl_ddb_values { }; struct skl_wm_level { - uint16_t plane_res_b; - uint8_t plane_res_l; + u16 plane_res_b; + u8 plane_res_l; bool plane_en; }; @@ -1124,15 +1124,15 @@ struct skl_wm_params { bool x_tiled, y_tiled; bool rc_surface; bool is_planar; - uint32_t width; - uint8_t cpp; - uint32_t plane_pixel_rate; - uint32_t y_min_scanlines; - uint32_t plane_bytes_per_line; + u32 width; + u8 cpp; + u32 plane_pixel_rate; + u32 y_min_scanlines; + u32 plane_bytes_per_line; uint_fixed_16_16_t plane_blocks_per_line; uint_fixed_16_16_t y_tile_minimum; - uint32_t linetime_us; - uint32_t dbuf_block_size; + u32 linetime_us; + u32 dbuf_block_size; }; /* @@ -1515,14 +1515,14 @@ struct drm_i915_private { * Base address of where the gmbus and gpio blocks are located (either * on PCH or on SoC for platforms without PCH). */ - uint32_t gpio_mmio_base; + u32 gpio_mmio_base; /* MMIO base address for MIPI regs */ - uint32_t mipi_mmio_base; + u32 mipi_mmio_base; - uint32_t psr_mmio_base; + u32 psr_mmio_base; - uint32_t pps_mmio_base; + u32 pps_mmio_base; wait_queue_head_t gmbus_wait_queue; @@ -1777,17 +1777,17 @@ struct drm_i915_private { * in 0.5us units for WM1+. */ /* primary */ - uint16_t pri_latency[5]; + u16 pri_latency[5]; /* sprite */ - uint16_t spr_latency[5]; + u16 spr_latency[5]; /* cursor */ - uint16_t cur_latency[5]; + u16 cur_latency[5]; /* * Raw watermark memory latency values * for SKL for all 8 levels * in 1us units. */ - uint16_t skl_latency[8]; + u16 skl_latency[8]; /* current hardware state */ union { @@ -1797,7 +1797,7 @@ struct drm_i915_private { struct g4x_wm_values g4x; }; - uint8_t max_level; + u8 max_level; /* * Should be held around atomic WM register writing; also @@ -2686,45 +2686,45 @@ i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv); void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t mask, - uint32_t bits); + u32 mask, + u32 bits); void ilk_update_display_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void -ilk_enable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits) { ilk_update_display_irq(dev_priv, bits, bits); } static inline void -ilk_disable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits) { ilk_update_display_irq(dev_priv, bits, 0); } void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, enum pipe pipe, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, uint32_t bits) + enum pipe pipe, u32 bits) { bdw_update_pipe_irq(dev_priv, pipe, bits, bits); } static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, uint32_t bits) + enum pipe pipe, u32 bits) { bdw_update_pipe_irq(dev_priv, pipe, bits, 0); } void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask); + u32 interrupt_mask, + u32 enabled_irq_mask); static inline void -ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) { ibx_display_interrupt_update(dev_priv, bits, bits); } static inline void -ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) { ibx_display_interrupt_update(dev_priv, bits, 0); } @@ -2984,7 +2984,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, - uint32_t handle, uint64_t *offset); + u32 handle, u64 *offset); int i915_gem_mmap_gtt_version(void); void i915_gem_track_fb(struct drm_i915_gem_object *old, @@ -3125,7 +3125,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, - uint32_t *reg_state); + u32 *reg_state); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, @@ -3377,10 +3377,10 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy); bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy); -uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count); +u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count); void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, - uint8_t lane_lat_optim_mask); -uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); + u8 lane_lat_optim_mask); +u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, From d25236a3290ce621033a76e15bf74613acdb3a6d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:25 +0200 Subject: [PATCH 205/539] drm/i915/intel_drv.h: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Minor checkpatch fixes sprinkled on top of the changed lines. Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-8-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 94 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9ecff07598d9..2c1b3b1de9c2 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -300,13 +300,12 @@ struct intel_panel { /* Connector and platform specific backlight functions */ int (*setup)(struct intel_connector *connector, enum pipe pipe); - uint32_t (*get)(struct intel_connector *connector); - void (*set)(const struct drm_connector_state *conn_state, uint32_t level); + u32 (*get)(struct intel_connector *connector); + void (*set)(const struct drm_connector_state *conn_state, u32 level); void (*disable)(const struct drm_connector_state *conn_state); void (*enable)(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); - uint32_t (*hz_to_pwm)(struct intel_connector *connector, - uint32_t hz); + u32 (*hz_to_pwm)(struct intel_connector *connector, u32 hz); void (*power)(struct intel_connector *, bool enable); } backlight; }; @@ -598,7 +597,7 @@ struct intel_initial_plane_config { struct intel_scaler { int in_use; - uint32_t mode; + u32 mode; }; struct intel_crtc_scaler_state { @@ -636,7 +635,7 @@ struct intel_crtc_scaler_state { struct intel_pipe_wm { struct intel_wm_level wm[5]; - uint32_t linetime; + u32 linetime; bool fbc_wm_enabled; bool pipe_enabled; bool sprites_enabled; @@ -652,7 +651,7 @@ struct skl_plane_wm { struct skl_pipe_wm { struct skl_plane_wm planes[I915_MAX_PLANES]; - uint32_t linetime; + u32 linetime; }; enum vlv_wm_level { @@ -665,7 +664,7 @@ enum vlv_wm_level { struct vlv_wm_state { struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS]; struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS]; - uint8_t num_levels; + u8 num_levels; bool cxsr; }; @@ -878,13 +877,13 @@ struct intel_crtc_state { /* Used by SDVO (and if we ever fix it, HDMI). */ unsigned pixel_multiplier; - uint8_t lane_count; + u8 lane_count; /* * Used by platforms having DP/HDMI PHY with programmable lane * latency optimization. */ - uint8_t lane_lat_optim_mask; + u8 lane_lat_optim_mask; /* minimum acceptable voltage level */ u8 min_voltage_level; @@ -928,7 +927,7 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; /* Gamma mode programmed on the pipe */ - uint32_t gamma_mode; + u32 gamma_mode; /* bitmask of visible planes (enum plane_id) */ u8 active_planes; @@ -1014,7 +1013,7 @@ struct intel_plane { enum pipe pipe; bool has_fbc; bool has_ccs; - uint32_t frontbuffer_bit; + u32 frontbuffer_bit; struct { u32 base, cntl, size; @@ -1110,9 +1109,9 @@ enum link_m_n_set { struct intel_dp_compliance_data { unsigned long edid; - uint8_t video_pattern; - uint16_t hdisplay, vdisplay; - uint8_t bpc; + u8 video_pattern; + u16 hdisplay, vdisplay; + u8 bpc; }; struct intel_dp_compliance { @@ -1125,18 +1124,18 @@ struct intel_dp_compliance { struct intel_dp { i915_reg_t output_reg; - uint32_t DP; + u32 DP; int link_rate; - uint8_t lane_count; - uint8_t sink_count; + u8 lane_count; + u8 sink_count; bool link_mst; bool link_trained; bool has_audio; bool reset_link_params; - uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; - uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; - uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; - uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + u8 psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; + u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; + u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]; u8 fec_capable; /* source rates */ @@ -1156,7 +1155,7 @@ struct intel_dp { /* sink or branch descriptor */ struct drm_dp_desc desc; struct drm_dp_aux aux; - uint8_t train_set[4]; + u8 train_set[4]; int panel_power_up_delay; int panel_power_down_delay; int panel_power_cycle_delay; @@ -1198,14 +1197,13 @@ struct intel_dp { struct intel_dp_mst_encoder *mst_encoders[I915_MAX_PIPES]; struct drm_dp_mst_topology_mgr mst_mgr; - uint32_t (*get_aux_clock_divider)(struct intel_dp *dp, int index); + u32 (*get_aux_clock_divider)(struct intel_dp *dp, int index); /* * This function returns the value we have to program the AUX_CTL * register with to kick off an AUX transaction. */ - uint32_t (*get_aux_send_ctl)(struct intel_dp *dp, - int send_bytes, - uint32_t aux_clock_divider); + u32 (*get_aux_send_ctl)(struct intel_dp *dp, int send_bytes, + u32 aux_clock_divider); i915_reg_t (*aux_ch_ctl_reg)(struct intel_dp *dp); i915_reg_t (*aux_ch_data_reg)(struct intel_dp *dp, int index); @@ -1239,7 +1237,7 @@ struct intel_digital_port { struct intel_lspcon lspcon; enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; - uint8_t max_lanes; + u8 max_lanes; /* Used for DP and ICL+ TypeC/DP and TypeC/HDMI ports. */ enum aux_ch aux_ch; enum intel_display_power_domain ddi_io_power_domain; @@ -1474,8 +1472,8 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); @@ -1538,7 +1536,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state); u32 bxt_signal_levels(struct intel_dp *intel_dp); -uint32_t ddi_signal_levels(struct intel_dp *intel_dp); +u32 ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); u8 intel_ddi_dp_pre_emphasis_max(struct intel_encoder *encoder, u8 voltage_swing); @@ -1678,11 +1676,11 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, int intel_plane_atomic_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, - uint64_t *val); + u64 *val); int intel_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, - uint64_t val); + u64 val); int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, struct drm_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, @@ -1802,10 +1800,10 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, i915_reg_t output_reg, bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector); void intel_dp_set_link_params(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count, + int link_rate, u8 lane_count, bool link_mst); int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, - int link_rate, uint8_t lane_count); + int link_rate, u8 lane_count); void intel_dp_start_link_train(struct intel_dp *intel_dp); void intel_dp_stop_link_train(struct intel_dp *intel_dp); int intel_dp_retrain_link(struct intel_encoder *encoder, @@ -1837,7 +1835,7 @@ int intel_dp_max_lane_count(struct intel_dp *intel_dp); int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); -uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); +u32 intel_dp_pack_aux(const u8 *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); @@ -1850,24 +1848,24 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, - uint8_t dp_train_pat); + u8 dp_train_pat); void intel_dp_set_signal_levels(struct intel_dp *intel_dp); void intel_dp_set_idle_link_train(struct intel_dp *intel_dp); -uint8_t +u8 intel_dp_voltage_max(struct intel_dp *intel_dp); -uint8_t -intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing); +u8 +intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, u8 voltage_swing); void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, - uint8_t *link_bw, uint8_t *rate_select); + u8 *link_bw, u8 *rate_select); bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool -intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]); -uint16_t intel_dp_dsc_get_output_bpp(int link_clock, uint8_t lane_count, - int mode_clock, int mode_hdisplay); -uint8_t intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); +intel_dp_get_link_status(struct intel_dp *intel_dp, u8 link_status[DP_LINK_STATUS_SIZE]); +u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, + int mode_clock, int mode_hdisplay); +u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, + int mode_hdisplay); /* intel_vdsc.c */ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, @@ -2325,11 +2323,11 @@ void intel_tv_init(struct drm_i915_private *dev_priv); int intel_digital_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val); + u64 *val); int intel_digital_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val); + u64 val); int intel_digital_connector_atomic_check(struct drm_connector *conn, struct drm_connector_state *new_state); struct drm_connector_state * From f1e9c90947979c041130011fbcd070200b5527b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Jan 2019 14:30:24 +0000 Subject: [PATCH 206/539] drm/i915: Prevent use of global_seqno=0 We are not allowed to assign rq->global_seqno=0 as it has a special meaning of "inactive" (not executing on HW). Fixes: 6faf5916e6be ("drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190119143024.26971-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 33eb9df0dd0e..c7ce27785cda 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -343,6 +343,13 @@ static void move_to_timeline(struct i915_request *request, spin_unlock(&request->timeline->lock); } +static u32 next_global_seqno(struct i915_timeline *tl) +{ + if (!++tl->seqno) + ++tl->seqno; + return tl->seqno; +} + void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -359,7 +366,7 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(&engine->timeline); + seqno = next_global_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(intel_engine_signaled(engine, seqno)); From ca0b04db14a51893322a2a4638a41dc79c2cf98a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Dec 2018 12:31:45 +0100 Subject: [PATCH 207/539] drm/i915/dsi: Fix pipe_bpp for handling for 6 bpc pixel-formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 3 problems with the dsi code's pipe_bpp handling for 6 bpc pixel-formats which this commit addresses: 1) It assumes that the pipe_bpp is the same as the bpp going over the dsi lanes. This assumption is not valid for MIPI_DSI_FMT_RGB666, where pipe_bpp should be 18 so that we do proper dithering but we actually send 24 bpp over the dsi lanes (MIPI_DSI_FMT_RGB666_PACKED sends 18 bpp). This assumption is enforced by an assert in *_dsi_get_pclk(). This assert triggers on the initial hw-state readback on BYT/CHT devices which use MIPI_DSI_FMT_RGB666, such as the Prowise PT301 tablet. PIPECONF is set to 6BPC / 18 bpp by the GOP, while mipi_dsi_pixel_format_to_bpp() returns 24. This commits switches the calculations in *_dsi_get_pclk() to use the bpp from mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format) which returns the bpp going over the mipi lanes and drops the assert. 2) On BXT bxt_dsi_get_pipe_config() wrongly overrides the pipe_bpp which i9xx_get_pipe_config() reads from PIPECONF with the return value from mipi_dsi_pixel_format_to_bpp(). This avoids the assert from 1. but is wrong since the pipe is actually running at the value configured in PIPECONF. This commit drops the override of pipe_bpp from bxt_dsi_get_pipe_config(). 3) The dsi encoder's compute_config() never assigns a value to pipe_bpp, unlike most other encoders. Falling back on compute_baseline_pipe_bpp() which always picks 24. 24 is only correct for MIPI_DSI_FMT_RGB88 for the others we should use 18 bpp so that we correctly do 6bpc color dithering. This commit adds code to intel_dsi_compute_config() to properly set pipe_bpp based on intel_dsi->pixel_format. Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181201113148.23184-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/intel_dsi.h | 4 ++-- drivers/gpu/drm/i915/vlv_dsi.c | 17 ++++++++-------- drivers/gpu/drm/i915/vlv_dsi_pll.c | 31 ++++++------------------------ 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index df3d390e25fe..a9a19778dc7f 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -173,7 +173,7 @@ int vlv_dsi_pll_compute(struct intel_encoder *encoder, void vlv_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config); void vlv_dsi_pll_disable(struct intel_encoder *encoder); -u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config); void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); @@ -183,7 +183,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, void bxt_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config); void bxt_dsi_pll_disable(struct intel_encoder *encoder); -u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config); void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index c247ce74b71a..54cbd8eb1718 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -289,6 +289,11 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, /* DSI uses short packets for sync events, so clear mode flags for DSI */ adjusted_mode->flags = 0; + if (intel_dsi->pixel_format == MIPI_DSI_FMT_RGB888) + pipe_config->pipe_bpp = 24; + else + pipe_config->pipe_bpp = 18; + if (IS_GEN9_LP(dev_priv)) { /* Enable Frame time stamp based scanline reporting */ adjusted_mode->private_flags |= @@ -1059,10 +1064,8 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, } fmt = I915_READ(MIPI_DSI_FUNC_PRG(port)) & VID_MODE_FORMAT_MASK; - pipe_config->pipe_bpp = - mipi_dsi_pixel_format_to_bpp( - pixel_format_from_register_bits(fmt)); - bpp = pipe_config->pipe_bpp; + bpp = mipi_dsi_pixel_format_to_bpp( + pixel_format_from_register_bits(fmt)); /* Enable Frame time stamo based scanline reporting */ adjusted_mode->private_flags |= @@ -1200,11 +1203,9 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) { bxt_dsi_get_pipe_config(encoder, pipe_config); - pclk = bxt_dsi_get_pclk(encoder, pipe_config->pipe_bpp, - pipe_config); + pclk = bxt_dsi_get_pclk(encoder, pipe_config); } else { - pclk = vlv_dsi_get_pclk(encoder, pipe_config->pipe_bpp, - pipe_config); + pclk = vlv_dsi_get_pclk(encoder, pipe_config); } if (pclk) { diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/vlv_dsi_pll.c index a132a8037ecc..954d5a8c4fa7 100644 --- a/drivers/gpu/drm/i915/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/vlv_dsi_pll.c @@ -252,20 +252,12 @@ void bxt_dsi_pll_disable(struct intel_encoder *encoder) DRM_ERROR("Timeout waiting for PLL lock deassertion\n"); } -static void assert_bpp_mismatch(enum mipi_dsi_pixel_format fmt, int pipe_bpp) -{ - int bpp = mipi_dsi_pixel_format_to_bpp(fmt); - - WARN(bpp != pipe_bpp, - "bpp match assertion failure (expected %d, current %d)\n", - bpp, pipe_bpp); -} - -u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); u32 dsi_clock, pclk; u32 pll_ctl, pll_div; u32 m = 0, p = 0, n; @@ -319,15 +311,12 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, dsi_clock = (m * refclk) / (p * n); - /* pixel_format and pipe_bpp should agree */ - assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp); - - pclk = DIV_ROUND_CLOSEST(dsi_clock * intel_dsi->lane_count, pipe_bpp); + pclk = DIV_ROUND_CLOSEST(dsi_clock * intel_dsi->lane_count, bpp); return pclk; } -u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, +u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, struct intel_crtc_state *config) { u32 pclk; @@ -335,12 +324,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, u32 dsi_ratio; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - /* Divide by zero */ - if (!pipe_bpp) { - DRM_ERROR("Invalid BPP(0)\n"); - return 0; - } + int bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); config->dsi_pll.ctrl = I915_READ(BXT_DSI_PLL_CTL); @@ -348,10 +332,7 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, dsi_clk = (dsi_ratio * BXT_REF_CLOCK_KHZ) / 2; - /* pixel_format and pipe_bpp should agree */ - assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp); - - pclk = DIV_ROUND_CLOSEST(dsi_clk * intel_dsi->lane_count, pipe_bpp); + pclk = DIV_ROUND_CLOSEST(dsi_clk * intel_dsi->lane_count, bpp); DRM_DEBUG_DRIVER("Calculated pclk=%u\n", pclk); return pclk; From 24bf86ccf9f5563f7d288f8b7672b0facdefbc95 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Dec 2018 12:31:46 +0100 Subject: [PATCH 208/539] drm/i915/dsi: Enable dithering for 6 bpc panels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The display engine has 2 dithering enable bits which both need to be set for dithering to happen, 1 in the PIPECONF register which is taken care of by i9xx_set_pipeconf() and a second bit at the encoder level. The dsi code was not setting the encoder level dithering enable bit causing dithering to be disabled, this commit fixes this. Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181201113148.23184-2-hdegoede@redhat.com --- drivers/gpu/drm/i915/vlv_dsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 54cbd8eb1718..4d47910e5184 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -678,6 +678,10 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, LANE_CONFIGURATION_DUAL_LINK_B : LANE_CONFIGURATION_DUAL_LINK_A; } + + if (intel_dsi->pixel_format != MIPI_DSI_FMT_RGB888) + temp |= DITHERING_ENABLE; + /* assert ip_tg_enable signal */ I915_WRITE(port_ctrl, temp | DPI_ENABLE); POSTING_READ(port_ctrl); From 480cd6dd9287ac8276c802475bacb3138afd8e04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:20:48 +0000 Subject: [PATCH 209/539] drm/i915/selftests: Track evict objects explicitly During review of commit 71fc448c1aaf ("drm/i915/selftests: Make evict tolerant of foreign objects"), Matthew mentioned it would be better if we explicitly tracked the objects we created. We have an obj->st_link hook for this purpose, so add the corresponding list of objects and reduce our loops to only consider our own list. References: 71fc448c1aaf ("drm/i915/selftests: Make evict tolerant of foreign objects") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-6-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_evict.c | 114 +++++++++--------- 1 file changed, 55 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 543d618c152b..d0553bc69705 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -29,25 +29,21 @@ #include "mock_drm.h" #include "mock_gem_device.h" -static int populate_ggtt(struct drm_i915_private *i915) +static void quirk_add(struct drm_i915_gem_object *obj, + struct list_head *objects) +{ + /* quirk is only for live tiled objects, use it to declare ownership */ + GEM_BUG_ON(obj->mm.quirked); + obj->mm.quirked = true; + list_add(&obj->st_link, objects); +} + +static int populate_ggtt(struct drm_i915_private *i915, + struct list_head *objects) { - struct drm_i915_gem_object *obj, *on; - unsigned long expected_unbound, expected_bound; unsigned long unbound, bound, count; + struct drm_i915_gem_object *obj; u64 size; - int err; - - expected_unbound = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { - i915_gem_object_get(obj); - expected_unbound++; - } - - expected_bound = 0; - list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { - i915_gem_object_get(obj); - expected_bound++; - } count = 0; for (size = 0; @@ -56,38 +52,36 @@ static int populate_ggtt(struct drm_i915_private *i915) struct i915_vma *vma; obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto cleanup; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); + + quirk_add(obj, objects); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto cleanup; - } + if (IS_ERR(vma)) + return PTR_ERR(vma); count++; } unbound = 0; list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) - unbound++; - if (unbound != expected_unbound) { - pr_err("%s: Found %lu objects unbound, expected %lu!\n", - __func__, unbound, expected_unbound); - err = -EINVAL; - goto cleanup; + if (obj->mm.quirked) + unbound++; + if (unbound) { + pr_err("%s: Found %lu objects unbound, expected %u!\n", + __func__, unbound, 0); + return -EINVAL; } bound = 0; list_for_each_entry(obj, &i915->mm.bound_list, mm.link) - bound++; - if (bound != expected_bound + count) { + if (obj->mm.quirked) + bound++; + if (bound != count) { pr_err("%s: Found %lu objects bound, expected %lu!\n", - __func__, bound, expected_bound + count); - err = -EINVAL; - goto cleanup; + __func__, bound, count); + return -EINVAL; } if (list_empty(&i915->ggtt.vm.inactive_list)) { @@ -96,15 +90,6 @@ static int populate_ggtt(struct drm_i915_private *i915) } return 0; - -cleanup: - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) - i915_gem_object_put(obj); - - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) - i915_gem_object_put(obj); - - return err; } static void unpin_ggtt(struct drm_i915_private *i915) @@ -112,18 +97,20 @@ static void unpin_ggtt(struct drm_i915_private *i915) struct i915_vma *vma; list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) - i915_vma_unpin(vma); + if (vma->obj->mm.quirked) + i915_vma_unpin(vma); } -static void cleanup_objects(struct drm_i915_private *i915) +static void cleanup_objects(struct drm_i915_private *i915, + struct list_head *list) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) - i915_gem_object_put(obj); - - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) + list_for_each_entry_safe(obj, on, list, st_link) { + GEM_BUG_ON(!obj->mm.quirked); + obj->mm.quirked = false; i915_gem_object_put(obj); + } mutex_unlock(&i915->drm.struct_mutex); @@ -136,11 +123,12 @@ static int igt_evict_something(void *arg) { struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict one. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -169,7 +157,7 @@ static int igt_evict_something(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -178,13 +166,14 @@ static int igt_overcommit(void *arg) struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; struct i915_vma *vma; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and then try to pin one more. * We expect it to fail. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -194,6 +183,8 @@ static int igt_overcommit(void *arg) goto cleanup; } + quirk_add(obj, &objects); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); @@ -202,7 +193,7 @@ static int igt_overcommit(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -214,11 +205,12 @@ static int igt_evict_for_vma(void *arg) .start = 0, .size = 4096, }; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict a range. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -241,7 +233,7 @@ static int igt_evict_for_vma(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } @@ -264,6 +256,7 @@ static int igt_evict_for_cache_color(void *arg) }; struct drm_i915_gem_object *obj; struct i915_vma *vma; + LIST_HEAD(objects); int err; /* Currently the use of color_adjust is limited to cache domains within @@ -279,6 +272,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + quirk_add(obj, &objects); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, I915_GTT_PAGE_SIZE | flags); @@ -294,6 +288,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + quirk_add(obj, &objects); /* Neighbouring; same colour - should fit */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -329,7 +324,7 @@ static int igt_evict_for_cache_color(void *arg) cleanup: unpin_ggtt(i915); - cleanup_objects(i915); + cleanup_objects(i915, &objects); ggtt->vm.mm.color_adjust = NULL; return err; } @@ -338,11 +333,12 @@ static int igt_evict_vm(void *arg) { struct drm_i915_private *i915 = arg; struct i915_ggtt *ggtt = &i915->ggtt; + LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict everything. */ - err = populate_ggtt(i915); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -364,7 +360,7 @@ static int igt_evict_vm(void *arg) } cleanup: - cleanup_objects(i915); + cleanup_objects(i915, &objects); return err; } From c95e7ce387f97df6f7e61e08a35f97a8d74e5ee1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:20:49 +0000 Subject: [PATCH 210/539] drm/i915/selftests: Create a clean GGTT for vma/gtt selftesting Some tests (e.g. igt_vma_pin1) presume that we have a completely clean GGTT so that it can probe boundaries without fear that something is already allocated there. However, the mock device is starting to get complicated and following similar rules to the live device, i.e. we can't guarantee that i915->ggtt remains clean, so create a temporary address_space equivalent to the mock ggtt for the purpose. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 108 +++++++++++------- drivers/gpu/drm/i915/selftests/i915_vma.c | 75 ++++++------ .../gpu/drm/i915/selftests/mock_gem_device.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 9 +- drivers/gpu/drm/i915/selftests/mock_gtt.h | 4 +- 5 files changed, 113 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index fea8ab14e79d..06bde4a273cb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1267,27 +1267,35 @@ static int exercise_mock(struct drm_i915_private *i915, static int igt_mock_fill(void *arg) { - return exercise_mock(arg, fill_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, fill_hole); } static int igt_mock_walk(void *arg) { - return exercise_mock(arg, walk_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, walk_hole); } static int igt_mock_pot(void *arg) { - return exercise_mock(arg, pot_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, pot_hole); } static int igt_mock_drunk(void *arg) { - return exercise_mock(arg, drunk_hole); + struct i915_ggtt *ggtt = arg; + + return exercise_mock(ggtt->vm.i915, drunk_hole); } static int igt_gtt_reserve(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; LIST_HEAD(objects); u64 total; @@ -1300,11 +1308,12 @@ static int igt_gtt_reserve(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1318,20 +1327,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1349,11 +1358,12 @@ static int igt_gtt_reserve(void *arg) /* Now we start forcing evictions */ for (total = I915_GTT_PAGE_SIZE; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1367,20 +1377,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1401,7 +1411,7 @@ static int igt_gtt_reserve(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1413,18 +1423,18 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, i915->ggtt.vm.total, + offset = random_offset(0, ggtt->vm.total, 2*I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); - err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1450,7 +1460,7 @@ out: static int igt_gtt_insert(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; struct drm_mm_node tmp = {}; const struct invalid_insert { @@ -1459,8 +1469,8 @@ static int igt_gtt_insert(void *arg) u64 start, end; } invalid_insert[] = { { - i915->ggtt.vm.total + I915_GTT_PAGE_SIZE, 0, - 0, i915->ggtt.vm.total, + ggtt->vm.total + I915_GTT_PAGE_SIZE, 0, + 0, ggtt->vm.total, }, { 2*I915_GTT_PAGE_SIZE, 0, @@ -1490,7 +1500,7 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { - err = i915_gem_gtt_insert(&i915->ggtt.vm, &tmp, + err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, @@ -1505,11 +1515,12 @@ static int igt_gtt_insert(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; + total + I915_GTT_PAGE_SIZE <= ggtt->vm.total; total += I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1523,15 +1534,15 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err == -ENOSPC) { /* maxed out the GGTT space */ @@ -1540,7 +1551,7 @@ static int igt_gtt_insert(void *arg) } if (err) { pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1552,7 +1563,7 @@ static int igt_gtt_insert(void *arg) list_for_each_entry(obj, &objects, st_link) { struct i915_vma *vma; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1572,7 +1583,7 @@ static int igt_gtt_insert(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1587,13 +1598,13 @@ static int igt_gtt_insert(void *arg) goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1609,11 +1620,12 @@ static int igt_gtt_insert(void *arg) /* And then force evictions */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; - total += 2*I915_GTT_PAGE_SIZE) { + total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total; + total += 2 * I915_GTT_PAGE_SIZE) { struct i915_vma *vma; - obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, + 2 * I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out; @@ -1627,19 +1639,19 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, + err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.vm.total, + 0, ggtt->vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.vm.total, err); + total, ggtt->vm.total, err); goto out; } track_vma_bind(vma); @@ -1666,17 +1678,25 @@ int i915_gem_gtt_mock_selftests(void) SUBTEST(igt_gtt_insert), }; struct drm_i915_private *i915; + struct i915_ggtt ggtt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; + mock_init_ggtt(i915, &ggtt); + mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); + err = i915_subtests(tests, &ggtt); + mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + + mock_fini_ggtt(&ggtt); drm_dev_put(&i915->drm); + return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index ffa74290e054..f0a32edfb9b1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -28,6 +28,7 @@ #include "mock_gem_device.h" #include "mock_context.h" +#include "mock_gtt.h" static bool assert_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj, @@ -141,7 +142,8 @@ static int create_vmas(struct drm_i915_private *i915, static int igt_vma_create(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; + struct drm_i915_private *i915 = ggtt->vm.i915; struct drm_i915_gem_object *obj, *on; struct i915_gem_context *ctx, *cn; unsigned long num_obj, num_ctx; @@ -245,7 +247,7 @@ static bool assert_pin_einval(const struct i915_vma *vma, static int igt_vma_pin1(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = arg; const struct pin_mode modes[] = { #define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } #define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } @@ -256,30 +258,30 @@ static int igt_vma_pin1(void *arg) VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), - VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->vm.total - 4096)), - VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), - INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), - VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), - INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.vm.total), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (ggtt->mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | ggtt->mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (ggtt->vm.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | ggtt->vm.total), INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), VALID(4096, PIN_GLOBAL), VALID(8192, PIN_GLOBAL), - VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), - VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), - NOSPACE(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), - VALID(i915->ggtt.vm.total - 4096, PIN_GLOBAL), - VALID(i915->ggtt.vm.total, PIN_GLOBAL), - NOSPACE(i915->ggtt.vm.total + 4096, PIN_GLOBAL), + VALID(ggtt->mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(ggtt->mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + NOSPACE(ggtt->mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(ggtt->vm.total - 4096, PIN_GLOBAL), + VALID(ggtt->vm.total, PIN_GLOBAL), + NOSPACE(ggtt->vm.total + 4096, PIN_GLOBAL), NOSPACE(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), - INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), - INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (ggtt->mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (ggtt->vm.total - 4096)), INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), - VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), #if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) /* Misusing BIAS is a programming error (it is not controllable @@ -287,10 +289,10 @@ static int igt_vma_pin1(void *arg) * However, the tests are still quite interesting for checking * variable start, end and size. */ - NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), - NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.vm.total), - NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | ggtt->mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | ggtt->vm.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (ggtt->mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (ggtt->vm.total - 4096)), #endif { }, #undef NOSPACE @@ -306,13 +308,13 @@ static int igt_vma_pin1(void *arg) * focusing on error handling of boundary conditions. */ - GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.vm.mm)); + GEM_BUG_ON(!drm_mm_clean(&ggtt->vm.mm)); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(ggtt->vm.i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - vma = checked_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = checked_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) goto out; @@ -403,8 +405,8 @@ static unsigned int rotated_size(const struct intel_rotation_plane_info *a, static int igt_vma_rotate(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.vm; + struct i915_ggtt *ggtt = arg; + struct i915_address_space *vm = &ggtt->vm; struct drm_i915_gem_object *obj; const struct intel_rotation_plane_info planes[] = { { .width = 1, .height = 1, .stride = 1 }, @@ -431,7 +433,7 @@ static int igt_vma_rotate(void *arg) * that the page layout within the rotated VMA match our expectations. */ - obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, max_pages * PAGE_SIZE); if (IS_ERR(obj)) goto out; @@ -602,8 +604,8 @@ static bool assert_pin(struct i915_vma *vma, static int igt_vma_partial(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.vm; + struct i915_ggtt *ggtt = arg; + struct i915_address_space *vm = &ggtt->vm; const unsigned int npages = 1021; /* prime! */ struct drm_i915_gem_object *obj; const struct phase { @@ -621,7 +623,7 @@ static int igt_vma_partial(void *arg) * we are returned the same VMA when we later request the same range. */ - obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, npages * PAGE_SIZE); if (IS_ERR(obj)) goto out; @@ -723,17 +725,24 @@ int i915_vma_mock_selftests(void) SUBTEST(igt_vma_partial), }; struct drm_i915_private *i915; + struct i915_ggtt ggtt; int err; i915 = mock_gem_device(); if (!i915) return -ENOMEM; + mock_init_ggtt(i915, &ggtt); + mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); + err = i915_subtests(tests, &ggtt); + mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + + mock_fini_ggtt(&ggtt); drm_dev_put(&i915->drm); + return err; } - diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3cda66292e76..5477ad4a7e7d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -72,7 +72,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); - mock_fini_ggtt(i915); + mock_fini_ggtt(&i915->ggtt); mutex_unlock(&i915->drm.struct_mutex); WARN_ON(!list_empty(&i915->gt.timelines)); @@ -232,7 +232,7 @@ struct drm_i915_private *mock_gem_device(void) mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915); + mock_init_ggtt(i915, &i915->ggtt); mkwrite_device_info(i915)->ring_mask = BIT(0); i915->kernel_context = mock_context(i915, NULL); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 976c862b3842..cd83929fde8e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -97,9 +97,9 @@ static void mock_unbind_ggtt(struct i915_vma *vma) { } -void mock_init_ggtt(struct drm_i915_private *i915) +void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; + memset(ggtt, 0, sizeof(*ggtt)); ggtt->vm.i915 = i915; ggtt->vm.is_ggtt = true; @@ -118,13 +118,10 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); } -void mock_fini_ggtt(struct drm_i915_private *i915) +void mock_fini_ggtt(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; - i915_address_space_fini(&ggtt->vm); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h index 9a0a833bb545..40d544bde1d5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.h +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -25,8 +25,8 @@ #ifndef __MOCK_GTT_H #define __MOCK_GTT_H -void mock_init_ggtt(struct drm_i915_private *i915); -void mock_fini_ggtt(struct drm_i915_private *i915); +void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt); +void mock_fini_ggtt(struct i915_ggtt *ggtt); struct i915_hw_ppgtt * mock_ppgtt(struct drm_i915_private *i915, From e4a8c8130ba3ac5566c96c0dd79d7a3988fc13ab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:20:47 +0000 Subject: [PATCH 211/539] drm/i915/selftests: Refactor common live_test framework Before adding yet another copy of struct live_test and its handler, refactor the existing code into a common framework for live selftests. For many live selftests, we want to know if the GPU hung or otherwise misbehaved during the execution of the test (beyond any infraction in the behaviour under test), live_test provides this by comparing the GPU state before and after, alerting if it unexpectedly changed (e.g. the reset counter changed). It also ensures that the GPU is idle before and after the test, so that residual code running on the GPU is flushed before testing. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/selftests/i915_gem_context.c | 103 +++--------------- drivers/gpu/drm/i915/selftests/i915_request.c | 86 +++------------ .../gpu/drm/i915/selftests/igt_live_test.c | 85 +++++++++++++++ .../gpu/drm/i915/selftests/igt_live_test.h | 35 ++++++ 5 files changed, 147 insertions(+), 163 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.c create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 611115ed00db..f050759686ca 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -167,6 +167,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ + selftests/igt_live_test.o \ selftests/igt_reset.o \ selftests/igt_spinner.o diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4cba50679607..e2c1f0bc2abe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -27,6 +27,7 @@ #include "../i915_selftest.h" #include "i915_random.h" #include "igt_flush_test.h" +#include "igt_live_test.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -34,84 +35,6 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; - - unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; -}; - -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - t->i915 = i915; - t->func = func; - t->name = name; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } - - i915->gpu_error.missed_irq_rings = 0; - t->reset_global = i915_reset_count(&i915->gpu_error); - - for_each_engine(engine, i915, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); - - return 0; -} - -static int end_live_test(struct live_test *t) -{ - struct drm_i915_private *i915 = t->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - return -EIO; - - if (t->reset_global != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_global); - return -EIO; - } - - for_each_engine(engine, i915, id) { - if (t->reset_engine[id] == - i915_reset_engine_count(&i915->gpu_error, engine)) - continue; - - pr_err("%s(%s): engine '%s' was reset %d times!\n", - t->func, t->name, engine->name, - i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); - return -EIO; - } - - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - - return 0; -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -120,8 +43,8 @@ static int live_nop_switch(void *arg) struct i915_gem_context **ctx; enum intel_engine_id id; intel_wakeref_t wakeref; + struct igt_live_test t; struct drm_file *file; - struct live_test t; unsigned long n; int err = -ENODEV; @@ -185,7 +108,7 @@ static int live_nop_switch(void *arg) pr_info("Populated %d contexts on %s in %lluns\n", nctx, engine->name, ktime_to_ns(times[1] - times[0])); - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_unlock; @@ -233,7 +156,7 @@ static int live_nop_switch(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_unlock; @@ -554,10 +477,10 @@ static int igt_ctx_exec(void *arg) struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); - struct live_test t; int err = -ENODEV; /* @@ -575,7 +498,7 @@ static int igt_ctx_exec(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -645,7 +568,7 @@ static int igt_ctx_exec(void *arg) } out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -660,11 +583,11 @@ static int igt_ctx_readonly(void *arg) struct i915_gem_context *ctx; struct i915_hw_ppgtt *ppgtt; unsigned long ndwords, dw; + struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); LIST_HEAD(objects); - struct live_test t; int err = -ENODEV; /* @@ -679,7 +602,7 @@ static int igt_ctx_readonly(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -757,7 +680,7 @@ static int igt_ctx_readonly(void *arg) } out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -982,10 +905,10 @@ static int igt_vm_isolation(void *arg) struct i915_gem_context *ctx_a, *ctx_b; struct intel_engine_cs *engine; intel_wakeref_t wakeref; + struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng); unsigned long count; - struct live_test t; unsigned int id; u64 vm_total; int err; @@ -1004,7 +927,7 @@ static int igt_vm_isolation(void *arg) mutex_lock(&i915->drm.struct_mutex); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -1075,7 +998,7 @@ static int igt_vm_isolation(void *arg) out_rpm: intel_runtime_pm_put(i915, wakeref); out_unlock: - if (end_live_test(&t)) + if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 2e14d6d3bad7..4d4b86b5fa11 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,6 +25,7 @@ #include #include "../i915_selftest.h" +#include "igt_live_test.h" #include "mock_context.h" #include "mock_gem_device.h" @@ -270,73 +271,12 @@ int i915_request_mock_selftests(void) return err; } -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; - - unsigned int reset_count; -}; - -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - int err; - - t->i915 = i915; - t->func = func; - t->name = name; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } - - i915->gpu_error.missed_irq_rings = 0; - t->reset_count = i915_reset_count(&i915->gpu_error); - - return 0; -} - -static int end_live_test(struct live_test *t) -{ - struct drm_i915_private *i915 = t->i915; - - i915_retire_requests(i915); - - if (wait_for(intel_engines_are_idle(i915), 10)) { - pr_err("%s(%s): GPU not idle\n", t->func, t->name); - return -EIO; - } - - if (t->reset_count != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_count); - return -EIO; - } - - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - - return 0; -} - static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; intel_wakeref_t wakeref; - struct live_test t; + struct igt_live_test t; unsigned int id; int err = -ENODEV; @@ -354,7 +294,7 @@ static int live_nop_request(void *arg) IGT_TIMEOUT(end_time); ktime_t times[2] = {}; - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_unlock; @@ -396,7 +336,7 @@ static int live_nop_request(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_unlock; @@ -483,8 +423,8 @@ static int live_empty_request(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; intel_wakeref_t wakeref; + struct igt_live_test t; struct i915_vma *batch; - struct live_test t; unsigned int id; int err = 0; @@ -508,7 +448,7 @@ static int live_empty_request(void *arg) unsigned long n, prime; ktime_t times[2] = {}; - err = begin_live_test(&t, i915, __func__, engine->name); + err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) goto out_batch; @@ -544,7 +484,7 @@ static int live_empty_request(void *arg) break; } - err = end_live_test(&t); + err = igt_live_test_end(&t); if (err) goto out_batch; @@ -643,8 +583,8 @@ static int live_all_engines(void *arg) struct intel_engine_cs *engine; struct i915_request *request[I915_NUM_ENGINES]; intel_wakeref_t wakeref; + struct igt_live_test t; struct i915_vma *batch; - struct live_test t; unsigned int id; int err; @@ -656,7 +596,7 @@ static int live_all_engines(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -728,7 +668,7 @@ static int live_all_engines(void *arg) request[id] = NULL; } - err = end_live_test(&t); + err = igt_live_test_end(&t); out_request: for_each_engine(engine, i915, id) @@ -749,7 +689,7 @@ static int live_sequential_engines(void *arg) struct i915_request *prev = NULL; struct intel_engine_cs *engine; intel_wakeref_t wakeref; - struct live_test t; + struct igt_live_test t; unsigned int id; int err; @@ -762,7 +702,7 @@ static int live_sequential_engines(void *arg) mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); - err = begin_live_test(&t, i915, __func__, ""); + err = igt_live_test_begin(&t, i915, __func__, ""); if (err) goto out_unlock; @@ -845,7 +785,7 @@ static int live_sequential_engines(void *arg) GEM_BUG_ON(!i915_request_completed(request[id])); } - err = end_live_test(&t); + err = igt_live_test_end(&t); out_request: for_each_engine(engine, i915, id) { diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c new file mode 100644 index 000000000000..5deb485fb942 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" +#include "igt_live_test.h" + +int igt_live_test_begin(struct igt_live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + lockdep_assert_held(&i915->drm.struct_mutex); + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915->gpu_error.missed_irq_rings = 0; + t->reset_global = i915_reset_count(&i915->gpu_error); + + for_each_engine(engine, i915, id) + t->reset_engine[id] = + i915_reset_engine_count(&i915->gpu_error, engine); + + return 0; +} + +int igt_live_test_end(struct igt_live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + return -EIO; + + if (t->reset_global != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_global); + return -EIO; + } + + for_each_engine(engine, i915, id) { + if (t->reset_engine[id] == + i915_reset_engine_count(&i915->gpu_error, engine)) + continue; + + pr_err("%s(%s): engine '%s' was reset %d times!\n", + t->func, t->name, engine->name, + i915_reset_engine_count(&i915->gpu_error, engine) - + t->reset_engine[id]); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h new file mode 100644 index 000000000000..c0e9f99d50de --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef IGT_LIVE_TEST_H +#define IGT_LIVE_TEST_H + +#include "../i915_gem.h" + +struct drm_i915_private; + +struct igt_live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_global; + unsigned int reset_engine[I915_NUM_ENGINES]; +}; + +/* + * Flush the GPU state before and after the test to ensure that no residual + * code is running on the GPU that may affect this test. Also compare the + * state before and after the test and alert if it unexpectedly changes, + * e.g. if the GPU was reset. + */ +int igt_live_test_begin(struct igt_live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name); +int igt_live_test_end(struct igt_live_test *t); + +#endif /* IGT_LIVE_TEST_H */ From 1579ab2de9147c50b00b35d9e7a0a66c86dffe13 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:21:01 +0000 Subject: [PATCH 212/539] drm/i915/selftests: Use common mock_engine::advance Replace the open-coding of advance with a call instead. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_engine.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 8b8d51af7d6a..442ec2aeec81 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -67,11 +67,10 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } -static void advance(struct mock_engine *engine, - struct mock_request *request) +static void advance(struct mock_request *request) { list_del_init(&request->link); - mock_seqno_advance(&engine->base, request->base.global_seqno); + mock_seqno_advance(request->base.engine, request->base.global_seqno); } static void hw_delay_complete(struct timer_list *t) @@ -84,7 +83,7 @@ static void hw_delay_complete(struct timer_list *t) /* Timer fired, first request is complete */ request = first_request(engine); if (request) - advance(engine, request); + advance(request); /* * Also immediately signal any subsequent 0-delay requests, but @@ -96,7 +95,7 @@ static void hw_delay_complete(struct timer_list *t) break; } - advance(engine, request); + advance(request); } spin_unlock(&engine->hw_lock); @@ -180,7 +179,7 @@ static void mock_submit_request(struct i915_request *request) if (mock->delay) mod_timer(&engine->hw_delay, jiffies + mock->delay); else - advance(engine, mock); + advance(mock); } spin_unlock_irq(&engine->hw_lock); } @@ -240,10 +239,8 @@ void mock_engine_flush(struct intel_engine_cs *engine) del_timer_sync(&mock->hw_delay); spin_lock_irq(&mock->hw_lock); - list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { - list_del_init(&request->link); - mock_seqno_advance(&mock->base, request->base.global_seqno); - } + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) + advance(request); spin_unlock_irq(&mock->hw_lock); } From 924090f4237bc34f32bb1992bbf334af76e64a29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:20:50 +0000 Subject: [PATCH 213/539] drm/i915: Refactor out intel_context_init() Prior to adding a third instance of intel_context_init() and extending the information stored therewithin, refactor out the common assignments. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 7 ++----- drivers/gpu/drm/i915/i915_gem_context.h | 8 ++++++++ drivers/gpu/drm/i915/selftests/mock_context.c | 7 ++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b68b4345d7be..71d5ca059ee6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -338,11 +338,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { - struct intel_context *ce = &ctx->__engine[n]; - - ce->gem_context = ctx; - } + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) + intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index f6d870b1f73e..47d82ce7ba6a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -364,4 +364,12 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + ce->gem_context = ctx; +} + #endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index d937bdff26f9..b646cdcdd602 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -45,11 +45,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->handles_list); INIT_LIST_HEAD(&ctx->hw_id_link); - for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { - struct intel_context *ce = &ctx->__engine[n]; - - ce->gem_context = ctx; - } + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) + intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]); ret = i915_gem_context_pin_hw_id(ctx); if (ret < 0) From 0e21834e18c545bdebed527209a7b6bb8aed9f9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:21:02 +0000 Subject: [PATCH 214/539] drm/i915: Tidy common test_bit probing of i915_request->fence.flags A repeated pattern is to test the signaled bit of our request->fence.flags. Make this an inline to shorten a few lines and remove unnecessary line continuations. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 3 +-- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_request.h | 5 +++++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 +-- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +-- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1abfc3fa76ad..5fd5080c4ccb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1182,8 +1182,7 @@ static void notify_ring(struct intel_engine_cs *engine) struct i915_request *waiter = wait->request; if (waiter && - !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &waiter->fence.flags) && + !i915_request_signaled(waiter) && intel_wait_check_request(wait, waiter)) rq = i915_request_get(waiter); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c7ce27785cda..426194ee978a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -198,7 +198,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) intel_engine_cancel_signaling(rq); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d014b0605445..c0f084ca4f29 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -280,6 +280,11 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ #define I915_WAIT_FOR_IDLE_BOOST BIT(4) +static inline bool i915_request_signaled(const struct i915_request *rq) +{ + return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); +} + static inline bool intel_engine_has_started(struct intel_engine_cs *engine, u32 seqno); static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4fad93fe3678..b58915b8708b 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -631,8 +631,7 @@ static int intel_breadcrumbs_signaler(void *arg) rq->signaling.wait.seqno = 0; __list_del_entry(&rq->signaling.link); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags)) { + if (!i915_request_signaled(rq)) { list_add_tail(&rq->signaling.link, &list); i915_request_get(rq); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f0fa0f767eb6..382a1262f75a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -816,7 +816,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + if (i915_request_signaled(rq)) continue; dma_fence_set_error(&rq->fence, -EIO); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8b63afa3a221..fdc28a3d2936 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6662,7 +6662,7 @@ void gen6_rps_boost(struct i915_request *rq, if (!rps->enabled) return; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + if (i915_request_signaled(rq)) return; /* Serializes with i915_request_retire() */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 26b7274a2d43..e39e483d8d16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -836,8 +836,7 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)) + if (i915_request_signaled(request)) continue; dma_fence_set_error(&request->fence, -EIO); From 25f9cebd7a52ddf15405d74fb5fd4c374f301983 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Jan 2019 22:20:46 +0000 Subject: [PATCH 215/539] drm/i915: Show all active engines on hangcheck This turns out to be quite useful if one happens to be debugging semaphore deadlocks. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_hangcheck.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 7dc11fcb13de..741441daae32 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -195,10 +195,6 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer("hangcheck"); - intel_engine_dump(engine, &p, "%s\n", engine->name); - } break; default: @@ -285,6 +281,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work) wedged |= intel_engine_flag(engine); } + if (GEM_SHOW_DEBUG() && (hung | stuck)) { + struct drm_printer p = drm_debug_printer("hangcheck"); + + for_each_engine(engine, dev_priv, id) { + if (intel_engine_is_idle(engine)) + continue; + + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + } + if (wedged) { dev_err(dev_priv->drm.dev, "GPU recovery timed out," From 235ca26fc799d78522ea00dc13c54b3c3488151a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 17 Jan 2019 12:55:45 -0800 Subject: [PATCH 216/539] drm/i915/psr: Allow PSR2 to be enabled when debugfs asks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now PSR2 is still disabled by default for all platforms but is our intention to let debugfs to enable it for debug and tests proporses, so intel_psr2_enabled() that is also used by debugfs to decide if PSR2 is going to be enabled needs to take in consideration the debug field. v2: Using the switch/case that intel_psr2_enabled() already had to handle this(DK) Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190117205548.28378-1-jose.souza@intel.com --- drivers/gpu/drm/i915/intel_psr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 8dbf26c212cc..84a0fb981561 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -70,17 +70,17 @@ static bool psr_global_enabled(u32 debug) static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { - /* Disable PSR2 by default for all platforms */ - if (i915_modparams.enable_psr == -1) - return false; - /* Cannot enable DSC and PSR2 simultaneously */ WARN_ON(crtc_state->dsc_params.compression_enable && crtc_state->has_psr2); switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) { + case I915_PSR_DEBUG_DISABLE: case I915_PSR_DEBUG_FORCE_PSR1: return false; + case I915_PSR_DEBUG_DEFAULT: + if (i915_modparams.enable_psr <= 0) + return false; default: return crtc_state->has_psr2; } From 47c6cd54efde71b0e904cd41593978c109660430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 17 Jan 2019 12:55:46 -0800 Subject: [PATCH 217/539] drm/i915: Refactor PSR status debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old debugfs fields was not following a naming partern and it was a bit confusing. So it went from: ~$ sudo more /sys/kernel/debug/dri/0/i915_edp_psr_status Sink_Support: yes PSR mode: PSR1 Enabled: yes Busy frontbuffer bits: 0x000 Main link in standby mode: no HW Enabled & Active bit: yes Source PSR status: 0x24050006 [SRDONACK] To: ~$ sudo more /sys/kernel/debug/dri/0/i915_edp_psr_status Sink support: yes [0x03] PSR mode: PSR1 enabled Source PSR ctl: enabled [0x81f00e26] Source PSR status: IDLE [0x04010006] Busy frontbuffer bits: 0x00000000 The 'Main link in standby mode' was removed as it is not useful but if needed by someone the information is still in the register value of 'Source PSR ctl' inside of the brackets, PSR mode and Enabled was squashed into PSR mode, some renames and reorders and we have this cleaner version. This will also make easy to parse debugfs for IGT tests. v2: Printing sink PSR version with only 2 hex digits as it is a byte Cc: Rodrigo Vivi Cc: Dhinakaran Pandiyan Suggested-by: Dhinakaran Pandiyan Reviewed-by: Dhinakaran Pandiyan Acked-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190117205548.28378-2-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 98 +++++++++++++++-------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 24d6d4ce14ef..fb59874fed99 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2506,7 +2506,8 @@ DEFINE_SHOW_ATTRIBUTE(i915_psr_sink_status); static void psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) { - u32 val, psr_status; + u32 val, status_val; + const char *status = "unknown"; if (dev_priv->psr.psr2_enabled) { static const char * const live_status[] = { @@ -2522,14 +2523,11 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "BUF_ON", "TG_ON" }; - psr_status = I915_READ(EDP_PSR2_STATUS); - val = (psr_status & EDP_PSR2_STATUS_STATE_MASK) >> - EDP_PSR2_STATUS_STATE_SHIFT; - if (val < ARRAY_SIZE(live_status)) { - seq_printf(m, "Source PSR status: 0x%x [%s]\n", - psr_status, live_status[val]); - return; - } + val = I915_READ(EDP_PSR2_STATUS); + status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >> + EDP_PSR2_STATUS_STATE_SHIFT; + if (status_val < ARRAY_SIZE(live_status)) + status = live_status[status_val]; } else { static const char * const live_status[] = { "IDLE", @@ -2541,75 +2539,79 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "SRDOFFACK", "SRDENT_ON", }; - psr_status = I915_READ(EDP_PSR_STATUS); - val = (psr_status & EDP_PSR_STATUS_STATE_MASK) >> - EDP_PSR_STATUS_STATE_SHIFT; - if (val < ARRAY_SIZE(live_status)) { - seq_printf(m, "Source PSR status: 0x%x [%s]\n", - psr_status, live_status[val]); - return; - } + val = I915_READ(EDP_PSR_STATUS); + status_val = (val & EDP_PSR_STATUS_STATE_MASK) >> + EDP_PSR_STATUS_STATE_SHIFT; + if (status_val < ARRAY_SIZE(live_status)) + status = live_status[status_val]; } - seq_printf(m, "Source PSR status: 0x%x [%s]\n", psr_status, "unknown"); + seq_printf(m, "Source PSR status: %s [0x%08x]\n", status, val); } static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct i915_psr *psr = &dev_priv->psr; intel_wakeref_t wakeref; - u32 psrperf = 0; - bool enabled = false; - bool sink_support; + const char *status; + bool enabled; + u32 val; if (!HAS_PSR(dev_priv)) return -ENODEV; - sink_support = dev_priv->psr.sink_support; - seq_printf(m, "Sink_Support: %s\n", yesno(sink_support)); - if (!sink_support) + seq_printf(m, "Sink support: %s", yesno(psr->sink_support)); + if (psr->dp) + seq_printf(m, " [0x%02x]", psr->dp->psr_dpcd[0]); + seq_puts(m, "\n"); + + if (!psr->sink_support) return 0; wakeref = intel_runtime_pm_get(dev_priv); + mutex_lock(&psr->lock); - mutex_lock(&dev_priv->psr.lock); - seq_printf(m, "PSR mode: %s\n", - dev_priv->psr.psr2_enabled ? "PSR2" : "PSR1"); - seq_printf(m, "Enabled: %s\n", yesno(dev_priv->psr.enabled)); - seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", - dev_priv->psr.busy_frontbuffer_bits); - - if (dev_priv->psr.psr2_enabled) - enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE; + if (psr->enabled) + status = psr->psr2_enabled ? "PSR2 enabled" : "PSR1 enabled"; else - enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; + status = "disabled"; + seq_printf(m, "PSR mode: %s\n", status); - seq_printf(m, "Main link in standby mode: %s\n", - yesno(dev_priv->psr.link_standby)); + if (!psr->enabled) + goto unlock; - seq_printf(m, "HW Enabled & Active bit: %s\n", yesno(enabled)); + if (psr->psr2_enabled) { + val = I915_READ(EDP_PSR2_CTL); + enabled = val & EDP_PSR2_ENABLE; + } else { + val = I915_READ(EDP_PSR_CTL); + enabled = val & EDP_PSR_ENABLE; + } + seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", + enableddisabled(enabled), val); + psr_source_status(dev_priv, m); + seq_printf(m, "Busy frontbuffer bits: 0x%08x\n", + psr->busy_frontbuffer_bits); /* * SKL+ Perf counter is reset to 0 everytime DC state is entered */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - psrperf = I915_READ(EDP_PSR_PERF_CNT) & - EDP_PSR_PERF_CNT_MASK; - - seq_printf(m, "Performance_Counter: %u\n", psrperf); + val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; + seq_printf(m, "Performance counter: %u\n", val); } - psr_source_status(dev_priv, m); - mutex_unlock(&dev_priv->psr.lock); - - if (READ_ONCE(dev_priv->psr.debug) & I915_PSR_DEBUG_IRQ) { + if (psr->debug & I915_PSR_DEBUG_IRQ) { seq_printf(m, "Last attempted entry at: %lld\n", - dev_priv->psr.last_entry_attempt); - seq_printf(m, "Last exit at: %lld\n", - dev_priv->psr.last_exit); + psr->last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", psr->last_exit); } +unlock: + mutex_unlock(&psr->lock); intel_runtime_pm_put(dev_priv, wakeref); + return 0; } From cc8853f57e00511f46386c8e7910e00c5b2e58ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 17 Jan 2019 12:55:47 -0800 Subject: [PATCH 218/539] drm/i915: Add PSR2 selective update status registers and bits definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This register contains how many blocks was sent in the past selective updates. Those registers are not kept set all the times but polling it after flip can show the values corresponding to the last 8 frames. v2: Improved macros(Dhinakaran) Cc: Rodrigo Vivi Cc: Dhinakaran Pandiyan Reviewed-by: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190117205548.28378-3-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 93cbd057c07a..f4e447437d75 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4272,6 +4272,15 @@ enum { #define EDP_PSR2_STATUS_STATE_MASK (0xf << 28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 +#define _PSR2_SU_STATUS_0 0x6F914 +#define _PSR2_SU_STATUS_1 0x6F918 +#define _PSR2_SU_STATUS_2 0x6F91C +#define _PSR2_SU_STATUS(index) _MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1)) +#define PSR2_SU_STATUS(frame) (_PSR2_SU_STATUS((frame) / 3)) +#define PSR2_SU_STATUS_SHIFT(frame) (((frame) % 3) * 10) +#define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) +#define PSR2_SU_STATUS_FRAMES 8 + /* VGA port control */ #define ADPA _MMIO(0x61100) #define PCH_ADPA _MMIO(0xe1100) From a81f781a32384aef7e5b58854112881674c59e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 17 Jan 2019 12:55:48 -0800 Subject: [PATCH 219/539] drm/i915/debugfs: Print PSR selective update status register values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of this registers will be used to test if PSR2 is doing selective update and if the number of blocks match with the expected. v2: - Using new macros - Changed the string output v3: - reading PSR2_SU_STATUS registers together(Dhinakaran) - printing SU blocks of frames with 0 updates(Dhinakaran) Cc: Rodrigo Vivi Cc: Dhinakaran Pandiyan Reviewed-by: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190117205548.28378-4-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fb59874fed99..9a9e1da496dc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2608,6 +2608,29 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Last exit at: %lld\n", psr->last_exit); } + if (psr->psr2_enabled) { + u32 su_frames_val[3]; + int frame; + + /* + * Reading all 3 registers before hand to minimize crossing a + * frame boundary between register reads + */ + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) + su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame)); + + seq_puts(m, "Frame:\tPSR2 SU blocks:\n"); + + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame++) { + u32 su_blocks; + + su_blocks = su_frames_val[frame / 3] & + PSR2_SU_STATUS_MASK(frame); + su_blocks = su_blocks >> PSR2_SU_STATUS_SHIFT(frame); + seq_printf(m, "%d\t%d\n", frame, su_blocks); + } + } + unlock: mutex_unlock(&psr->lock); intel_runtime_pm_put(dev_priv, wakeref); From 2e679d48f38c378650db403b4ba2248adf0691b2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 21 Jan 2019 11:51:41 +0200 Subject: [PATCH 220/539] drm/i915/gvt: switch to kernel types Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' Acked-by: Zhenyu Wang Signed-off-by: Jani Nikula Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 14 +++++++------- drivers/gpu/drm/i915/gvt/handlers.c | 6 +++--- drivers/gpu/drm/i915/gvt/kvmgt.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/gvt/mmio.c | 6 +++--- drivers/gpu/drm/i915/gvt/sched_policy.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.h | 2 +- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a04e8aa58547..35b4ec3f7618 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -399,10 +399,10 @@ struct cmd_info { #define R_VECS (1 << VECS) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ - uint16_t rings; + u16 rings; /* devices that support this cmd: SNB/IVB/HSW/... */ - uint16_t devices; + u16 devices; /* which DWords are address that need fix up. * bit 0 means a 32-bit non address operand in command @@ -412,13 +412,13 @@ struct cmd_info { * No matter the address length, each address only takes * one bit in the bitmap. */ - uint16_t addr_bitmap; + u16 addr_bitmap; /* flag == F_LEN_CONST : command length * flag == F_LEN_VAR : length bias bits * Note: length is in DWord */ - uint8_t len; + u8 len; parser_cmd_handler handler; }; @@ -1639,7 +1639,7 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) { unsigned long gma = 0; const struct cmd_info *info; - uint32_t cmd_len = 0; + u32 cmd_len = 0; bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; @@ -2678,7 +2678,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) I915_GTT_PAGE_SIZE))) return -EINVAL; - ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t); + ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32); ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES, PAGE_SIZE); gma_head = wa_ctx->indirect_ctx.guest_gma; @@ -2845,7 +2845,7 @@ put_obj: static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; + u32 per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; if (!wa_ctx->per_ctx.valid) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 68a62ba5bf54..9c106e47e640 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -278,7 +278,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 old, new; - uint32_t ack_reg_offset; + u32 ack_reg_offset; old = vgpu_vreg(vgpu, offset); new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); @@ -833,7 +833,7 @@ static int dp_aux_ch_ctl_trans_done(struct intel_vgpu *vgpu, u32 value, } static void dp_aux_ch_ctl_link_training(struct intel_vgpu_dpcd_data *dpcd, - uint8_t t) + u8 t) { if ((t & DPCD_TRAINING_PATTERN_SET_MASK) == DPCD_TRAINING_PATTERN_1) { /* training pattern 1 for CR */ @@ -917,7 +917,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, if (op == GVT_AUX_NATIVE_WRITE) { int t; - uint8_t buf[16]; + u8 buf[16]; if ((addr + len + 1) >= DPCD_SIZE) { /* diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index a19e684e621a..f8d44e8f86a6 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -712,7 +712,7 @@ static void intel_vgpu_release_work(struct work_struct *work) __intel_vgpu_release(vgpu); } -static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) +static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; u32 mem_type; @@ -739,10 +739,10 @@ static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) return ((u64)start_hi << 32) | start_lo; } -static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, +static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, void *buf, unsigned int count, bool is_write) { - uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar); + u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar); int ret; if (is_write) @@ -754,13 +754,13 @@ static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, return ret; } -static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, uint64_t off) +static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) { return off >= vgpu_aperture_offset(vgpu) && off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu); } -static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t off, +static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, void *buf, unsigned long count, bool is_write) { void *aperture_va; @@ -792,7 +792,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; int ret = -EINVAL; @@ -1038,7 +1038,7 @@ static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type) static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, + unsigned int count, u32 flags, void *data) { return 0; @@ -1046,21 +1046,21 @@ static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu, static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, void *data) + unsigned int count, u32 flags, void *data) { return 0; } static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { return 0; } static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { struct eventfd_ctx *trigger; @@ -1079,12 +1079,12 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, return 0; } -static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags, +static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags, unsigned int index, unsigned int start, unsigned int count, void *data) { int (*func)(struct intel_vgpu *vgpu, unsigned int index, - unsigned int start, unsigned int count, uint32_t flags, + unsigned int start, unsigned int count, u32 flags, void *data) = NULL; switch (index) { diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 43f65848ecd6..ed4df2f6d60b 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,7 +57,7 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) -static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes, bool read) { struct intel_gvt *gvt = NULL; @@ -99,7 +99,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; @@ -171,7 +171,7 @@ out: * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, void *p_data, unsigned int bytes) { struct intel_gvt *gvt = vgpu->gvt; diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index c32e7d5e8629..951cfee85902 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -94,7 +94,7 @@ static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) { struct vgpu_sched_data *vgpu_data; struct list_head *pos; - static uint64_t stage_check; + static u64 stage_check; int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM; /* The timeslice accumulation reset at stage 0, which is diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index ca5529d0e48e..1e9eec6a32fe 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -61,7 +61,7 @@ struct shadow_indirect_ctx { unsigned long guest_gma; unsigned long shadow_gma; void *shadow_va; - uint32_t size; + u32 size; }; #define PER_CTX_ADDR_MASK 0xfffff000 From a9dc3395fc8bc460761f853b71971bdc1671560f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 18 Jan 2019 14:01:18 +0200 Subject: [PATCH 221/539] drm/i915/sdvo: switch to kernel types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixed C99 and kernel types use is getting ugly. Prefer kernel types. sed -i 's/\buint\(8\|16\|32\|64\)_t\b/u\1/g' v2: rebase Acked-by: Chris Wilson Acked-by: Tvrtko Ursulin Reviewed-by: Ville Syrjälä Reviewed-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190118120125.15484-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_sdvo.c | 78 +++++++++++++++---------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index df2d830a7405..e7b0884ba5a5 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -76,7 +76,7 @@ struct intel_sdvo { i915_reg_t sdvo_reg; /* Active outputs controlled by this SDVO output */ - uint16_t controlled_output; + u16 controlled_output; /* * Capabilities of the SDVO device returned by @@ -91,12 +91,12 @@ struct intel_sdvo { * For multiple function SDVO device, * this is for current attached outputs. */ - uint16_t attached_output; + u16 attached_output; /* * Hotplug activation bits for this device */ - uint16_t hotplug_active; + u16 hotplug_active; enum port port; @@ -104,19 +104,19 @@ struct intel_sdvo { bool has_hdmi_audio; /* DDC bus used by this SDVO encoder */ - uint8_t ddc_bus; + u8 ddc_bus; /* * the sdvo flag gets lost in round trip: dtd->adjusted_mode->dtd */ - uint8_t dtd_sdvo_flags; + u8 dtd_sdvo_flags; }; struct intel_sdvo_connector { struct intel_connector base; /* Mark the type of connector */ - uint16_t output_flag; + u16 output_flag; /* This contains all current supported TV format */ u8 tv_format_supported[TV_FORMAT_NUM]; @@ -184,7 +184,7 @@ to_intel_sdvo_connector(struct drm_connector *connector) container_of((conn_state), struct intel_sdvo_connector_state, base.base) static bool -intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags); +intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags); static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector, @@ -746,9 +746,9 @@ static bool intel_sdvo_get_input_timing(struct intel_sdvo *intel_sdvo, static bool intel_sdvo_create_preferred_input_timing(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *intel_sdvo_connector, - uint16_t clock, - uint16_t width, - uint16_t height) + u16 clock, + u16 width, + u16 height) { struct intel_sdvo_preferred_input_timing_args args; @@ -791,9 +791,9 @@ static bool intel_sdvo_set_clock_rate_mult(struct intel_sdvo *intel_sdvo, u8 val static void intel_sdvo_get_dtd_from_mode(struct intel_sdvo_dtd *dtd, const struct drm_display_mode *mode) { - uint16_t width, height; - uint16_t h_blank_len, h_sync_len, v_blank_len, v_sync_len; - uint16_t h_sync_offset, v_sync_offset; + u16 width, height; + u16 h_blank_len, h_sync_len, v_blank_len, v_sync_len; + u16 h_sync_offset, v_sync_offset; int mode_clock; memset(dtd, 0, sizeof(*dtd)); @@ -898,13 +898,13 @@ static bool intel_sdvo_check_supp_encode(struct intel_sdvo *intel_sdvo) } static bool intel_sdvo_set_encode(struct intel_sdvo *intel_sdvo, - uint8_t mode) + u8 mode) { return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_ENCODE, &mode, 1); } static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo, - uint8_t mode) + u8 mode) { return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_COLORIMETRY, &mode, 1); } @@ -913,11 +913,11 @@ static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo, static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) { int i, j; - uint8_t set_buf_index[2]; - uint8_t av_split; - uint8_t buf_size; - uint8_t buf[48]; - uint8_t *pos; + u8 set_buf_index[2]; + u8 av_split; + u8 buf_size; + u8 buf[48]; + u8 *pos; intel_sdvo_get_value(encoder, SDVO_CMD_GET_HBUF_AV_SPLIT, &av_split, 1); @@ -940,11 +940,11 @@ static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) #endif static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, - unsigned if_index, uint8_t tx_rate, - const uint8_t *data, unsigned length) + unsigned int if_index, u8 tx_rate, + const u8 *data, unsigned int length) { - uint8_t set_buf_index[2] = { if_index, 0 }; - uint8_t hbuf_size, tmp[8]; + u8 set_buf_index[2] = { if_index, 0 }; + u8 hbuf_size, tmp[8]; int i; if (!intel_sdvo_set_value(intel_sdvo, @@ -984,7 +984,7 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, { const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - uint8_t sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; + u8 sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; union hdmi_infoframe frame; int ret; ssize_t len; @@ -1017,7 +1017,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, const struct drm_connector_state *conn_state) { struct intel_sdvo_tv_format format; - uint32_t format_map; + u32 format_map; format_map = 1 << conn_state->tv.mode; memset(&format, 0, sizeof(format)); @@ -1208,7 +1208,7 @@ static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo, const struct drm_connector_state *conn_state = &sdvo_state->base.base; struct intel_sdvo_connector *intel_sdvo_conn = to_intel_sdvo_connector(conn_state->connector); - uint16_t val; + u16 val; if (intel_sdvo_conn->left) UPDATE_PROPERTY(sdvo_state->tv.overscan_h, OVERSCAN_H); @@ -1692,10 +1692,10 @@ static bool intel_sdvo_get_capabilities(struct intel_sdvo *intel_sdvo, struct in return true; } -static uint16_t intel_sdvo_get_hotplug_support(struct intel_sdvo *intel_sdvo) +static u16 intel_sdvo_get_hotplug_support(struct intel_sdvo *intel_sdvo) { struct drm_i915_private *dev_priv = to_i915(intel_sdvo->base.base.dev); - uint16_t hotplug; + u16 hotplug; if (!I915_HAS_HOTPLUG(dev_priv)) return 0; @@ -1826,7 +1826,7 @@ intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo, static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { - uint16_t response; + u16 response; struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; @@ -1977,7 +1977,7 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); const struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_sdtv_resolution_request tv_res; - uint32_t reply = 0, format_map = 0; + u32 reply = 0, format_map = 0; int i; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", @@ -2062,7 +2062,7 @@ static int intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, - uint64_t *val) + u64 *val) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); const struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state((void *)state); @@ -2121,7 +2121,7 @@ static int intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, - uint64_t val) + u64 val) { struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); @@ -2270,7 +2270,7 @@ static const struct drm_encoder_funcs intel_sdvo_enc_funcs = { static void intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) { - uint16_t mask = 0; + u16 mask = 0; unsigned int num_bits; /* @@ -2671,7 +2671,7 @@ err: } static bool -intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags) +intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ @@ -2747,7 +2747,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, { struct drm_device *dev = intel_sdvo->base.base.dev; struct intel_sdvo_tv_format format; - uint32_t format_map, i; + u32 format_map, i; if (!intel_sdvo_set_target_output(intel_sdvo, type)) return false; @@ -2814,7 +2814,7 @@ intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo, struct drm_connector_state *conn_state = connector->state; struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(conn_state); - uint16_t response, data_value[2]; + u16 response, data_value[2]; /* when horizontal overscan is supported, Add the left/right property */ if (enhancements.overscan_h) { @@ -2925,7 +2925,7 @@ intel_sdvo_create_enhance_property_lvds(struct intel_sdvo *intel_sdvo, { struct drm_device *dev = intel_sdvo->base.base.dev; struct drm_connector *connector = &intel_sdvo_connector->base.base; - uint16_t response, data_value[2]; + u16 response, data_value[2]; ENHANCEMENT(&connector->state->tv, brightness, BRIGHTNESS); @@ -2939,7 +2939,7 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, { union { struct intel_sdvo_enhancements_reply reply; - uint16_t response; + u16 response; } enhancements; BUILD_BUG_ON(sizeof(enhancements) != 2); From c25f0c6a0426527134d992bb4782cf5abdf962b6 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 22 Jan 2019 18:32:27 -0800 Subject: [PATCH 222/539] drm/i915/icl: do a posting read after irq install When reading GEN11_GT_INTR_DWx closely after enabling the interrupts in gen11_irq_postinstall, the returned value is garbage. This can cause other parts of the setup code (e.g. gen11_reset_one_iir) to think that there are interrupts to be cleared when there are none. The garbage value is only seen on the first read done after the enable, so this looks like a posting issue. Adding a posting read after enabling the interrupts does indeed fix the problem. Note that the posting read has been purposely added outside of gen11_master_intr_enable since the issue has only been observed when the full interrupt setup is performed. Cc: Mika Kuoppala Signed-off-by: Daniele Ceraolo Spurio Acked-by: Chris Wilson Acked-by: Mika Kuoppala Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190123023227.8117-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5fd5080c4ccb..7056ae2d1e0e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4089,6 +4089,7 @@ static int gen11_irq_postinstall(struct drm_device *dev) I915_WRITE(GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); gen11_master_intr_enable(dev_priv->regs); + POSTING_READ(GEN11_GFX_MSTR_IRQ); return 0; } From 03ca3cf8e9aa7549e6c398462af0f68bdd43e7fe Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 17 Jan 2019 21:59:43 -0800 Subject: [PATCH 223/539] drm/i915/icl: Adding few more device IDs for Ice Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We just got aware that there was more IDs available at spec, so let's add them already. Cc: James Ausmus Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala Reviewed-by: José Roberto de Souza Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190118055943.10252-1-rodrigo.vivi@intel.com --- include/drm/i915_pciids.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 192667144693..df72be7e8b88 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -457,9 +457,13 @@ INTEL_VGA_DEVICE(0x8A51, info), \ INTEL_VGA_DEVICE(0x8A5C, info), \ INTEL_VGA_DEVICE(0x8A5D, info), \ + INTEL_VGA_DEVICE(0x8A59, info), \ + INTEL_VGA_DEVICE(0x8A58, info), \ INTEL_VGA_DEVICE(0x8A52, info), \ INTEL_VGA_DEVICE(0x8A5A, info), \ INTEL_VGA_DEVICE(0x8A5B, info), \ + INTEL_VGA_DEVICE(0x8A57, info), \ + INTEL_VGA_DEVICE(0x8A56, info), \ INTEL_VGA_DEVICE(0x8A71, info), \ INTEL_VGA_DEVICE(0x8A70, info) From 6e062b60b0b1bd82cac475e63cdb8c451647182b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Jan 2019 13:51:55 +0000 Subject: [PATCH 224/539] drm/i915/execlists: Mark up priority boost on preemption Record the priority boost we giving to the preempted client or else we may end up in a situation where the priority queue no longer matches the request priority order and so we can end up in an infinite loop of preempting the same pair of requests. Fixes: e9eaf82d97a2 ("drm/i915: Priority boost for waiting clients") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190123135155.21562-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 436e59724900..8aa8a4862543 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -302,6 +302,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (!(prio & I915_PRIORITY_NEWCLIENT)) { prio |= I915_PRIORITY_NEWCLIENT; + active->sched.attr.priority = prio; list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } @@ -625,6 +626,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + GEM_BUG_ON(last && + need_preempt(engine, last, rq_prio(rq))); + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not From 3c8861d84a4d2c6cd7221d18e49bf9201c6c6115 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 17 Dec 2018 14:44:14 -0800 Subject: [PATCH 225/539] drm: Add color management LUT validation helper (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some hardware may place additional restrictions on the gamma/degamma curves described by our LUT properties. E.g., that a gamma curve never decreases or that the red/green/blue channels of a LUT's entries must be equal. Let's add a helper function that drivers can use to test that a userspace-provided LUT is valid and doesn't violate hardware requirements. v2: - Combine into a single helper that just takes a bitmask of the tests to apply. (Brian Starkey) - Add additional check (always performed) that LUT property blob size is always a multiple of the LUT entry size. (stolen from ARM driver) v3: - Drop the LUT size check again since drm_atomic_replace_property_blob_from_id() already covers this for us. (Alexandru Gheorghe) v4: - Use an enum to describe possible test values rather than #define's; this is cleaner to provide kerneldoc for. (Daniel Vetter) - s/DRM_COLOR_LUT_INCREASING/DRM_COLOR_LUT_NON_DECREASING/. (Ville) Cc: Uma Shankar Cc: Swati Sharma Cc: Brian Starkey Cc: Daniel Vetter Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Brian Starkey Reviewed-by: Alexandru Gheorghe Reviewed-by: Uma Shankar Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181217224415.12848-1-matthew.d.roper@intel.com --- drivers/gpu/drm/drm_color_mgmt.c | 44 ++++++++++++++++++++++++++++++++ include/drm/drm_color_mgmt.h | 29 +++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 07dcf47daafe..968ca7c91ad8 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -462,3 +462,47 @@ int drm_plane_create_color_properties(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_color_properties); + +/** + * drm_color_lut_check - check validity of lookup table + * @lut: property blob containing LUT to check + * @tests: bitmask of tests to run + * + * Helper to check whether a userspace-provided lookup table is valid and + * satisfies hardware requirements. Drivers pass a bitmask indicating which of + * the tests in &drm_color_lut_tests should be performed. + * + * Returns 0 on success, -EINVAL on failure. + */ +int drm_color_lut_check(struct drm_property_blob *lut, + uint32_t tests) +{ + struct drm_color_lut *entry; + int i; + + if (!lut || !tests) + return 0; + + entry = lut->data; + for (i = 0; i < drm_color_lut_size(lut); i++) { + if (tests & DRM_COLOR_LUT_EQUAL_CHANNELS) { + if (entry[i].red != entry[i].blue || + entry[i].red != entry[i].green) { + DRM_DEBUG_KMS("All LUT entries must have equal r/g/b\n"); + return -EINVAL; + } + } + + if (i > 0 && tests & DRM_COLOR_LUT_NON_DECREASING) { + if (entry[i].red < entry[i - 1].red || + entry[i].green < entry[i - 1].green || + entry[i].blue < entry[i - 1].blue) { + DRM_DEBUG_KMS("LUT entries must never decrease.\n"); + return -EINVAL; + } + } + } + + return 0; +} +EXPORT_SYMBOL(drm_color_lut_check); diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 90ef9996d9a4..6affbda6d9cb 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -69,4 +69,33 @@ int drm_plane_create_color_properties(struct drm_plane *plane, u32 supported_ranges, enum drm_color_encoding default_encoding, enum drm_color_range default_range); + +/** + * enum drm_color_lut_tests - hw-specific LUT tests to perform + * + * The drm_color_lut_check() function takes a bitmask of the values here to + * determine which tests to apply to a userspace-provided LUT. + */ +enum drm_color_lut_tests { + /** + * @DRM_COLOR_LUT_EQUAL_CHANNELS: + * + * Checks whether the entries of a LUT all have equal values for the + * red, green, and blue channels. Intended for hardware that only + * accepts a single value per LUT entry and assumes that value applies + * to all three color components. + */ + DRM_COLOR_LUT_EQUAL_CHANNELS = BIT(0), + + /** + * @DRM_COLOR_LUT_NON_DECREASING: + * + * Checks whether the entries of a LUT are always flat or increasing + * (never decreasing). + */ + DRM_COLOR_LUT_NON_DECREASING = BIT(1), +}; + +int drm_color_lut_check(struct drm_property_blob *lut, + uint32_t tests); #endif From 85e2d61e49768f85346fe7ff133625eabf0946fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 18 Dec 2018 09:51:58 -0800 Subject: [PATCH 226/539] drm/i915: Validate userspace-provided color management LUT's (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently program userspace-provided gamma and degamma LUT's into our hardware without really checking to see whether they satisfy our hardware's rules. We should try to catch tables that are invalid for our hardware early and reject the atomic transaction. All of our platforms that accept a degamma LUT expect that the entries in the LUT are always flat or increasing, never decreasing. Also, our GLK and ICL platforms only accept degamma tables with r=g=b entries; so we should also add the relevant checks for that in anticipation of degamma support landing for those platforms. v2: - Use new API (single check function with bitmask of tests to apply) - Call helper for our gamma table as well (with no additional tests specified) so that the table size will be validated. v3: - Don't call on the gamma table since the LUT size is already tested at property blob upload and we don't have any additional hardware constraints for that LUT. v4: - Apply equal color channel check on gen10 as well; the bspec has some strange tagging for CNL platforms, but this appears to apply there as well. (Ville) Cc: Uma Shankar Cc: Swati Sharma Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20181218175158.5739-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_color.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 299eb7858adc..bc7589656a8f 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -609,10 +609,26 @@ int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); size_t gamma_length, degamma_length; + uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; + /* + * All of our platforms mandate that the degamma curve be + * non-decreasing. Additionally, GLK and gen11 only accept a single + * value for red, green, and blue in the degamma table. Make sure + * userspace didn't try to pass us something we can't handle. + * + * We don't have any extra hardware constraints on the gamma table, + * so no need to explicitly check it. + */ + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) + tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; + + if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) + return -EINVAL; + /* * We allow both degamma & gamma luts at the right size or * NULL. From 63cb4e641af1e81796a9e45c10a17a88742b5a2d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:01 +0200 Subject: [PATCH 227/539] drm/i915/crt: split out intel_crt_present() to platform specific setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With new platforms not having CRT support and most conditions in intel_crt_present() being specific to DDI, split out the CRT initialization to platform specific blocks in the if ladder. Add new Pineview block for this. This puts intel_crt_init() more in line with the rest of the outputs, and makes it slightly easier for the uninitiated to figure out which platforms actually have what. v2: keep gen >= 9 check in intel_ddi_crt_present() (Ville) Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2fa9f4aec08e..9e0f34524d0b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14245,7 +14245,7 @@ static bool has_edp_a(struct drm_i915_private *dev_priv) return true; } -static bool intel_crt_present(struct drm_i915_private *dev_priv) +static bool intel_ddi_crt_present(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) >= 9) return false; @@ -14253,15 +14253,12 @@ static bool intel_crt_present(struct drm_i915_private *dev_priv) if (IS_HSW_ULT(dev_priv) || IS_BDW_ULT(dev_priv)) return false; - if (IS_CHERRYVIEW(dev_priv)) - return false; - if (HAS_PCH_LPT_H(dev_priv) && I915_READ(SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED) return false; /* DDI E can't be used if DDI A requires 4 lanes */ - if (HAS_DDI(dev_priv) && I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) + if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) return false; if (!dev_priv->vbt.int_crt_support) @@ -14323,9 +14320,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ intel_lvds_init(dev_priv); - if (intel_crt_present(dev_priv)) - intel_crt_init(dev_priv); - if (IS_ICELAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); @@ -14354,6 +14348,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (HAS_DDI(dev_priv)) { int found; + if (intel_ddi_crt_present(dev_priv)) + intel_crt_init(dev_priv); + /* * Haswell uses DDI functions to detect digital outputs. * On SKL pre-D0 the strap isn't connected, so we assume @@ -14385,6 +14382,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { int found; + + if (dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); if (has_edp_a(dev_priv)) @@ -14413,6 +14414,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { bool has_edp, has_port; + if (IS_VALLEYVIEW(dev_priv) && dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + /* * The DP_DETECTED bit is the latched state of the DDC * SDA pin at boot. However since eDP doesn't require DDC @@ -14455,9 +14459,15 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } vlv_dsi_init(dev_priv); - } else if (!IS_GEN(dev_priv, 2) && !IS_PINEVIEW(dev_priv)) { + } else if (IS_PINEVIEW(dev_priv)) { + if (dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + } else if (IS_GEN_RANGE(dev_priv, 3, 4)) { bool found = false; + if (dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) { DRM_DEBUG_KMS("probing SDVOB\n"); found = intel_sdvo_init(dev_priv, GEN3_SDVOB, PORT_B); @@ -14489,8 +14499,12 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev_priv, DP_D, PORT_D); - } else if (IS_GEN(dev_priv, 2)) + } else if (IS_GEN(dev_priv, 2)) { + if (dev_priv->vbt.int_crt_support) + intel_crt_init(dev_priv); + intel_dvo_init(dev_priv); + } if (SUPPORTS_TV(dev_priv)) intel_tv_init(dev_priv); From 0fafa22692773e950182cba48cd886fe93ec14c8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:02 +0200 Subject: [PATCH 228/539] drm/i915/lvds: only call intel_lvds_init() on platforms that might have LVDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With new platforms not having LVDS support, only call intel_lvds_init() on platforms that might actually have LVDS. Move the comment about eDP init to the PCH block where it's relevant. This puts intel_lvds_init() more in line with the rest of the outputs, and makes it slightly easier for the uninitiated to figure out which platforms actually have what. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9e0f34524d0b..29a7dd4afe0e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14313,13 +14313,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv)) return; - /* - * intel_edp_init_connector() depends on this completing first, to - * prevent the registeration of both eDP and LVDS and the incorrect - * sharing of the PPS. - */ - intel_lvds_init(dev_priv); - if (IS_ICELAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); @@ -14383,6 +14376,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { int found; + /* + * intel_edp_init_connector() depends on this completing first, + * to prevent the registration of both eDP and LVDS and the + * incorrect sharing of the PPS. + */ + intel_lvds_init(dev_priv); + if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); @@ -14460,11 +14460,15 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) vlv_dsi_init(dev_priv); } else if (IS_PINEVIEW(dev_priv)) { + intel_lvds_init(dev_priv); + if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); } else if (IS_GEN_RANGE(dev_priv, 3, 4)) { bool found = false; + intel_lvds_init(dev_priv); + if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); @@ -14500,6 +14504,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev_priv, DP_D, PORT_D); } else if (IS_GEN(dev_priv, 2)) { + intel_lvds_init(dev_priv); + if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); From 9bedc7edf624a6c552d9d382712a56a705f40a41 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:03 +0200 Subject: [PATCH 229/539] drm/i915/lvds: nuke intel_lvds_supported() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that intel_lvds_init() is only called for platforms that might have LVDS, move the remaining checks to intel_setup_outputs(), again similar to other outputs, and remove the overlapping checks. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 ++++-- drivers/gpu/drm/i915/intel_lvds.c | 23 ----------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 29a7dd4afe0e..db0f15242ccf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14467,7 +14467,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (IS_GEN_RANGE(dev_priv, 3, 4)) { bool found = false; - intel_lvds_init(dev_priv); + if (IS_MOBILE(dev_priv)) + intel_lvds_init(dev_priv); if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); @@ -14504,7 +14505,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev_priv, DP_D, PORT_D); } else if (IS_GEN(dev_priv, 2)) { - intel_lvds_init(dev_priv); + if (IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) + intel_lvds_init(dev_priv); if (dev_priv->vbt.int_crt_support) intel_crt_init(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 46a5dfd5cdf7..815ed463d9c5 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -798,26 +798,6 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) return (val & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP; } -static bool intel_lvds_supported(struct drm_i915_private *dev_priv) -{ - /* - * With the introduction of the PCH we gained a dedicated - * LVDS presence pin, use it. - */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) - return true; - - /* - * Otherwise LVDS was only attached to mobile products, - * except for the inglorious 830gm - */ - if (INTEL_GEN(dev_priv) <= 4 && - IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) - return true; - - return false; -} - /** * intel_lvds_init - setup LVDS connectors on this device * @dev_priv: i915 device @@ -842,9 +822,6 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) u8 pin; u32 allowed_scalers; - if (!intel_lvds_supported(dev_priv)) - return; - /* Skip init on machines we know falsely report LVDS */ if (dmi_check_system(intel_no_lvds)) { WARN(!dev_priv->vbt.int_lvds_support, From d6521463897bc673ed929e87a884b7a6ef641f64 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:04 +0200 Subject: [PATCH 230/539] drm/i915/tv: only call intel_tv_init() on platforms that might have TV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With most platforms not having TV support, only call intel_tv_init() on platforms that might actually have TV, specifically gens 3 and 4. This puts intel_tv_init() more in line with the rest of the outputs, and makes it slightly easier for the uninitiated to figure out which platforms actually have what. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-4-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index db0f15242ccf..8e89f04ddd9c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14504,6 +14504,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_G4X(dev_priv) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev_priv, DP_D, PORT_D); + + if (SUPPORTS_TV(dev_priv)) + intel_tv_init(dev_priv); } else if (IS_GEN(dev_priv, 2)) { if (IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) intel_lvds_init(dev_priv); @@ -14514,9 +14517,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_dvo_init(dev_priv); } - if (SUPPORTS_TV(dev_priv)) - intel_tv_init(dev_priv); - intel_psr_init(dev_priv); for_each_intel_encoder(&dev_priv->drm, encoder) { From a5916fd7a1f151a7890329787fa703a9215837f2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:05 +0200 Subject: [PATCH 231/539] drm/i915: rename has_edp_a() to ilk_has_edp_a() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clarify that the name is specific to ILK+ PCH platforms. v2: prefix the name with ilk rather than pch (Ville) Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-5-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8e89f04ddd9c..9895ea566f99 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14231,7 +14231,7 @@ static int intel_encoder_clones(struct intel_encoder *encoder) return index_mask; } -static bool has_edp_a(struct drm_i915_private *dev_priv) +static bool ilk_has_edp_a(struct drm_i915_private *dev_priv) { if (!IS_MOBILE(dev_priv)) return false; @@ -14388,7 +14388,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); - if (has_edp_a(dev_priv)) + if (ilk_has_edp_a(dev_priv)) intel_dp_init(dev_priv, DP_A, PORT_A); if (I915_READ(PCH_HDMIB) & SDVO_DETECTED) { From 346073cee660bbbede71244b64ea34f66f21c36e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:06 +0200 Subject: [PATCH 232/539] drm/i915/lvds: simplify gen 2 lvds presence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen 2 mobile and not I830 is, in fact, I85X. Simplify. Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-6-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9895ea566f99..ed3780f24638 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14508,7 +14508,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (SUPPORTS_TV(dev_priv)) intel_tv_init(dev_priv); } else if (IS_GEN(dev_priv, 2)) { - if (IS_MOBILE(dev_priv) && !IS_I830(dev_priv)) + if (IS_I85X(dev_priv)) intel_lvds_init(dev_priv); if (dev_priv->vbt.int_crt_support) From 74d021eaa70a1add287a5c65ba0fbc34606b8484 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 22 Jan 2019 10:23:07 +0200 Subject: [PATCH 233/539] drm/i915/crt: simplify CRT VBT check on pre-VLV/DDI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VBT int_crt_support can't be trusted on earlier platforms, and is always set to true in intel_bios.c for pre-DDI and pre-VLV platforms. We can simplify the output setup by unconditionally calling intel_crt_init() for these platforms. Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190122082307.4003-7-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed3780f24638..d328599240cb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14382,9 +14382,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) * incorrect sharing of the PPS. */ intel_lvds_init(dev_priv); - - if (dev_priv->vbt.int_crt_support) - intel_crt_init(dev_priv); + intel_crt_init(dev_priv); dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); @@ -14461,17 +14459,14 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) vlv_dsi_init(dev_priv); } else if (IS_PINEVIEW(dev_priv)) { intel_lvds_init(dev_priv); - - if (dev_priv->vbt.int_crt_support) - intel_crt_init(dev_priv); + intel_crt_init(dev_priv); } else if (IS_GEN_RANGE(dev_priv, 3, 4)) { bool found = false; if (IS_MOBILE(dev_priv)) intel_lvds_init(dev_priv); - if (dev_priv->vbt.int_crt_support) - intel_crt_init(dev_priv); + intel_crt_init(dev_priv); if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) { DRM_DEBUG_KMS("probing SDVOB\n"); @@ -14511,9 +14506,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (IS_I85X(dev_priv)) intel_lvds_init(dev_priv); - if (dev_priv->vbt.int_crt_support) - intel_crt_init(dev_priv); - + intel_crt_init(dev_priv); intel_dvo_init(dev_priv); } From f6626e1d96ed33f16d5a8ca5df4bbc2b374738ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 Jan 2019 08:37:10 +0000 Subject: [PATCH 234/539] drm/i915: De-inline intel_context_init() Nip some inline spaghetti in the bud before the problem gets too bad. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190124083710.7033-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 8 ++++++++ drivers/gpu/drm/i915/i915_gem_context.h | 10 +++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index fae68c4c4683..93e84751370f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -321,6 +321,14 @@ static u32 default_desc_template(const struct drm_i915_private *i915, return desc; } +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + ce->gem_context = ctx; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 47d82ce7ba6a..3769438228f6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -364,12 +364,8 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } -static inline void -intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - ce->gem_context = ctx; -} +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); #endif /* !__I915_GEM_CONTEXT_H__ */ From 63a23d245b2cb094be99d274204e458eb2810410 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 8 Jan 2019 17:08:38 +0100 Subject: [PATCH 235/539] drm/i915/backlight: Restore backlight on resume, v3. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore our saved values for backlight. This way even with fastset on S4 resume we will correctly restore the backlight to the active values. Changes since v1: - Call enable_backlight() when backlight.level is set. On suspend backlight.enabled is always cleared, this makes it not a good indicator. Also check for crtc->state->active. Changes since v2: - Use the new update_pipe() callback to run this on resume as well. Signed-off-by: Maarten Lankhorst Cc: Tolga Cakir Cc: Basil Eric Rabi Cc: Hans de Goede Cc: Ville Syrjälä Reported-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Reviewed-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190108160842.13396-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/icl_dsi.c | 1 + drivers/gpu/drm/i915/intel_ddi.c | 2 ++ drivers/gpu/drm/i915/intel_dp.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 3 ++ drivers/gpu/drm/i915/intel_lvds.c | 1 + drivers/gpu/drm/i915/intel_panel.c | 49 +++++++++++++++++++++++------- drivers/gpu/drm/i915/vlv_dsi.c | 1 + 7 files changed, 47 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 355b48d1c937..73a7bee24a66 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -1390,6 +1390,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->disable = gen11_dsi_disable; encoder->port = port; encoder->get_config = gen11_dsi_get_config; + encoder->update_pipe = intel_panel_update_backlight; encoder->compute_config = gen11_dsi_compute_config; encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b0bb8dfc2ed5..acd94354afc8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3556,6 +3556,8 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, intel_psr_enable(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); + + intel_panel_update_backlight(encoder, crtc_state, conn_state); } static void intel_ddi_update_pipe(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f7d5314e3395..2e994b5ba40b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -7001,6 +7001,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_encoder->suspend = intel_dp_encoder_suspend; if (IS_CHERRYVIEW(dev_priv)) { intel_encoder->pre_pll_enable = chv_dp_pre_pll_enable; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 33b733d37706..47195320413a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2023,6 +2023,9 @@ int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe); void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_panel_update_backlight(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state); extern struct drm_display_mode *intel_find_panel_downclock( struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 815ed463d9c5..b4aa49768e90 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -887,6 +887,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) } intel_encoder->get_hw_state = intel_lvds_get_hw_state; intel_encoder->get_config = intel_lvds_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_connector->get_hw_state = intel_connector_get_hw_state; intel_connector_attach_encoder(intel_connector, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 5a39a6347a7a..bb8612ab8ee1 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1087,20 +1087,11 @@ static void pwm_enable_backlight(const struct intel_crtc_state *crtc_state, intel_panel_actually_set_backlight(conn_state, panel->backlight.level); } -void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - - if (!panel->backlight.present) - return; - - DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); - - mutex_lock(&dev_priv->backlight_lock); WARN_ON(panel->backlight.max == 0); @@ -1117,6 +1108,24 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, panel->backlight.enabled = true; if (panel->backlight.device) panel->backlight.device->props.power = FB_BLANK_UNBLANK; +} + +void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + + if (!panel->backlight.present) + return; + + DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); + + mutex_lock(&dev_priv->backlight_lock); + + __intel_panel_enable_backlight(crtc_state, conn_state); mutex_unlock(&dev_priv->backlight_lock); } @@ -1776,6 +1785,24 @@ static int pwm_setup_backlight(struct intel_connector *connector, return 0; } +void intel_panel_update_backlight(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + + if (!panel->backlight.present) + return; + + mutex_lock(&dev_priv->backlight_lock); + if (!panel->backlight.enabled) + __intel_panel_enable_backlight(crtc_state, conn_state); + + mutex_unlock(&dev_priv->backlight_lock); +} + int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(connector->dev); diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 4d47910e5184..696b750acd1d 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -1697,6 +1697,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_encoder->post_disable = intel_dsi_post_disable; intel_encoder->get_hw_state = intel_dsi_get_hw_state; intel_encoder->get_config = intel_dsi_get_config; + intel_encoder->update_pipe = intel_panel_update_backlight; intel_connector->get_hw_state = intel_connector_get_hw_state; From 5b1ec9ac7ab5b4520d4db98b7024a8dd5051b000 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 8 Jan 2019 17:08:39 +0100 Subject: [PATCH 236/539] drm/i915/backlight: Fix backlight takeover on LPT, v3. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On lynxpoint the bios sometimes sets up the backlight using the CPU display, but the driver expects using the PWM PCH override register. Read the value from the CPU register, then convert it to the other units by converting from the old duty cycle, to freq, to the new units. This value is then programmed in the override register, after which we set the override and disable the CPU display control. This allows us to switch the source without flickering, and make the backlight controls work in the driver. Changes since v1: - Read BLC_PWM_CPU_CTL2 to cpu_ctl2. - Clean up cpu_mode if slightly. - Always disable BLM_PWM_ENABLE in cpu_ctl2. Changes since v2: - Simplify cpu_mode handling (Jani) Signed-off-by: Maarten Lankhorst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108225 Cc: Basil Eric Rabi Cc: Hans de Goede Cc: Tolga Cakir Cc: Ville Syrjälä Tested-by: Tolga Cakir Cc: Jani Nikula Reviewed-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190108160842.13396-2-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_panel.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index bb8612ab8ee1..beca98d2b035 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1496,8 +1496,8 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - u32 pch_ctl1, pch_ctl2, val; - bool alt; + u32 cpu_ctl2, pch_ctl1, pch_ctl2, val; + bool alt, cpu_mode; if (HAS_PCH_LPT(dev_priv)) alt = I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; @@ -1511,6 +1511,8 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus pch_ctl2 = I915_READ(BLC_PWM_PCH_CTL2); panel->backlight.max = pch_ctl2 >> 16; + cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); + if (!panel->backlight.max) panel->backlight.max = get_backlight_max_vbt(connector); @@ -1519,12 +1521,28 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.min = get_backlight_min_vbt(connector); - val = lpt_get_backlight(connector); + panel->backlight.enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; + + cpu_mode = panel->backlight.enabled && HAS_PCH_LPT(dev_priv) && + !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) && + (cpu_ctl2 & BLM_PWM_ENABLE); + if (cpu_mode) + val = pch_get_backlight(connector); + else + val = lpt_get_backlight(connector); val = intel_panel_compute_brightness(connector, val); panel->backlight.level = clamp(val, panel->backlight.min, panel->backlight.max); - panel->backlight.enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; + if (cpu_mode) { + DRM_DEBUG_KMS("CPU backlight register was enabled, switching to PCH override\n"); + + /* Write converted CPU PWM value to PCH override register */ + lpt_set_backlight(connector->base.state, panel->backlight.level); + I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); + + I915_WRITE(BLC_PWM_CPU_CTL2, cpu_ctl2 & ~BLM_PWM_ENABLE); + } return 0; } From d19f958db23c14c857e3eaa0cefa6a9c55e1468d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 8 Jan 2019 17:08:40 +0100 Subject: [PATCH 237/539] drm/i915: Enable fastset for non-boot modesets. Now that our state comparison functions are pretty complete, we should enable fastset by default when a modeset can be avoided. Even if we're not completely certain about the inherited state, we can be certain after the first modeset that our sw state matches the hw state. There is one testcase explicitly testing fastset, kms_panel_fitting.atomic-fastset but other testcases do so indirectly because most tests don't clean up the display during exit, or otherwise indirectly preserve mode by doing igt_display_reset or inheriting during init. Signed-off-by: Maarten Lankhorst Reviewed-by: Hans de Goede Cc: Daniel Vetter Reviewed-by: Hans de Goede [mlankhorst: Use DRM_DEBUG_KMS. (j4ni)] Link: https://patchwork.freedesktop.org/patch/msgid/20190108160842.13396-3-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d328599240cb..a02ce5a47f44 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11700,6 +11700,11 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); + if (fixup_inherited && !i915_modparams.fastboot) { + DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); + ret = false; + } + #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ @@ -12723,8 +12728,7 @@ static int intel_atomic_check(struct drm_device *dev, return ret; } - if (i915_modparams.fastboot && - intel_pipe_config_compare(dev_priv, + if (intel_pipe_config_compare(dev_priv, to_intel_crtc_state(old_crtc_state), pipe_config, true)) { crtc_state->mode_changed = false; From 0cdc1d07b4616f164978e8f1b3a5fe8b0a3ac835 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 8 Jan 2019 17:08:41 +0100 Subject: [PATCH 238/539] drm/i915: Make HW readout mark CRTC scaler as in use. This way we don't accidentally double allocate it. Noticed this when I wrote a patch to sanity check all of the scaler state. Signed-off-by: Maarten Lankhorst Reviewed-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20190108160842.13396-4-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a02ce5a47f44..36c1126cbc85 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8878,6 +8878,7 @@ static void skylake_get_pfit_config(struct intel_crtc *crtc, pipe_config->pch_pfit.enabled = true; pipe_config->pch_pfit.pos = I915_READ(SKL_PS_WIN_POS(crtc->pipe, i)); pipe_config->pch_pfit.size = I915_READ(SKL_PS_WIN_SZ(crtc->pipe, i)); + scaler_state->scalers[i].in_use = true; break; } } From 85baa5dbf79163026dcb78f742294c522e176432 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 24 Jan 2019 15:00:59 -0800 Subject: [PATCH 239/539] drm/i915: Update DRIVER_DATE to 20190124 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03db011caa8e..3c111ad09922 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190110" -#define DRIVER_TIMESTAMP 1547162337 +#define DRIVER_DATE "20190124" +#define DRIVER_TIMESTAMP 1548370857 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From 7f92e6c2aecf66f1ccf5d3137aaf1aa9905940d4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:05:58 -0800 Subject: [PATCH 240/539] drm/i915: initialize unused MOCS entries to PTE Instead of initializing them to uncached, let's set them to PTE for kernel tracking. While at it do some minor adjustments to comments and coding style. From Chris: "What it does mean is that the buffer contents are consistent with our cache tracking; and for userspace the results were always undefined. So we should at least be able to guarantee that the data written by userspace from the CPU is visible. After that, your caches are on your own". Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 56 +++++++++++++------------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e976c5ce5479..0d6b94a239d6 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -85,10 +85,7 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time - * being they will be implicitly initialized to the strictest caching - * configuration (uncached) to guarantee forwards compatibility with - * userspace programs written against more recent kernels providing - * additional MOCS entries. + * being they will be initialized to PTE. * * NOTE: These tables MUST start with being uncached and the length * MUST be less than 63 as the last two registers are reserved @@ -249,16 +246,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) table.table[index].control_value); /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) I915_WRITE(mocs_register(engine->id, index), - table.table[0].control_value); + table.table[I915_MOCS_PTE].control_value); } /** @@ -293,16 +287,13 @@ static int emit_mocs_control_table(struct i915_request *rq, } /* - * Ok, now set the unused entries to uncached. These entries - * are officially undefined and no contract for the contents - * and settings is given for these entries. - * - * Entry 0 in the table is uncached - so we are just writing - * that value to all the used entries. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[0].control_value; + *cs++ = table->table[I915_MOCS_PTE].control_value; } *cs++ = MI_NOOP; @@ -345,7 +336,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); - for (i = 0; i < table->size/2; i++) { + for (i = 0; i < table->size / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } @@ -353,18 +344,18 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, if (table->size & 0x01) { /* Odd table size - 1 left over */ *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 0); + *cs++ = l3cc_combine(table, 2 * i, I915_MOCS_PTE); i++; } /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair uninitialised as - * they are reserved by the hardware. + * Now set the unused entries to PTE. These entries are officially + * undefined and no contract for the contents and settings is given + * for these entries. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 0, 0); + *cs++ = l3cc_combine(table, I915_MOCS_PTE, I915_MOCS_PTE); } *cs++ = MI_NOOP; @@ -395,22 +386,21 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size/2; i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 2*i+1)); + for (i = 0; i < table.size / 2; i++) + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, 2 * i, 2 * i + 1)); /* Odd table size - 1 left over */ if (table.size & 0x01) { - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 2*i, 0)); + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, 2 * i, I915_MOCS_PTE)); i++; } - /* - * Now set the rest of the table to uncached - use entry 0 as - * this will be uncached. Leave the last pair as initialised as - * they are reserved by the hardware. - */ + /* Now set the rest of the table to PTE */ for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) - I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); + I915_WRITE(GEN9_LNCFCMOCS(i), + l3cc_combine(&table, I915_MOCS_PTE, I915_MOCS_PTE)); } /** From d7a43c3ba607207bc51d859e3bd450047519a06f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:05:59 -0800 Subject: [PATCH 241/539] drm/i915: Simplify MOCS table definition Make the defines for LE and L3 caching options to contain the shifts and remove the zeros from the tables as shifting zeros always result in zero. Starting from Ice Lake the MOCS table is defined in the spec and contains all entries. So to simplify checking the table with the values set in code, the value is now part of the macro name. This allows to still give the most used option and sensible name, but also to easily cross check the table from the spec for gen >= 11. By removing the zeros we avoid maintaining a huge table since the one from spec contains many more entries. The new table for Ice Lake will be added by other patches, this only reformats the table. While at it also fix the indentation. Signed-off-by: Lucas De Marchi Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 80 +++++++++++-------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 0d6b94a239d6..4ea80bb7dcc8 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -36,8 +36,8 @@ struct drm_i915_mocs_table { }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define LE_CACHEABILITY(value) ((value) << 0) -#define LE_TGT_CACHE(value) ((value) << 2) +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) #define LE_LRUM(value) ((value) << 4) #define LE_AOM(value) ((value) << 6) #define LE_RSC(value) ((value) << 7) @@ -48,28 +48,28 @@ struct drm_i915_mocs_table { /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) -#define L3_CACHEABILITY(value) ((value) << 4) +#define _L3_CACHEABILITY(value) ((value) << 4) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ /* (e)LLC caching options */ -#define LE_PAGETABLE 0 -#define LE_UC 1 -#define LE_WT 2 -#define LE_WB 3 - -/* L3 caching options */ -#define L3_DIRECT 0 -#define L3_UC 1 -#define L3_RESERVED 2 -#define L3_WB 3 +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_PAGETABLE 0 -#define LE_TC_LLC 1 -#define LE_TC_LLC_ELLC 2 -#define LE_TC_LLC_ELLC_ALT 3 +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) /* * MOCS tables @@ -96,31 +96,21 @@ struct drm_i915_mocs_table { static const struct drm_i915_mocs_entry skylake_mocs_table[] = { [I915_MOCS_UNCACHED] = { /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + .l3cc_value = L3_1_UC, }, [I915_MOCS_PTE] = { /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, [I915_MOCS_CACHED] = { /* 0x0000003b */ - .control_value = LE_CACHEABILITY(LE_WB) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), + .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, }; @@ -128,33 +118,21 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { static const struct drm_i915_mocs_entry broxton_mocs_table[] = { [I915_MOCS_UNCACHED] = { /* 0x00000009 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, /* 0x0010 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + .l3cc_value = L3_1_UC, }, [I915_MOCS_PTE] = { /* 0x00000038 */ - .control_value = LE_CACHEABILITY(LE_PAGETABLE) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, [I915_MOCS_CACHED] = { /* 0x00000039 */ - .control_value = LE_CACHEABILITY(LE_UC) | - LE_TGT_CACHE(LE_TC_LLC_ELLC) | - LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | - LE_PFM(0) | LE_SCF(0), - + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), /* 0x0030 */ - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + .l3cc_value = L3_3_WB, }, }; From 66f996052f955fe338c7cd6ca2099cfdf396b2bf Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 23 Jan 2019 16:06:00 -0800 Subject: [PATCH 242/539] drm/i915/skl: Rework MOCS tables to keep common part in a define The MOCS tables are going to be very similar across platforms. To reduce the amount of copied code, this patch rips the common part and puts it into a definition valid for all gen9 platforms. v2: Made defines for or-ing flags. Renamed macros from MOCS_TABLE to MOCS_ENTRIES. (Joonas) v3 (Lucas): - Fix indentation - Rebase on rework done by additional patch - Remove define for or-ing flags as it made the table more complex by requiring zeroed values to be passed - Do not embed comma in the macro, so to treat that just as another item and please source code formatting tools Signed-off-by: Tomasz Lis Suggested-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 57 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 4ea80bb7dcc8..c7a2a8d81d90 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -93,46 +93,39 @@ struct drm_i915_mocs_table { * may only be updated incrementally by adding entries at the * end. */ + +#define GEN9_MOCS_ENTRIES \ + [I915_MOCS_UNCACHED] = { \ + /* 0x00000009 */ \ + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, \ + /* 0x0010 */ \ + .l3cc_value = L3_1_UC, \ + }, \ + [I915_MOCS_PTE] = { \ + /* 0x00000038 */ \ + .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + /* 0x0030 */ \ + .l3cc_value = L3_3_WB, \ + } + static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, - /* 0x0010 */ - .l3cc_value = L3_1_UC, - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + GEN9_MOCS_ENTRIES, [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, + /* 0x0000003b */ + .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + /* 0x0030 */ + .l3cc_value = L3_3_WB, }, }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - [I915_MOCS_UNCACHED] = { - /* 0x00000009 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, - /* 0x0010 */ - .l3cc_value = L3_1_UC, - }, - [I915_MOCS_PTE] = { - /* 0x00000038 */ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + GEN9_MOCS_ENTRIES, [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, + /* 0x00000039 */ + .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + /* 0x0030 */ + .l3cc_value = L3_3_WB, }, }; From 828f31502045fbf4354723169b1b64e7719b8de7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:01 -0800 Subject: [PATCH 243/539] drm/i915: use a macro to define MOCS entries Let's use a macro to make tables smaller and at the same time allow us to add fields that apply to all entries in future. v2: rewrap lines to respect 80 chars limit and make it more readable (from Chris) Signed-off-by: Lucas De Marchi Reviewed-by: Tomasz Lis Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 43 +++++++++++++------------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c7a2a8d81d90..59dd74765288 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -71,6 +71,12 @@ struct drm_i915_mocs_table { #define L3_2_RESERVED _L3_CACHEABILITY(2) #define L3_3_WB _L3_CACHEABILITY(3) +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + } + /* * MOCS tables * @@ -93,40 +99,27 @@ struct drm_i915_mocs_table { * may only be updated incrementally by adding entries at the * end. */ - #define GEN9_MOCS_ENTRIES \ - [I915_MOCS_UNCACHED] = { \ - /* 0x00000009 */ \ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC, \ - /* 0x0010 */ \ - .l3cc_value = L3_1_UC, \ - }, \ - [I915_MOCS_PTE] = { \ - /* 0x00000038 */ \ - .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ - /* 0x0030 */ \ - .l3cc_value = L3_3_WB, \ - } + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_2_LLC_ELLC, \ + L3_1_UC), \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + L3_3_WB) static const struct drm_i915_mocs_entry skylake_mocs_table[] = { GEN9_MOCS_ENTRIES, - [I915_MOCS_CACHED] = { - /* 0x0000003b */ - .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { GEN9_MOCS_ENTRIES, - [I915_MOCS_CACHED] = { - /* 0x00000039 */ - .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), - /* 0x0030 */ - .l3cc_value = L3_3_WB, - }, + MOCS_ENTRY(I915_MOCS_CACHED, + LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3), + L3_3_WB) }; /** From 1878fce8de256dd0d263c670ae86b96b82cac735 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:02 -0800 Subject: [PATCH 244/539] drm/i915: keep track of used entries in MOCS table Instead of considering we have defined entries for any index in the table, let's keep track of the ones we explicitly defined. This will allow Gen 11 to have it's new table defined in which we have holes of undefined entries. Repeated comments about the meaning of undefined entries were removed since they are overly verbose and copy-pasted in several functions: now the definition is in the top only. v2: add helper function to get the index (from Chris) Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-6-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 111 +++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 59dd74765288..e5f0e9da258c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -28,6 +28,7 @@ struct drm_i915_mocs_entry { u32 control_value; u16 l3cc_value; + u16 used; }; struct drm_i915_mocs_table { @@ -75,6 +76,7 @@ struct drm_i915_mocs_table { [__idx] = { \ .control_value = __control_value, \ .l3cc_value = __l3cc_value, \ + .used = 1, \ } /* @@ -187,6 +189,19 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) } } +/* + * Get control_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].control_value; + + return table->table[I915_MOCS_PTE].control_value; +} + /** * intel_mocs_init_engine() - emit the mocs control table * @engine: The engine for whom to emit the registers. @@ -199,24 +214,25 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_mocs_table table; unsigned int index; + u32 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - for (index = 0; index < table.size; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[index].control_value); + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].control_value; - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + for (index = 0; index < table.size; index++) { + u32 value = get_entry_control(&table, index); + + I915_WRITE(mocs_register(engine->id, index), value); + } + + /* All remaining entries are also unused */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) - I915_WRITE(mocs_register(engine->id, index), - table.table[I915_MOCS_PTE].control_value); + I915_WRITE(mocs_register(engine->id, index), unused_value); } /** @@ -234,11 +250,15 @@ static int emit_mocs_control_table(struct i915_request *rq, { enum intel_engine_id engine = rq->engine->id; unsigned int index; + u32 unused_value; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].control_value; + cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -246,18 +266,16 @@ static int emit_mocs_control_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { + u32 value = get_entry_control(table, index); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[index].control_value; + *cs++ = value; } - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + /* All remaining entries are also unused */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = table->table[I915_MOCS_PTE].control_value; + *cs++ = unused_value; } *cs++ = MI_NOOP; @@ -266,12 +284,24 @@ static int emit_mocs_control_table(struct i915_request *rq, return 0; } +/* + * Get l3cc_value from MOCS entry taking into account when it's not used: + * I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, + unsigned int index) +{ + if (table->table[index].used) + return table->table[index].l3cc_value; + + return table->table[I915_MOCS_PTE].l3cc_value; +} + static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return table->table[low].l3cc_value | - table->table[high].l3cc_value << 16; + return low | high << 16; } /** @@ -288,12 +318,16 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { + u16 unused_value; unsigned int i; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; + /* Set unused values to PTE */ + unused_value = table->table[I915_MOCS_PTE].l3cc_value; + cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -301,25 +335,26 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size / 2; i++) { + u16 low = get_entry_l3cc(table, 2 * i); + u16 high = get_entry_l3cc(table, 2 * i + 1); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); + *cs++ = l3cc_combine(table, low, high); } + /* Odd table size - 1 left over */ if (table->size & 0x01) { - /* Odd table size - 1 left over */ + u16 low = get_entry_l3cc(table, 2 * i); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, 2 * i, I915_MOCS_PTE); + *cs++ = l3cc_combine(table, low, unused_value); i++; } - /* - * Now set the unused entries to PTE. These entries are officially - * undefined and no contract for the contents and settings is given - * for these entries. - */ + /* All remaining entries are also unused */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, I915_MOCS_PTE, I915_MOCS_PTE); + *cs++ = l3cc_combine(table, unused_value, unused_value); } *cs++ = MI_NOOP; @@ -346,25 +381,35 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) { struct drm_i915_mocs_table table; unsigned int i; + u16 unused_value; if (!get_mocs_settings(dev_priv, &table)) return; - for (i = 0; i < table.size / 2; i++) + /* Set unused values to PTE */ + unused_value = table.table[I915_MOCS_PTE].l3cc_value; + + for (i = 0; i < table.size / 2; i++) { + u16 low = get_entry_l3cc(&table, 2 * i); + u16 high = get_entry_l3cc(&table, 2 * i + 1); + I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, 2 * i, 2 * i + 1)); + l3cc_combine(&table, low, high)); + } /* Odd table size - 1 left over */ if (table.size & 0x01) { + u16 low = get_entry_l3cc(&table, 2 * i); + I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, 2 * i, I915_MOCS_PTE)); + l3cc_combine(&table, low, unused_value)); i++; } - /* Now set the rest of the table to PTE */ + /* All remaining entries are also unused */ for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) I915_WRITE(GEN9_LNCFCMOCS(i), - l3cc_combine(&table, I915_MOCS_PTE, I915_MOCS_PTE)); + l3cc_combine(&table, unused_value, unused_value)); } /** From 5029537f4fbb30d2640d0cda86e35b6316e7464e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 23 Jan 2019 16:06:03 -0800 Subject: [PATCH 245/539] drm/i915: cache number of MOCS entries Instead of checking the gen number every time we need to know the max number of entries, just save it into the table struct so we don't need extra branches throughout the code. This will be useful for Ice Lake that has 64 rather than 62 defined entries. Ice Lake changes will be added in a follow up. v2: make size and n_entries `unsigned int` and introduce changes as a pre-work for the Ice Lake changes (Tvrtko) Suggested-by: Tvrtko Ursulin Signed-off-by: Lucas De Marchi Reviewed-by: Chris Wilson Reviewed-by: Tomasz Lis Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index e5f0e9da258c..6e293f91a10c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -32,7 +32,8 @@ struct drm_i915_mocs_entry { }; struct drm_i915_mocs_table { - u32 size; + unsigned int size; + unsigned int n_entries; const struct drm_i915_mocs_entry *table; }; @@ -144,10 +145,12 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; result = true; } else if (IS_GEN9_LP(dev_priv)) { table->size = ARRAY_SIZE(broxton_mocs_table); + table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; result = true; } else { @@ -219,8 +222,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) if (!get_mocs_settings(dev_priv, &table)) return; - GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); - /* Set unused values to PTE */ unused_value = table.table[I915_MOCS_PTE].control_value; @@ -231,7 +232,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) } /* All remaining entries are also unused */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) + for (; index < table.n_entries; index++) I915_WRITE(mocs_register(engine->id, index), unused_value); } @@ -253,17 +254,17 @@ static int emit_mocs_control_table(struct i915_request *rq, u32 unused_value; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; /* Set unused values to PTE */ unused_value = table->table[I915_MOCS_PTE].control_value; - cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); for (index = 0; index < table->size; index++) { u32 value = get_entry_control(table, index); @@ -273,7 +274,7 @@ static int emit_mocs_control_table(struct i915_request *rq, } /* All remaining entries are also unused */ - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { + for (; index < table->n_entries; index++) { *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); *cs++ = unused_value; } @@ -322,17 +323,17 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, unsigned int i; u32 *cs; - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) + if (GEM_WARN_ON(table->size > table->n_entries)) return -ENODEV; /* Set unused values to PTE */ unused_value = table->table[I915_MOCS_PTE].l3cc_value; - cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + table->n_entries); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); + *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); for (i = 0; i < table->size / 2; i++) { u16 low = get_entry_l3cc(table, 2 * i); @@ -352,7 +353,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, } /* All remaining entries are also unused */ - for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { + for (; i < table->n_entries / 2; i++) { *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); *cs++ = l3cc_combine(table, unused_value, unused_value); } @@ -407,7 +408,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) } /* All remaining entries are also unused */ - for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) + for (; i < table.n_entries / 2; i++) I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, unused_value, unused_value)); } From b3c316b0b869252481acc8d88138aae7619de582 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Wed, 23 Jan 2019 16:06:04 -0800 Subject: [PATCH 246/539] drm/i915/icl: Define MOCS table for Icelake The table has been unified across OSes to minimize virtualization overhead. The MOCS table is now published as part of bspec, and versioned. Entries are supposed to never be modified, but new ones can be added. Adding entries increases table version. The patch includes version 1 entries. Meaning of each entry is now explained in bspec, and user mode clients are expected to know what each entry means. The 3 entries used for previous platforms are still compatible with their legacy definitions, but that is not guaranteed to be true for future platforms. v2: Fixed SCC values, improved commit comment (Daniele) v3: Improved MOCS table comment (Daniele) v4: Moved new entries below gen9 ones. Put common entries into definition to be used in multiple arrays. (Lucas) v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas) v6: Removed definitions of reserved entries. (Michal) Increased limit of entries sent to the hardware on gen11+. v7: Simplify table as done for previou gens (Lucas) v8: Rebase on cached number of entries per-platform and use new MOCS_ENTRY() macro (Lucas) v9: Update comment (from Tomasz) BSpec: 34007 BSpec: 560 Signed-off-by: Tomasz Lis Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190124000604.18861-8-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 132 ++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 6e293f91a10c..331e7a678fb7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -46,6 +46,8 @@ struct drm_i915_mocs_table { #define LE_SCC(value) ((value) << 8) #define LE_PFM(value) ((value) << 11) #define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) @@ -54,6 +56,7 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ /* (e)LLC caching options */ #define LE_0_PAGETABLE _LE_CACHEABILITY(0) @@ -89,18 +92,23 @@ struct drm_i915_mocs_table { * LNCFCMOCS0 - LNCFCMOCS32 registers. * * These tables are intended to be kept reasonably consistent across - * platforms. However some of the fields are not applicable to all of - * them. + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For the time * being they will be initialized to PTE. * - * NOTE: These tables MUST start with being uncached and the length - * MUST be less than 63 as the last two registers are reserved - * by the hardware. These tables are part of the kernel ABI and - * may only be updated incrementally by adding entries at the - * end. + * The last two entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -125,6 +133,108 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_3_WB) }; +#define GEN11_MOCS_ENTRIES \ + /* Base - Uncached (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_UNCACHED, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 + LeCC:PAT (Deprecated) */ \ + MOCS_ENTRY(I915_MOCS_PTE, \ + LE_0_PAGETABLE | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. - L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { + GEN11_MOCS_ENTRIES +}; + /** * get_mocs_settings() * @dev_priv: i915 device. @@ -142,8 +252,12 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || - IS_ICELAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + table->size = ARRAY_SIZE(icelake_mocs_table); + table->table = icelake_mocs_table; + table->n_entries = GEN11_NUM_MOCS_ENTRIES; + result = true; + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skylake_mocs_table; From 8e525cb4a622148fbe30134ee3a1a34ad839a43a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 25 Jan 2019 02:29:33 +0000 Subject: [PATCH 247/539] drm/i915/execlists: Move RPCS setup to context pin Configuring RPCS in context image just before pin is sufficient and will come extra handy in one of the following patches. v2: * Split image setup a bit differently. (Chris Wilson) v3: * Update context image after reset as well - otherwise the application of pinned default state clears the RPCS. v4: * Use local variable throughout the function. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190125023005.1007-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 45 ++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa8a4862543..9155cc675924 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1173,6 +1173,24 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } +static u32 make_rpcs(struct drm_i915_private *dev_priv); + +static void +__execlists_update_reg_state(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + u32 *regs = ce->lrc_reg_state; + struct intel_ring *ring = ce->ring; + + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD + 1] = ring->head; + regs[CTX_RING_TAIL + 1] = ring->tail; + + /* RPCS */ + if (engine->class == RENDER_CLASS) + regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); +} + static struct intel_context * __execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx, @@ -1211,10 +1229,8 @@ __execlists_context_pin(struct intel_engine_cs *engine, GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head)); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD + 1] = ce->ring->head; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = ce->ring->tail; + + __execlists_update_reg_state(engine, ce); ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1838,14 +1854,14 @@ static void execlists_reset(struct intel_engine_cs *engine, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, - request->gem_context, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); - request->ring->head = intel_ring_wrap(request->ring, request->postfix); - regs[CTX_RING_HEAD + 1] = request->ring->head; + + execlists_init_reg_state(regs, request->gem_context, engine, + request->ring); + + __execlists_update_reg_state(engine, request->hw_context); intel_ring_update_space(request->ring); @@ -2534,8 +2550,7 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); i915_oa_init_reg_state(engine, ctx, regs); } @@ -2696,12 +2711,8 @@ void intel_lr_context_resume(struct drm_i915_private *i915) intel_ring_reset(ce->ring, 0); - if (ce->pin_count) { /* otherwise done in context_pin */ - u32 *regs = ce->lrc_reg_state; - - regs[CTX_RING_HEAD + 1] = ce->ring->head; - regs[CTX_RING_TAIL + 1] = ce->ring->tail; - } + if (ce->pin_count) /* otherwise done in context_pin */ + __execlists_update_reg_state(engine, ce); } } } From e1a73a54a96e80dc6009e73c9209e4f81ae22285 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 10:05:20 +0000 Subject: [PATCH 248/539] drm/i915: Measure the required reserved size for request emission Instead of tediously and fragilely counting up the number of dwords required to emit the breadcrumb to seal a request, fake a request and measure it automatically once during engine setup. The downside is that this requires a fair amount of mocking to create a proper breadcrumb. Still, should be less error prone in future as the breadcrumb size fluctuates! Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125100520.20163-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 49 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 12 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 24 +++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 4 +- 5 files changed, 77 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2f3c71f6d313..8f738a7cd117 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx, intel_context_unpin(to_intel_context(ctx, engine)); } +struct measure_breadcrumb { + struct i915_request rq; + struct i915_timeline timeline; + struct intel_ring ring; + u32 cs[1024]; +}; + +static int measure_breadcrumb_sz(struct intel_engine_cs *engine) +{ + struct measure_breadcrumb *frame; + unsigned int dw; + + GEM_BUG_ON(!engine->i915->gt.scratch); + + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + return -ENOMEM; + + i915_timeline_init(engine->i915, &frame->timeline, "measure"); + + INIT_LIST_HEAD(&frame->ring.request_list); + frame->ring.timeline = &frame->timeline; + frame->ring.vaddr = frame->cs; + frame->ring.size = sizeof(frame->cs); + frame->ring.effective_size = frame->ring.size; + intel_ring_update_space(&frame->ring); + + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.ring = &frame->ring; + frame->rq.timeline = &frame->timeline; + + dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; + GEM_BUG_ON(dw != engine->emit_breadcrumb_sz); + + i915_timeline_fini(&frame->timeline); + kfree(frame); + + return dw; +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_breadcrumbs; + ret = measure_breadcrumb_sz(engine); + if (ret < 0) + goto err_status_page; + + engine->emit_breadcrumb_sz = ret; + return 0; +err_status_page: + cleanup_status_page(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9155cc675924..d2299425cf2f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. */ *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); + + return cs; } -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); - gen8_emit_wa_tail(request, cs); + return gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e39e483d8d16..107c4934e2fa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ *cs++ = GFX_OP_PIPE_CONTROL(4); @@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen6_rcs_emit_breadcrumb_sz = 14; @@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode) return 0; } -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen7_rcs_emit_breadcrumb_sz = 6; -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; @@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; @@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA @@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_FLUSH; @@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; @@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; } static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3ef0f9bf321..479bd53d4ac6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -470,7 +470,7 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); + u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 442ec2aeec81..905318b7ae18 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request, return 0; } -static void mock_emit_breadcrumb(struct i915_request *request, - u32 *flags) +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs) { + return cs; } static void mock_submit_request(struct i915_request *request) From 9fa4973e91be3e5cb220f7d607c21bf6e82c52d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 12:00:04 +0000 Subject: [PATCH 249/539] drm/i915: Remove manual breadcumb counting Now that we know we measure the size of the engine->emit_breadcrumb() correctly, we can remove the previous manual counting. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125120005.25191-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/intel_engine_cs.c | 7 +++---- drivers/gpu/drm/i915/intel_lrc.c | 4 ---- drivers/gpu/drm/i915/intel_ringbuffer.c | 28 +++++-------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 5 files changed, 11 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f941e40fd373..ddc35e9dc0c0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -650,7 +650,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32); + rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -901,7 +901,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + cs = intel_ring_begin(request, engine->emit_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8f738a7cd117..ef4c8c50a4ba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -611,7 +611,7 @@ struct measure_breadcrumb { u32 cs[1024]; }; -static int measure_breadcrumb_sz(struct intel_engine_cs *engine) +static int measure_breadcrumb_dw(struct intel_engine_cs *engine) { struct measure_breadcrumb *frame; unsigned int dw; @@ -637,7 +637,6 @@ static int measure_breadcrumb_sz(struct intel_engine_cs *engine) frame->rq.timeline = &frame->timeline; dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; - GEM_BUG_ON(dw != engine->emit_breadcrumb_sz); i915_timeline_fini(&frame->timeline); kfree(frame); @@ -698,11 +697,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_breadcrumbs; - ret = measure_breadcrumb_sz(engine); + ret = measure_breadcrumb_dw(engine); if (ret < 0) goto err_status_page; - engine->emit_breadcrumb_sz = ret; + engine->emit_breadcrumb_dw = ret; return 0; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d2299425cf2f..5551dd2ec0e6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2075,7 +2075,6 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { @@ -2099,7 +2098,6 @@ static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct i915_request *rq) { @@ -2192,7 +2190,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2298,7 +2295,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; ret = logical_ring_init(engine); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 107c4934e2fa..09c90475168a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -330,7 +330,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen6_rcs_emit_breadcrumb_sz = 14; static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -432,7 +431,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen7_rcs_emit_breadcrumb_sz = 6; static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { @@ -446,7 +444,6 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen6_xcs_emit_breadcrumb_sz = 4; #define GEN7_XCS_WA 32 static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) @@ -475,7 +472,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -885,7 +881,6 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int i9xx_emit_breadcrumb_sz = 6; #define GEN5_WA_STORES 8 /* must be at least 1! */ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) @@ -908,7 +903,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) return cs; } -static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2; #undef GEN5_WA_STORES static void @@ -2206,11 +2200,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->request_alloc = ring_request_alloc; engine->emit_breadcrumb = i9xx_emit_breadcrumb; - engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (IS_GEN(dev_priv, 5)) { + if (IS_GEN(dev_priv, 5)) engine->emit_breadcrumb = gen5_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz; - } engine->set_default_submission = i9xx_set_default_submission; @@ -2240,12 +2231,10 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2281,13 +2270,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(dev_priv, 6)) engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { + else engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2310,13 +2296,10 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) { + if (IS_GEN(dev_priv, 6)) engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz; - } else { + else engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; - } return intel_init_ring_buffer(engine); } @@ -2335,7 +2318,6 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_disable = hsw_vebox_irq_disable; engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 479bd53d4ac6..0834e91d4ace 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -471,7 +471,7 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_sz; + int emit_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). From 832a67bdb205765109d37fdfff4a97479b03b19b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 12:00:05 +0000 Subject: [PATCH 250/539] drm/i915: Compute the HWS offsets explicitly Simplify by using sizeof(u32) to convert from the index inside the HWSP to the byte offset. This has the advantage of not only being shorter (and so not upsetting checkpatch!) but that it matches use where we are writing to byte addresses using other commands than MI_STORE_DWORD_IMM. v2: Drop the now superfluous MI_STORE_DWORD_INDEX_SHIFT, it appears to be a local invention so keeping it after the final use does not help to clarify the GPU instruction. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125120005.25191-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_gpu_commands.h | 1 - drivers/gpu/drm/i915/intel_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h index 105e2a9e874a..b96a31bc1080 100644 --- a/drivers/gpu/drm/i915/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -112,7 +112,6 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 /* * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ab1c49b106f2..349ae5844f24 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -666,7 +666,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine) execlists_unwind_incomplete_requests(execlists); wait_for_guc_preempt_report(engine); - intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0); } /** @@ -824,7 +824,7 @@ static void guc_submission_tasklet(unsigned long data) } if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && - intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) == GUC_PREEMPT_FINISHED) complete_preempt_context(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0834e91d4ace..5ad46c2fbc0f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -716,11 +716,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * The area from dword 0x30 to 0x3ff is available for driver usage. */ #define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 -#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_SCRATCH_INDEX 0x40 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) +#define I915_GEM_HWS_PREEMPT 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x40 +#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f From ade8a0f59844349650514f608eb1da5280f7818b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:26 +0000 Subject: [PATCH 251/539] drm/i915: Make all GPU resets atomic In preparation for the next few commits, make resetting the GPU atomic. Currently, we have prepared gen6+ for atomic resetting of individual engines, but now there is a requirement to perform the whole device level reset (just the register poking) from inside an atomic context. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 90 ++++++++++++++----------------- 1 file changed, 39 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..2f840858572c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -12,6 +12,8 @@ #include "intel_guc.h" +#define RESET_MAX_RETRIES 3 + static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -144,14 +146,14 @@ static int i915_do_reset(struct drm_i915_private *i915, /* Assert reset for at least 20 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); + udelay(50); if (!err) - err = wait_for(!i915_in_reset(pdev), 500); + err = wait_for_atomic(!i915_in_reset(pdev), 50); return err; } @@ -171,7 +173,7 @@ static int g33_do_reset(struct drm_i915_private *i915, struct pci_dev *pdev = i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); + return wait_for_atomic(g4x_reset_complete(pdev), 50); } static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +184,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; @@ -196,7 +198,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; @@ -205,9 +207,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, out: pci_write_config_byte(pdev, I915_GDRST, 0); - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); return ret; } @@ -218,27 +220,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, { int ret; - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; } - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; } out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); + I915_WRITE_FW(ILK_GDSR, 0); + POSTING_READ_FW(ILK_GDSR); return ret; } @@ -527,32 +531,21 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) { - reset_func reset = intel_get_gpu_reset(i915); + const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; + reset_func reset; + int ret = -ETIMEDOUT; int retry; - int ret; - /* - * We want to perform per-engine reset from atomic context (e.g. - * softirq), which imposes the constraint that we cannot sleep. - * However, experience suggests that spending a bit of time waiting - * for a reset helps in various cases, so for a full-device reset - * we apply the opposite rule and wait if we want to. As we should - * always follow up a failed per-engine reset with a full device reset, - * being a little faster, stricter and more error prone for the - * atomic case seems an acceptable compromise. - * - * Unfortunately this leads to a bimodal routine, when the goal was - * to have a single reset function that worked for resetting any - * number of engines simultaneously. - */ - might_sleep_if(engine_mask == ALL_ENGINES); + reset = intel_get_gpu_reset(i915); + if (!reset) + return -ENODEV; /* * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); - for (retry = 0; retry < 3; retry++) { + for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { /* * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). Also as modern gpu as kbl can suffer @@ -569,15 +562,10 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) */ i915_stop_engines(i915, engine_mask); - ret = -ENODEV; - if (reset) { - GEM_TRACE("engine_mask=%x\n", engine_mask); - ret = reset(i915, engine_mask, retry); - } - if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) - break; - - cond_resched(); + GEM_TRACE("engine_mask=%x\n", engine_mask); + preempt_disable(); + ret = reset(i915, engine_mask, retry); + preempt_enable(); } intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); @@ -1014,7 +1002,7 @@ void i915_reset(struct drm_i915_private *i915, goto error; } - for (i = 0; i < 3; i++) { + for (i = 0; i < RESET_MAX_RETRIES; i++) { ret = intel_gpu_reset(i915, ALL_ENGINES); if (ret == 0) break; From fe62365f9f80a1c1d438c54fba21f5108a182de8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:27 +0000 Subject: [PATCH 252/539] drm/i915/guc: Disable global reset The guc (and huc) currently inexcruitably depend on struct_mutex for device reinitialisation from inside the reset, and indeed taking any mutex here is verboten (as we must be able to reset from underneath any of our mutexes). That makes recovering the guc unviable without, for example, reserving contiguous vma space and pages for it to use. The plan to re-enable global reset for the GuC centres around reusing the WOPM reserved space at the top of the aperture (that we know we can populate a contiguous range large enough to dma xfer the fw image). In the meantime, hopefully no one even notices as the device-reset is only used as a backup to the per-engine resets for handling GPU hangs. Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Acked-by: Daniele Ceraolo Spurio Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 2f840858572c..33408c4e6358 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -574,6 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) bool intel_has_gpu_reset(struct drm_i915_private *i915) { + if (USES_GUC(i915)) + return false; + return intel_get_gpu_reset(i915); } From eb8d0f5af4ec2d172baf8b4b9a2199cd916b4e54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:28 +0000 Subject: [PATCH 253/539] drm/i915: Remove GPU reset dependence on struct_mutex Now that the submission backends are controlled via their own spinlocks, with a wave of a magic wand we can lift the struct_mutex requirement around GPU reset. That is we allow the submission frontend (userspace) to keep on submitting while we process the GPU reset as we can suspend the backend independently. The major change is around the backoff/handoff strategy for performing the reset. With no mutex deadlock, we no longer have to coordinate with any waiter, and just perform the reset immediately. Testcase: igt/gem_mmap_gtt/hang # regresses Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 38 +- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/i915_gem.c | 18 +- drivers/gpu/drm/i915/i915_gem_fence_reg.h | 1 - drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 104 ++--- drivers/gpu/drm/i915/i915_gpu_error.h | 28 +- drivers/gpu/drm/i915/i915_request.c | 47 -- drivers/gpu/drm/i915/i915_reset.c | 420 ++++++++---------- drivers/gpu/drm/i915/i915_reset.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/intel_guc_submission.c | 5 +- drivers/gpu/drm/i915/intel_hangcheck.c | 28 +- drivers/gpu/drm/i915/intel_lrc.c | 91 ++-- drivers/gpu/drm/i915/intel_overlay.c | 2 - drivers/gpu/drm/i915/intel_ringbuffer.c | 91 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 17 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 57 +-- .../drm/i915/selftests/intel_workarounds.c | 3 - .../gpu/drm/i915/selftests/mock_gem_device.c | 4 +- 20 files changed, 418 insertions(+), 551 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9a9e1da496dc..76dea0572f3e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1284,8 +1284,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_puts(m, "Wedged\n"); if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) seq_puts(m, "Reset in progress: struct_mutex backoff\n"); - if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags)) - seq_puts(m, "Reset in progress: reset handoff to waiter\n"); if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) seq_puts(m, "Waiter holding struct mutex\n"); if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) @@ -1321,15 +1319,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); - seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n", + intel_engine_last_submit(engine), + jiffies_to_msecs(jiffies - + engine->hangcheck.action_timestamp)); + seq_printf(m, "\twaiters? %s, fake irq active? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings)), - yesno(engine->hangcheck.stalled), - yesno(engine->hangcheck.wedged)); + &dev_priv->gpu_error.missed_irq_rings))); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1343,11 +1341,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); - seq_printf(m, "\taction = %s(%d) %d ms ago\n", - hangcheck_action_to_str(engine->hangcheck.action), - engine->hangcheck.action, - jiffies_to_msecs(jiffies - - engine->hangcheck.action_timestamp)); if (engine->id == RCS) { seq_puts(m, "\tinstdone read =\n"); @@ -3911,8 +3904,6 @@ static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -3925,18 +3916,8 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - for_each_engine_masked(engine, i915, val, tmp) { - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - engine->hangcheck.stalled = true; - } - i915_handle_error(i915, val, I915_ERROR_CAPTURE, "Manually set wedged engine mask = %llx", val); - - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - return 0; } @@ -4091,13 +4072,8 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && - i915_terminally_wedged(&i915->gpu_error)) { + if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error)) i915_handle_error(i915, ALL_ENGINES, 0, NULL); - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE); - } fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c111ad09922..0133d1da3d3c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3001,11 +3001,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error) return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); } -static inline bool i915_reset_handoff(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags)); -} - static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { return unlikely(test_bit(I915_WEDGED, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b359390ba22c..d20b42386c3c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -657,11 +657,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps_client) { might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&obj->base.dev->struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); timeout = i915_gem_object_wait_reservation(obj->resv, @@ -4493,8 +4488,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); @@ -4520,6 +4513,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); + mutex_lock(&i915->drm.struct_mutex); i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); } @@ -4534,6 +4528,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); intel_suspend_gt_powersave(i915); + flush_workqueue(i915->wq); + mutex_lock(&i915->drm.struct_mutex); /* @@ -4563,11 +4559,9 @@ int i915_gem_suspend(struct drm_i915_private *i915) i915_retire_requests(i915); /* ensure we flush after wedging */ mutex_unlock(&i915->drm.struct_mutex); + i915_reset_flush(i915); - intel_uc_suspend(i915); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - cancel_delayed_work_sync(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.retire_work); /* * As the idle_work is rearming if it detects a race, play safe and @@ -4575,6 +4569,8 @@ int i915_gem_suspend(struct drm_i915_private *i915) */ drain_delayed_work(&i915->gt.idle_work); + intel_uc_suspend(i915); + /* * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99a31ded4dfd..09dcaf14121b 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -50,4 +50,3 @@ struct drm_i915_fence_reg { }; #endif - diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9229b03d629b..a0039ea97cdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,7 @@ #include #include "i915_request.h" +#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 1f8e80e31b49..4eef0462489c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -533,10 +533,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); - err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); - err_printf(m, " hangcheck action: %s\n", - hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, ee->hangcheck_timestamp == epoch ? "; epoch" : ""); @@ -684,15 +681,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - if (error->engine[i].hangcheck_stalled && - error->engine[i].context.pid) { - err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", - engine_name(m->i915, i), - error->engine[i].context.comm, - error->engine[i].context.pid, - error->engine[i].context.ban_score, - bannable(&error->engine[i].context)); - } + if (!error->engine[i].context.pid) + continue; + + err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n", + engine_name(m->i915, i), + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score, + bannable(&error->engine[i].context)); } err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); @@ -1144,7 +1141,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, return i; } -/* Generate a semi-unique error code. The code is not meant to have meaning, The +/* + * Generate a semi-unique error code. The code is not meant to have meaning, The * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. * @@ -1153,29 +1151,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * * It's only a small step better than a random number in its current form. */ -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - int *engine_id) +static u32 i915_error_generate_code(struct i915_gpu_state *error, + unsigned long engine_mask) { - u32 error_code = 0; - int i; - - /* IPEHR would be an ideal way to detect errors, as it's the gross + /* + * IPEHR would be an ideal way to detect errors, as it's the gross * measure of "the command that hung." However, has some very common * synchronization commands which almost always appear in the case * strictly a client bug. Use instdone to differentiate those some. */ - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->engine[i].hangcheck_stalled) { - if (engine_id) - *engine_id = i; + if (engine_mask) { + struct drm_i915_error_engine *ee = + &error->engine[ffs(engine_mask)]; - return error->engine[i].ipehr ^ - error->engine[i].instdone.instdone; - } + return ee->ipehr ^ ee->instdone.instdone; } - return error_code; + return 0; } static void gem_record_fences(struct i915_gpu_state *error) @@ -1338,9 +1330,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); - ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; - ee->hangcheck_action = engine->hangcheck.action; - ee->hangcheck_stalled = engine->hangcheck.stalled; + if (!ee->idle) + ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1783,31 +1774,35 @@ static void capture_reg_state(struct i915_gpu_state *error) error->pgtbl_er = I915_READ(PGTBL_ER); } -static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error, - u32 engine_mask, - const char *error_msg) +static const char * +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) { - u32 ecode; - int engine_id = -1, len; + int len; + int i; - ecode = i915_error_generate_code(dev_priv, error, &engine_id); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) + if (!error->engine[i].context.pid) + engines &= ~BIT(i); len = scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), engine_id, ecode); - - if (engine_id != -1 && error->engine[engine_id].context.pid) + "GPU HANG: ecode %d:%lx:0x%08x", + INTEL_GEN(error->i915), engines, + i915_error_generate_code(error, engines)); + if (engines) { + /* Just show the first executing process, more is confusing */ + i = ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].context.comm, - error->engine[engine_id].context.pid); + error->engine[i].context.comm, + error->engine[i].context.pid); + } + if (msg) + len += scnprintf(error->error_msg + len, + sizeof(error->error_msg) - len, + ", %s", msg); - scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, - ", reason: %s, action: %s", - error_msg, - engine_mask ? "reset" : "continue"); + return error->error_msg; } static void capture_gen_state(struct i915_gpu_state *error) @@ -1847,7 +1842,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error) for (i = 0; i < ARRAY_SIZE(error->engine); i++) { const struct drm_i915_error_engine *ee = &error->engine[i]; - if (ee->hangcheck_stalled && + if (ee->hangcheck_timestamp && time_before(ee->hangcheck_timestamp, epoch)) epoch = ee->hangcheck_timestamp; } @@ -1921,7 +1916,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * i915_capture_error_state - capture an error record for later analysis * @i915: i915 device * @engine_mask: the mask of engines triggering the hang - * @error_msg: a message to insert into the error capture header + * @msg: a message to insert into the error capture header * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills @@ -1929,8 +1924,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915) * to pick up. */ void i915_capture_error_state(struct drm_i915_private *i915, - u32 engine_mask, - const char *error_msg) + unsigned long engine_mask, + const char *msg) { static bool warned; struct i915_gpu_state *error; @@ -1946,8 +1941,7 @@ void i915_capture_error_state(struct drm_i915_private *i915, if (IS_ERR(error)) return; - i915_error_capture_msg(i915, error, engine_mask, error_msg); - DRM_INFO("%s\n", error->error_msg); + dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg)); if (!error->simulated) { spin_lock_irqsave(&i915->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 604291f7762d..231173786eae 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -85,8 +85,6 @@ struct i915_gpu_state { bool waiting; int num_waiters; unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; struct i915_address_space *vm; int num_requests; u32 reset_count; @@ -197,6 +195,8 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; +struct i915_gpu_restart; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -247,15 +247,6 @@ struct i915_gpu_error { * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a * secondary role in preventing two concurrent global reset attempts. * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * acquire the struct_mutex to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. @@ -269,20 +260,13 @@ struct i915_gpu_error { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 -#define I915_RESET_ENGINE 3 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 #define I915_WEDGED (BITS_PER_LONG - 1) /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; - /** Set of stalled engines with guilty requests, in the current reset */ - u32 stalled_mask; - - /** Reason for the current *global* reset */ - const char *reason; - struct mutex wedge_mutex; /* serialises wedging/unwedging */ /** @@ -299,6 +283,8 @@ struct i915_gpu_error { /* For missed irq/seqno simulation. */ unsigned long test_irq_rings; + + struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { @@ -320,7 +306,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, + unsigned long engine_mask, const char *error_msg); static inline struct i915_gpu_state * diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ddc35e9dc0c0..f4241a17e2ad 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1083,18 +1083,6 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } -static bool __i915_wait_request_check_and_reset(struct i915_request *request) -{ - struct i915_gpu_error *error = &request->i915->gpu_error; - - if (likely(!i915_reset_handoff(error))) - return false; - - __set_current_state(TASK_RUNNING); - i915_reset(request->i915, error->stalled_mask, error->reason); - return true; -} - /** * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon @@ -1120,17 +1108,10 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; - DEFINE_WAIT_FUNC(reset, default_wake_function); DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&rq->i915->drm.struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif GEM_BUG_ON(timeout < 0); if (i915_request_completed(rq)) @@ -1140,11 +1121,7 @@ long i915_request_wait(struct i915_request *rq, return -ETIME; trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); - intel_wait_init(&wait); if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); @@ -1155,10 +1132,6 @@ restart: if (intel_wait_update_request(&wait, rq)) break; - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; goto complete; @@ -1188,9 +1161,6 @@ restart: */ goto wakeup; - if (flags & I915_WAIT_LOCKED) - __i915_wait_request_check_and_reset(rq); - for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1214,21 +1184,6 @@ wakeup: if (i915_request_completed(rq)) break; - /* - * If the GPU is hung, and we hold the lock, reset the GPU - * and then check for completion. On a full reset, the engine's - * HW seqno will be advanced passed us and we are complete. - * If we do a partial reset, we have to wait for the GPU to - * resume and update the breadcrumb. - * - * If we don't hold the mutex, we can just wait for the worker - * to come along and update the breadcrumb (either directly - * itself, or indirectly by recovering the GPU). - */ - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(rq)) - continue; - /* Only spin if we know the GPU is processing this request */ if (__i915_spin_request(rq, wait.seqno, state, 2)) break; @@ -1242,8 +1197,6 @@ wakeup: intel_engine_remove_wait(rq->engine, &wait); complete: __set_current_state(TASK_RUNNING); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); remove_wait_queue(&rq->execute, &exec); trace_i915_request_wait_end(rq); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 33408c4e6358..68af017ee548 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -5,6 +5,7 @@ */ #include +#include #include "i915_drv.h" #include "i915_gpu_error.h" @@ -14,27 +15,31 @@ #define RESET_MAX_RETRIES 3 +/* XXX How to handle concurrent GGTT updates using tiling registers? */ +#define RESET_UNDER_STOP_MACHINE 0 + static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_timeline *timeline = rq->timeline; - unsigned long flags; + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock(&timeline->lock); - list_for_each_entry_continue(rq, &engine->timeline.requests, link) - if (rq->gem_context == hung_ctx) - i915_request_skip(rq, -EIO); + if (rq->global_seqno) { + list_for_each_entry_continue(rq, + &engine->timeline.requests, link) + if (rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); + } list_for_each_entry(rq, &timeline->requests, link) i915_request_skip(rq, -EIO); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void client_mark_guilty(struct drm_i915_file_private *file_priv, @@ -61,7 +66,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static void context_mark_guilty(struct i915_gem_context *ctx) +static bool context_mark_guilty(struct i915_gem_context *ctx) { unsigned int score; bool banned, bannable; @@ -74,7 +79,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx) /* Cool contexts don't accumulate client ban score */ if (!bannable) - return; + return false; if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", @@ -85,6 +90,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx) if (!IS_ERR_OR_NULL(ctx->file_priv)) client_mark_guilty(ctx->file_priv, ctx); + + return banned; } static void context_mark_innocent(struct i915_gem_context *ctx) @@ -92,6 +99,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } +void i915_reset_request(struct i915_request *rq, bool guilty) +{ + lockdep_assert_held(&rq->engine->timeline.lock); + GEM_BUG_ON(i915_request_completed(rq)); + + if (guilty) { + i915_request_skip(rq, -EIO); + if (context_mark_guilty(rq->gem_context)) + engine_skip_context(rq); + } else { + dma_fence_set_error(&rq->fence, -EAGAIN); + context_mark_innocent(rq->gem_context); + } +} + static void gen3_stop_engine(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -604,11 +626,8 @@ int intel_reset_guc(struct drm_i915_private *i915) * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. */ -static struct i915_request * -reset_prepare_engine(struct intel_engine_cs *engine) +static void reset_prepare_engine(struct intel_engine_cs *engine) { - struct i915_request *rq; - /* * During the reset sequence, we must prevent the engine from * entering RC6. As the context state is undefined until we restart @@ -617,174 +636,116 @@ reset_prepare_engine(struct intel_engine_cs *engine) * GPU state upon resume, i.e. fail to restart after a reset. */ intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - rq = engine->reset.prepare(engine); - if (rq && rq->fence.error == -EIO) - rq = ERR_PTR(-EIO); /* Previous reset failed! */ - - return rq; + engine->reset.prepare(engine); } -static int reset_prepare(struct drm_i915_private *i915) +static void reset_prepare(struct drm_i915_private *i915) { struct intel_engine_cs *engine; - struct i915_request *rq; enum intel_engine_id id; - int err = 0; - for_each_engine(engine, i915, id) { - rq = reset_prepare_engine(engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - continue; - } + for_each_engine(engine, i915, id) + reset_prepare_engine(engine); - engine->hangcheck.active_request = rq; - } - - i915_gem_revoke_fences(i915); intel_uc_sanitize(i915); - - return err; } -/* Returns the request if it was guilty of the hang */ -static struct i915_request * -reset_request(struct intel_engine_cs *engine, - struct i915_request *rq, - bool stalled) -{ - /* - * The guilty request will get skipped on a hung engine. - * - * Users of client default contexts do not rely on logical - * state preserved between batches so it is safe to execute - * queued requests following the hang. Non default contexts - * rely on preserved state, so skipping a batch loses the - * evolution of the state and it needs to be considered corrupted. - * Executing more queued batches on top of corrupted state is - * risky. But we take the risk by trying to advance through - * the queued requests in order to make the client behaviour - * more predictable around resets, by not throwing away random - * amount of batches it has prepared for execution. Sophisticated - * clients can use gem_reset_stats_ioctl and dma fence status - * (exported via sync_file info ioctl on explicit fences) to observe - * when it loses the context state and should rebuild accordingly. - * - * The context ban, and ultimately the client ban, mechanism are safety - * valves if client submission ends up resulting in nothing more than - * subsequent hangs. - */ - - if (i915_request_completed(rq)) { - GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n", - engine->name, rq->global_seqno, - rq->fence.context, rq->fence.seqno, - intel_engine_get_seqno(engine)); - stalled = false; - } - - if (stalled) { - context_mark_guilty(rq->gem_context); - i915_request_skip(rq, -EIO); - - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(rq->gem_context)) - engine_skip_context(rq); - } else { - /* - * Since this is not the hung engine, it may have advanced - * since the hang declaration. Double check by refinding - * the active request at the time of the reset. - */ - rq = i915_gem_find_active_request(engine); - if (rq) { - unsigned long flags; - - context_mark_innocent(rq->gem_context); - dma_fence_set_error(&rq->fence, -EAGAIN); - - /* Rewind the engine to replay the incomplete rq */ - spin_lock_irqsave(&engine->timeline.lock, flags); - rq = list_prev_entry(rq, link); - if (&rq->link == &engine->timeline.requests) - rq = NULL; - spin_unlock_irqrestore(&engine->timeline.lock, flags); - } - } - - return rq; -} - -static void reset_engine(struct intel_engine_cs *engine, - struct i915_request *rq, - bool stalled) -{ - if (rq) - rq = reset_request(engine, rq, stalled); - - /* Setup the CS to resume from the breadcrumb of the hung request */ - engine->reset.reset(engine, rq); -} - -static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err; - lockdep_assert_held(&i915->drm.struct_mutex); + /* + * Everything depends on having the GTT running, so we need to start + * there. + */ + err = i915_ggtt_enable_hw(i915); + if (err) + return err; - i915_retire_requests(i915); - - for_each_engine(engine, i915, id) { - struct intel_context *ce; - - reset_engine(engine, - engine->hangcheck.active_request, - stalled_mask & ENGINE_MASK(id)); - ce = fetch_and_zero(&engine->last_retired_context); - if (ce) - intel_context_unpin(ce); - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - * - * More mysteriously, if we leave the engine idle after a reset, - * the next userspace batch may hang, with what appears to be - * an incoherent read by the CS (presumably stale TLB). An - * empty request appears sufficient to paper over the glitch. - */ - if (intel_engine_is_idle(engine)) { - struct i915_request *rq; - - rq = i915_request_alloc(engine, i915->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - } + for_each_engine(engine, i915, id) + intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id)); i915_gem_restore_fences(i915); + + return err; } static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); - intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } +struct i915_gpu_restart { + struct work_struct work; + struct drm_i915_private *i915; +}; + +static void restart_work(struct work_struct *work) +{ + struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); + struct drm_i915_private *i915 = arg->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + + wakeref = intel_runtime_pm_get(i915); + mutex_lock(&i915->drm.struct_mutex); + WRITE_ONCE(i915->gpu_error.restart, NULL); + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + /* + * Ostensibily, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. + */ + if (!intel_engine_is_idle(engine)) + continue; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (!IS_ERR(rq)) + i915_request_add(rq); + } + + mutex_unlock(&i915->drm.struct_mutex); + intel_runtime_pm_put(i915, wakeref); + + kfree(arg); +} + static void reset_finish(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - engine->hangcheck.active_request = NULL; + for_each_engine(engine, i915, id) reset_finish_engine(engine); +} + +static void reset_restart(struct drm_i915_private *i915) +{ + struct i915_gpu_restart *arg; + + /* + * Following the reset, ensure that we always reload context for + * powersaving, and to correct engine->last_retired_context. Since + * this requires us to submit a request, queue a worker to do that + * task for us to evade any locking here. + */ + if (READ_ONCE(i915->gpu_error.restart)) + return; + + arg = kmalloc(sizeof(*arg), GFP_KERNEL); + if (arg) { + arg->i915 = i915; + INIT_WORK(&arg->work, restart_work); + + WRITE_ONCE(i915->gpu_error.restart, arg); + queue_work(i915->wq, &arg->work); } } @@ -873,8 +834,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) struct i915_timeline *tl; bool ret = false; - lockdep_assert_held(&i915->drm.struct_mutex); - if (!test_bit(I915_WEDGED, &error->flags)) return true; @@ -897,9 +856,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ list_for_each_entry(tl, &i915->gt.timelines, link) { struct i915_request *rq; + long timeout; - rq = i915_gem_active_peek(&tl->last_request, - &i915->drm.struct_mutex); + rq = i915_gem_active_get_unlocked(&tl->last_request); if (!rq) continue; @@ -914,12 +873,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * and when the seqno passes the fence, the signaler * then signals the fence waking us up). */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) + timeout = dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (timeout < 0) goto unlock; } - i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); intel_engines_sanitize(i915, false); @@ -933,7 +892,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * context and do not require stop_machine(). */ intel_engines_reset_default_submission(i915); - i915_gem_contexts_lost(i915); GEM_TRACE("end\n"); @@ -946,6 +904,52 @@ unlock: return ret; } +struct __i915_reset { + struct drm_i915_private *i915; + unsigned int stalled_mask; +}; + +static int __i915_reset__BKL(void *data) +{ + struct __i915_reset *arg = data; + int err; + + err = intel_gpu_reset(arg->i915, ALL_ENGINES); + if (err) + return err; + + return gt_reset(arg->i915, arg->stalled_mask); +} + +#if RESET_UNDER_STOP_MACHINE +/* + * XXX An alternative to using stop_machine would be to park only the + * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) + * we should be able to prevent their memmory accesses via the lost fence + * registers over the course of the reset without the potential recursive + * of mutexes between the pagefault handler and reset. + * + * See igt/gem_mmap_gtt/hang + */ +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) +#else +#define __do_reset(fn, arg) fn(arg) +#endif + +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) +{ + struct __i915_reset arg = { i915, stalled_mask }; + int err, i; + + err = __do_reset(__i915_reset__BKL, &arg); + for (i = 0; err && i < RESET_MAX_RETRIES; i++) { + msleep(100); + err = __do_reset(__i915_reset__BKL, &arg); + } + + return err; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -971,31 +975,22 @@ void i915_reset(struct drm_i915_private *i915, { struct i915_gpu_error *error = &i915->gpu_error; int ret; - int i; GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - lockdep_assert_held(&i915->drm.struct_mutex); assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - if (!test_bit(I915_RESET_HANDOFF, &error->flags)) - return; - /* Clear any previous failed attempts at recovery. Time to try again. */ if (!i915_gem_unset_wedged(i915)) - goto wakeup; + return; if (reason) dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; - ret = reset_prepare(i915); - if (ret) { - dev_err(i915->drm.dev, "GPU recovery failed\n"); - goto taint; - } + reset_prepare(i915); if (!intel_has_gpu_reset(i915)) { if (i915_modparams.reset) @@ -1005,32 +1000,11 @@ void i915_reset(struct drm_i915_private *i915, goto error; } - for (i = 0; i < RESET_MAX_RETRIES; i++) { - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret == 0) - break; - - msleep(100); - } - if (ret) { + if (do_reset(i915, stalled_mask)) { dev_err(i915->drm.dev, "Failed to reset chip\n"); goto taint; } - /* Ok, now get things going again... */ - - /* - * Everything depends on having the GTT running, so we need to start - * there. - */ - ret = i915_ggtt_enable_hw(i915); - if (ret) { - DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n", - ret); - goto error; - } - - gt_reset(i915, stalled_mask); intel_overlay_reset(i915); /* @@ -1052,9 +1026,8 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); -wakeup: - clear_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_bit(&error->flags, I915_RESET_HANDOFF); + if (!i915_terminally_wedged(error)) + reset_restart(i915); return; taint: @@ -1073,7 +1046,6 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_retire_requests(i915); goto finish; } @@ -1099,18 +1071,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct i915_request *active_request; int ret; GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - active_request = reset_prepare_engine(engine); - if (IS_ERR_OR_NULL(active_request)) { - /* Either the previous reset failed, or we pardon the reset. */ - ret = PTR_ERR(active_request); - goto out; - } + if (i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return 0; + + reset_prepare_engine(engine); if (msg) dev_notice(engine->i915->drm.dev, @@ -1134,7 +1104,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - reset_engine(engine, active_request, true); + intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1171,30 +1141,7 @@ static void i915_reset_device(struct drm_i915_private *i915, i915_wedge_on_timeout(&w, i915, 5 * HZ) { intel_prepare_reset(i915); - error->reason = reason; - error->stalled_mask = engine_mask; - - /* Signal that locked waiters should reset the GPU */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - wake_up_all(&error->wait_queue); - - /* - * Wait for anyone holding the lock to wakeup, without - * blocking indefinitely on struct_mutex. - */ - do { - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_reset(i915, engine_mask, reason); - mutex_unlock(&i915->drm.struct_mutex); - } - } while (wait_on_bit_timeout(&error->flags, - I915_RESET_HANDOFF, - TASK_UNINTERRUPTIBLE, - 1)); - - error->stalled_mask = 0; - error->reason = NULL; + i915_reset(i915, engine_mask, reason); intel_finish_reset(i915); } @@ -1350,6 +1297,25 @@ out: intel_runtime_pm_put(i915, wakeref); } +bool i915_reset_flush(struct drm_i915_private *i915) +{ + int err; + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + flush_workqueue(i915->wq); + GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&i915->drm.struct_mutex); + + return !err; +} + static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h index b6a519bde67d..f2d347f319df 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/i915_reset.h @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915, int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); +void i915_reset_request(struct i915_request *rq, bool guilty); +bool i915_reset_flush(struct drm_i915_private *i915); + bool intel_has_gpu_reset(struct drm_i915_private *i915); bool intel_has_reset_engine(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ef4c8c50a4ba..1a5c163b98d6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1119,10 +1119,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force) if (!reset_engines(i915) && !force) return; - for_each_engine(engine, i915, id) { - if (engine->reset.reset) - engine->reset.reset(engine, NULL); - } + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); } /** diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 349ae5844f24..45e2db683fe5 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -834,8 +834,7 @@ static void guc_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request * -guc_reset_prepare(struct intel_engine_cs *engine) +static void guc_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -861,8 +860,6 @@ guc_reset_prepare(struct intel_engine_cs *engine) */ if (engine->i915->guc.preempt_wq) flush_workqueue(engine->i915->guc.preempt_wq); - - return i915_gem_find_active_request(engine); } /* diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 741441daae32..5662d6fed523 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -25,6 +25,17 @@ #include "i915_drv.h" #include "i915_reset.h" +struct hangcheck { + u64 acthd; + u32 seqno; + enum intel_engine_hangcheck_action action; + unsigned long action_timestamp; + int deadlock; + struct intel_instdone instdone; + bool wedged:1; + bool stalled:1; +}; + static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { u32 tmp = current_instdone | *old_instdone; @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) } static void hangcheck_load_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); hc->seqno = intel_engine_get_seqno(engine); } static void hangcheck_store_sample(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; engine->hangcheck.seqno = hc->seqno; - engine->hangcheck.action = hc->action; - engine->hangcheck.stalled = hc->stalled; - engine->hangcheck.wedged = hc->wedged; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, - const struct intel_engine_hangcheck *hc) + const struct hangcheck *hc) { if (engine->hangcheck.seqno != hc->seqno) return ENGINE_ACTIVE_SEQNO; @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine, } static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, - struct intel_engine_hangcheck *hc) + struct hangcheck *hc) { unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT; @@ -265,19 +273,19 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck hc; + struct hangcheck hc; hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); - if (engine->hangcheck.stalled) { + if (hc.stalled) { hung |= intel_engine_flag(engine); if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } - if (engine->hangcheck.wedged) + if (hc.wedged) wedged |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5551dd2ec0e6..185867106c14 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" @@ -264,7 +265,8 @@ static void unwind_wa_tail(struct i915_request *rq) assert_ring_tail_valid(rq->ring, rq->tail); } -static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; struct list_head *uninitialized_var(pl); @@ -306,6 +308,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } + + return active; } void @@ -1732,11 +1736,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) return 0; } -static struct i915_request * -execlists_reset_prepare(struct intel_engine_cs *engine) +static void execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *request, *active; unsigned long flags; GEM_TRACE("%s: depth<-%d\n", engine->name, @@ -1752,59 +1754,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * prevents the race. */ __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + /* And flush any current direct submission. */ spin_lock_irqsave(&engine->timeline.lock, flags); - - /* - * We want to flush the pending context switches, having disabled - * the tasklet above, we can assume exclusive access to the execlists. - * For this allows us to catch up with an inflight preemption event, - * and avoid blaming an innocent request if the stall was due to the - * preemption itself. - */ - process_csb(engine); - - /* - * The last active request can then be no later than the last request - * now in ELSP[0]. So search backwards from there, so that if the GPU - * has advanced beyond the last CSB update, it will be pardoned. - */ - active = NULL; - request = port_request(execlists->port); - if (request) { - /* - * Prevent the breadcrumb from advancing before we decide - * which request is currently active. - */ - intel_engine_stop_cs(engine); - - list_for_each_entry_from_reverse(request, - &engine->timeline.requests, - link) { - if (__i915_request_completed(request, - request->global_seqno)) - break; - - active = request; - } - } - + process_csb(engine); /* drain preemption events */ spin_unlock_irqrestore(&engine->timeline.lock, flags); - - return active; } -static void execlists_reset(struct intel_engine_cs *engine, - struct i915_request *request) +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; unsigned long flags; u32 *regs; - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, request ? request->global_seqno : 0, - intel_engine_get_seqno(engine)); - spin_lock_irqsave(&engine->timeline.lock, flags); /* @@ -1819,12 +1783,18 @@ static void execlists_reset(struct intel_engine_cs *engine, execlists_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - __unwind_incomplete_requests(engine); + rq = __unwind_incomplete_requests(engine); /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); + if (!rq) + goto out_unlock; /* * If the request was innocent, we leave the request in the ELSP @@ -1837,8 +1807,9 @@ static void execlists_reset(struct intel_engine_cs *engine, * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - if (!request || request->fence.error != -EIO) - return; + i915_reset_request(rq, stalled); + if (!stalled) + goto out_unlock; /* * We want a simple context + ring to execute the breadcrumb update. @@ -1848,7 +1819,7 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = request->hw_context->lrc_reg_state; + regs = rq->hw_context->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, @@ -1856,17 +1827,14 @@ static void execlists_reset(struct intel_engine_cs *engine, } /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - request->ring->head = intel_ring_wrap(request->ring, request->postfix); + rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix); + intel_ring_update_space(rq->ring); - execlists_init_reg_state(regs, request->gem_context, engine, - request->ring); + execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring); + __execlists_update_reg_state(engine, rq->hw_context); - __execlists_update_reg_state(engine, request->hw_context); - - intel_ring_update_space(request->ring); - - /* Reset WaIdleLiteRestore:bdw,skl as well */ - unwind_wa_tail(request); +out_unlock: + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -1879,6 +1847,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * to sleep before we restart and reload a context. * */ + GEM_BUG_ON(!reset_in_progress(execlists)); if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) execlists->tasklet.func(execlists->tasklet.data); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c81db81e4416..f68c7975006c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -478,8 +478,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) if (!overlay) return; - intel_overlay_release_old_vid(overlay); - overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 09c90475168a..a9efc8c71254 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_workarounds.h" @@ -711,52 +712,80 @@ out: return ret; } -static struct i915_request *reset_prepare(struct intel_engine_cs *engine) +static void reset_prepare(struct intel_engine_cs *engine) { intel_engine_stop_cs(engine); - return i915_gem_find_active_request(engine); } -static void skip_request(struct i915_request *rq) +static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - void *vaddr = rq->ring->vaddr; + struct i915_timeline *tl = &engine->timeline; + struct i915_request *pos, *rq; + unsigned long flags; u32 head; - head = rq->infix; - if (rq->postfix < head) { - memset32(vaddr + head, MI_NOOP, - (rq->ring->size - head) / sizeof(u32)); - head = 0; + rq = NULL; + spin_lock_irqsave(&tl->lock, flags); + list_for_each_entry(pos, &tl->requests, link) { + if (!__i915_request_completed(pos, pos->global_seqno)) { + rq = pos; + break; + } } - memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); -} - -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) -{ - GEM_TRACE("%s request global=%d, current=%d\n", - engine->name, rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine)); + GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", + engine->name, + rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), + yesno(stalled)); /* - * Try to restore the logical GPU state to match the continuation - * of the request queue. If we skip the context/PD restore, then - * the next request may try to execute assuming that its context - * is valid and loaded on the GPU and so may try to access invalid - * memory, prompting repeated GPU hangs. + * The guilty request will get skipped on a hung engine. * - * If the request was guilty, we still restore the logical state - * in case the next request requires it (e.g. the aliasing ppgtt), - * but skip over the hung batch. + * Users of client default contexts do not rely on logical + * state preserved between batches so it is safe to execute + * queued requests following the hang. Non default contexts + * rely on preserved state, so skipping a batch loses the + * evolution of the state and it needs to be considered corrupted. + * Executing more queued batches on top of corrupted state is + * risky. But we take the risk by trying to advance through + * the queued requests in order to make the client behaviour + * more predictable around resets, by not throwing away random + * amount of batches it has prepared for execution. Sophisticated + * clients can use gem_reset_stats_ioctl and dma fence status + * (exported via sync_file info ioctl on explicit fences) to observe + * when it loses the context state and should rebuild accordingly. * - * If the request was innocent, we try to replay the request with - * the restored context. + * The context ban, and ultimately the client ban, mechanism are safety + * valves if client submission ends up resulting in nothing more than + * subsequent hangs. */ + if (rq) { - /* If the rq hung, jump to its breadcrumb and skip the batch */ - rq->ring->head = intel_ring_wrap(rq->ring, rq->head); - if (rq->fence.error == -EIO) - skip_request(rq); + /* + * Try to restore the logical GPU state to match the + * continuation of the request queue. If we skip the + * context/PD restore, then the next request may try to execute + * assuming that its context is valid and loaded on the GPU and + * so may try to access invalid memory, prompting repeated GPU + * hangs. + * + * If the request was guilty, we still restore the logical + * state in case the next request requires it (e.g. the + * aliasing ppgtt), but skip over the hung batch. + * + * If the request was innocent, we try to replay the request + * with the restored context. + */ + i915_reset_request(rq, stalled); + + GEM_BUG_ON(rq->ring != engine->buffer); + head = rq->head; + } else { + head = engine->buffer->tail; } + engine->buffer->head = intel_ring_wrap(engine->buffer, head); + + spin_unlock_irqrestore(&tl->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5ad46c2fbc0f..f2effd001540 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -120,13 +120,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; u32 seqno; - enum intel_engine_hangcheck_action action; unsigned long action_timestamp; - int deadlock; struct intel_instdone instdone; - struct i915_request *active_request; - bool stalled:1; - bool wedged:1; }; struct intel_ring { @@ -444,9 +439,8 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); struct { - struct i915_request *(*prepare)(struct intel_engine_cs *engine); - void (*reset)(struct intel_engine_cs *engine, - struct i915_request *rq); + void (*prepare)(struct intel_engine_cs *engine); + void (*reset)(struct intel_engine_cs *engine, bool stalled); void (*finish)(struct intel_engine_cs *engine); } reset; @@ -1018,6 +1012,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } +static inline void intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) +{ + if (engine->reset.reset) + engine->reset.reset(engine, stalled); +} + void intel_engines_sanitize(struct drm_i915_private *i915, bool force); bool intel_engine_is_idle(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 12550b55c42f..67431355cd6e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); - mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); i915_reset(i915, ALL_ENGINES, NULL); @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg) pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - mutex_unlock(&i915->drm.struct_mutex); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) @@ -399,9 +395,7 @@ static int igt_wedged_reset(void *arg) i915_gem_set_wedged(i915); GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, ALL_ENGINES, NULL); - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); @@ -511,7 +505,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - if (!wait_for_idle(engine)) { + if (!i915_reset_flush(i915)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -903,20 +897,13 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct i915_request *rq, u32 mask) +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask) { - struct i915_gpu_error *error = &rq->i915->gpu_error; - u32 reset_count = i915_reset_count(error); + u32 count = i915_reset_count(&i915->gpu_error); - error->stalled_mask = mask; + i915_reset(i915, mask, NULL); - /* set_bit() must be after we have setup the backchannel (mask) */ - smp_mb__before_atomic(); - set_bit(I915_RESET_HANDOFF, &error->flags); - - wake_up_all(&error->wait_queue); - - return reset_count; + return count; } static int igt_reset_wait(void *arg) @@ -962,7 +949,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - reset_count = fake_hangcheck(rq, ALL_ENGINES); + reset_count = fake_hangcheck(i915, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -972,7 +959,6 @@ static int igt_reset_wait(void *arg) goto out_rq; } - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; @@ -1162,7 +1148,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, } out_reset: - fake_hangcheck(rq, intel_engine_flag(rq->engine)); + fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); if (tsk) { struct igt_wedge_me w; @@ -1341,12 +1327,7 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - - i915_reset(i915, ENGINE_MASK(id), NULL); - - GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, - &i915->gpu_error.flags)); + reset_count = fake_hangcheck(i915, ENGINE_MASK(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1565,6 +1546,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); + i915_gem_set_wedged(i915); err = -EIO; } @@ -1588,7 +1570,6 @@ out: static void force_reset(struct drm_i915_private *i915) { i915_gem_set_wedged(i915); - set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); i915_reset(i915, 0, NULL); } @@ -1618,6 +1599,26 @@ static int igt_atomic_reset(void *arg) if (i915_terminally_wedged(&i915->gpu_error)) goto unlock; + if (intel_has_gpu_reset(i915)) { + const typeof(*phases) *p; + + for (p = phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + err = intel_gpu_reset(i915, ALL_ENGINES); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", + p->name); + goto out; + } + } + + force_reset(i915); + } + if (intel_has_reset_engine(i915)) { struct intel_engine_cs *engine; enum intel_engine_id id; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index a8cac56be835..b15c4f26c593 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -214,7 +214,6 @@ out_put: static int do_device_reset(struct intel_engine_cs *engine) { - set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags); i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds"); return 0; } @@ -394,7 +393,6 @@ static int live_gpu_reset_gt_engine_workarounds(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg) if (!ok) goto out; - set_bit(I915_RESET_HANDOFF, &error->flags); i915_reset(i915, ALL_ENGINES, "live_workarounds"); ok = verify_gt_engine_wa(i915, &lists, "after reset"); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 5477ad4a7e7d..8ab5a2688a0c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.retire_work); - cancel_delayed_work_sync(&i915->gt.idle_work); + drain_delayed_work(&i915->gt.retire_work); + drain_delayed_work(&i915->gt.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); From f3dccbdbdd94a3b255f9661df6048c4a6e372db0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:29 +0000 Subject: [PATCH 254/539] drm/i915/selftests: Trim struct_mutex duration for set-wedged selftest Trim the struct_mutex hold and exclude the call to i915_gem_set_wedged() as a reminder that it must be callable without struct_mutex held. Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 67431355cd6e..8025c7e0bf6c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -389,16 +389,16 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(i915); i915_gem_set_wedged(i915); - GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error)); i915_reset(i915, ALL_ENGINES, NULL); + mutex_unlock(&i915->drm.struct_mutex); intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; @@ -1675,6 +1675,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) wakeref = intel_runtime_pm_get(i915); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ err = i915_subtests(tests, i915); From 9b974bde4d4ad44c0458f922373340d54cb0452c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Jan 2019 13:22:30 +0000 Subject: [PATCH 255/539] drm/i915: Issue engine resets onto idle engines Always perform the requested reset, even if we believe the engine is idle. Presumably there was a reason the caller wanted the reset, and in the near future we lose the easy tracking for whether the engine is idle. Signed-off-by: Chris Wilson Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reset.c | 4 ---- .../gpu/drm/i915/selftests/intel_hangcheck.c | 22 +++++-------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 68af017ee548..99bd3bc336b3 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -1076,10 +1076,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); - if (i915_seqno_passed(intel_engine_get_seqno(engine), - intel_engine_last_submit(engine))) - return 0; - reset_prepare_engine(engine); if (msg) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 8025c7e0bf6c..2c38ea5892d9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -449,8 +449,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); - if (active) { struct i915_request *rq; @@ -479,8 +477,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; i915_request_put(rq); } @@ -496,11 +492,10 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_engine_count += active; if (i915_reset_engine_count(&i915->gpu_error, engine) != - reset_engine_count) { - pr_err("%s engine reset %srecorded!\n", - engine->name, active ? "not " : ""); + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); err = -EINVAL; break; } @@ -728,7 +723,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - u32 seqno = intel_engine_get_seqno(engine); struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { @@ -756,9 +750,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915, err = -EIO; break; } - - GEM_BUG_ON(!rq->global_seqno); - seqno = rq->global_seqno - 1; } err = i915_reset_engine(engine, NULL); @@ -795,10 +786,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, reported = i915_reset_engine_count(&i915->gpu_error, engine); reported -= threads[engine->id].resets; - if (reported != (flags & TEST_ACTIVE ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", - engine->name, test_name, count, reported, - (flags & TEST_ACTIVE ? count : 0)); + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); if (!err) err = -EINVAL; } From 32db0b6501d97b09e92e70caefc74fa35aa9a8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 27 Nov 2018 22:05:50 +0200 Subject: [PATCH 256/539] drm/i915: Don't try to use the hardware frame counter with i965gm TV output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On i965gm the hardware frame counter does not work when the TV encoder is active. So let's not try to consult the hardware frame counter in that case. Instead we'll fall back to the timestamp based guesstimation method used on gen2. Note that the pipe timings generated by the TV encoder are also rather peculiar. Apparently the pipe wants to run at a much higher speed (related to the oversample clock somehow it seems) but during the vertical active period the TV encoder stalls the pipe every few lines to keep its speed in check. But once the vertical blanking period is reached the pipe gets to run at full speed. This means our vblank timestamp estimates are suspect. Fixing all that would require quite a bit more work. This simple fix at least avoids the nasty vblank timeouts that are happening currently. Curiously the frame counter works just fine on i945gm and gm45. I don't really understand what kind of mishap occurred with the hardware design on i965gm. Sadly I wasn't able to find any chicken bits etc. that would fix the frame counter :( v2: Move the zero vs. non-zero hw counter value handling into i915_get_vblank_counter() (Daniel) Use the per-crtc maximum exclusively, leaving the per-device maximum at zero v3: max_vblank_count not populated yet in intel_enable_pipe() use intel_crtc_max_vblank_count() instead Cc: stable@vger.kernel.org Cc: Daniel Vetter Fixes: 51e31d49c890 ("drm/i915: Use generic vblank wait") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93782 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190122125149.GE5527@ideak-desk.fi.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++------ drivers/gpu/drm/i915/intel_display.c | 48 +++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7056ae2d1e0e..926e05f0db24 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -821,11 +821,26 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + const struct drm_display_mode *mode = &vblank->hwmode; i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode; unsigned long irqflags; + /* + * On i965gm TV output the frame counter only works up to + * the point when we enable the TV encoder. After that the + * frame counter ceases to work and reads zero. We need a + * vblank wait before enabling the TV encoder and so we + * have to enable vblank interrupts while the frame counter + * is still in a working state. However the core vblank code + * does not like us returning non-zero frame counter values + * when we've told it that we don't have a working frame + * counter. Thus we must stop non-zero values leaking out. + */ + if (!vblank->max_vblank_count) + return 0; + htotal = mode->crtc_htotal; hsync_start = mode->crtc_hsync_start; vbl_start = mode->crtc_vblank_start; @@ -4579,16 +4594,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - if (IS_GEN(dev_priv, 2)) { - /* Gen2 doesn't have a hardware frame counter */ - dev->max_vblank_count = 0; - } else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { - dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) dev->driver->get_vblank_counter = g4x_get_vblank_counter; - } else { + else if (INTEL_GEN(dev_priv) >= 3) dev->driver->get_vblank_counter = i915_get_vblank_counter; - dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ - } /* * Opt out of the vblank disable timer on everything except gen2. diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 36c1126cbc85..67a64cbbe851 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1758,6 +1758,35 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) return crtc->pipe; } +static u32 intel_crtc_max_vblank_count(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * On i965gm the hardware frame counter reads + * zero when the TV encoder is enabled :( + */ + if (IS_I965GM(dev_priv) && + (crtc_state->output_types & BIT(INTEL_OUTPUT_TVOUT))) + return 0; + + if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + return 0xffffffff; /* full 32 bit counter */ + else if (INTEL_GEN(dev_priv) >= 3) + return 0xffffff; /* only 24 bits of frame count */ + else + return 0; /* Gen2 doesn't have a hardware frame counter */ +} + +static void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + drm_crtc_set_max_vblank_count(&crtc->base, + intel_crtc_max_vblank_count(crtc_state)); + drm_crtc_vblank_on(&crtc->base); +} + static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); @@ -1810,7 +1839,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * when it's derived from the timestamps. So let's wait for the * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev_priv->drm.max_vblank_count == 0) + if (intel_crtc_max_vblank_count(new_crtc_state) == 0) intel_wait_for_pipe_scanline_moving(crtc); } @@ -5678,7 +5707,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, ironlake_pch_enable(old_intel_state, pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -5832,7 +5861,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); @@ -6171,7 +6200,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -6230,7 +6259,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); + intel_crtc_vblank_on(pipe_config); intel_encoders_enable(crtc, pipe_config, old_state); } @@ -12778,8 +12807,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; + struct drm_vblank_crtc *vblank = &dev->vblank[drm_crtc_index(&crtc->base)]; - if (!dev->max_vblank_count) + if (!vblank->max_vblank_count) return (u32)drm_crtc_accurate_vblank_count(&crtc->base); return dev->driver->get_vblank_counter(dev, crtc->pipe); @@ -15894,10 +15924,12 @@ intel_modeset_setup_hw_state(struct drm_device *dev, * waits, so we need vblank interrupts restored beforehand. */ for_each_intel_crtc(&dev_priv->drm, crtc) { + crtc_state = to_intel_crtc_state(crtc->base.state); + drm_crtc_vblank_reset(&crtc->base); - if (crtc->base.state->active) - drm_crtc_vblank_on(&crtc->base); + if (crtc_state->base.active) + intel_crtc_vblank_on(crtc_state); } intel_sanitize_plane_mapping(dev_priv); From 6801603d3d7dd05c62d368045c7d5a90980596a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:47 +0200 Subject: [PATCH 257/539] drm/i915/tv: Fix interlaced ysize calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the calculation of the vertical active period for interlaced TV modes. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-4-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index bd5536f0ec92..91cb31ff8ff0 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1086,7 +1086,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, if (tv_mode->progressive) ysize = tv_mode->nbr_end + 1; else - ysize = 2*tv_mode->nbr_end + 1; + ysize = 2 * (tv_mode->nbr_end + 1); xpos += conn_state->tv.margins.left; ypos += conn_state->tv.margins.top; From d515282380df8c85ca5ed1cc9699c52ccf00fa18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:48 +0200 Subject: [PATCH 258/539] drm/i915/tv: Fix tv mode clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The oversample clock is always supposed to be either 108 MHz or 148.5 MHz. Make it so. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-5-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 91cb31ff8ff0..a053bb3460e6 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -635,7 +635,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "480p", - .clock = 107520, + .clock = 108000, .refresh = 59940, .oversample = TV_OVERSAMPLE_4X, .component_only = 1, @@ -659,7 +659,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "576p", - .clock = 107520, + .clock = 108000, .refresh = 50000, .oversample = TV_OVERSAMPLE_4X, .component_only = 1, @@ -683,7 +683,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, .oversample = TV_OVERSAMPLE_2X, .component_only = 1, @@ -707,7 +707,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "720p@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, .oversample = TV_OVERSAMPLE_2X, .component_only = 1, @@ -732,7 +732,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "1080i@50Hz", - .clock = 148800, + .clock = 148500, .refresh = 50000, .oversample = TV_OVERSAMPLE_2X, .component_only = 1, @@ -758,7 +758,7 @@ static const struct tv_mode tv_modes[] = { }, { .name = "1080i@60Hz", - .clock = 148800, + .clock = 148500, .refresh = 60000, .oversample = TV_OVERSAMPLE_2X, .component_only = 1, @@ -1113,7 +1113,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, static const struct drm_display_mode reported_modes[] = { { .name = "NTSC 480i", - .clock = 107520, + .clock = 108000, .hdisplay = 1280, .hsync_start = 1368, .hsync_end = 1496, From 4f50379837434182922f06b0c6b2a28d498c6480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:49 +0200 Subject: [PATCH 259/539] drm/i915/tv: Store the TV oversampling factor in the TV mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the oversampling factor as a number in the TV modes. We shall want to arithmetic with this which is easier if it's a number we can use directly. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-6-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 42 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index a053bb3460e6..3e3eee77296a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -306,7 +306,7 @@ struct tv_mode { u32 clock; u16 refresh; /* in millihertz (for precision) */ - u32 oversample; + u8 oversample; u8 hsync_end; u16 hblank_start, hblank_end, htotal; bool progressive : 1, trilevel_sync : 1, component_only : 1; @@ -378,7 +378,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -421,7 +421,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-443", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, @@ -463,7 +463,7 @@ static const struct tv_mode tv_modes[] = { .name = "NTSC-J", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -506,7 +506,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-M", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ @@ -550,7 +550,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL-N", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, .hsync_end = 64, .hblank_end = 128, @@ -595,7 +595,7 @@ static const struct tv_mode tv_modes[] = { .name = "PAL", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_8X, + .oversample = 8, .component_only = 0, .hsync_end = 64, .hblank_end = 142, @@ -637,7 +637,7 @@ static const struct tv_mode tv_modes[] = { .name = "480p", .clock = 108000, .refresh = 59940, - .oversample = TV_OVERSAMPLE_4X, + .oversample = 4, .component_only = 1, .hsync_end = 64, .hblank_end = 122, @@ -661,7 +661,7 @@ static const struct tv_mode tv_modes[] = { .name = "576p", .clock = 108000, .refresh = 50000, - .oversample = TV_OVERSAMPLE_4X, + .oversample = 4, .component_only = 1, .hsync_end = 64, .hblank_end = 139, @@ -685,7 +685,7 @@ static const struct tv_mode tv_modes[] = { .name = "720p@60Hz", .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 80, .hblank_end = 300, @@ -709,7 +709,7 @@ static const struct tv_mode tv_modes[] = { .name = "720p@50Hz", .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 80, .hblank_end = 300, @@ -734,7 +734,7 @@ static const struct tv_mode tv_modes[] = { .name = "1080i@50Hz", .clock = 148500, .refresh = 50000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 88, .hblank_end = 235, @@ -760,7 +760,7 @@ static const struct tv_mode tv_modes[] = { .name = "1080i@60Hz", .clock = 148500, .refresh = 60000, - .oversample = TV_OVERSAMPLE_2X, + .oversample = 2, .component_only = 1, .hsync_end = 88, .hblank_end = 235, @@ -1029,7 +1029,21 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, } tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); - tv_ctl |= tv_mode->oversample; + + switch (tv_mode->oversample) { + case 8: + tv_ctl |= TV_OVERSAMPLE_8X; + break; + case 4: + tv_ctl |= TV_OVERSAMPLE_4X; + break; + case 2: + tv_ctl |= TV_OVERSAMPLE_2X; + break; + default: + tv_ctl |= TV_OVERSAMPLE_NONE; + break; + } if (tv_mode->progressive) tv_ctl |= TV_PROGRESSIVE; From 56f6230811818a9dc659ffbfb9f954b2bed25819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:50 +0200 Subject: [PATCH 260/539] drm/i915/tv: Use bools where appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'component_only' is a bool. Initialize it like a bool. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-7-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3e3eee77296a..0536077cc955 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -379,7 +379,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -422,7 +422,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */ .hsync_end = 64, .hblank_end = 124, .hblank_start = 836, .htotal = 857, @@ -464,7 +464,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -507,7 +507,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 8, - .component_only = 0, + .component_only = false, /* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */ .hsync_end = 64, .hblank_end = 124, @@ -551,7 +551,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 8, - .component_only = 0, + .component_only = false, .hsync_end = 64, .hblank_end = 128, .hblank_start = 844, .htotal = 863, @@ -596,7 +596,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 8, - .component_only = 0, + .component_only = false, .hsync_end = 64, .hblank_end = 142, .hblank_start = 844, .htotal = 863, @@ -638,7 +638,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 59940, .oversample = 4, - .component_only = 1, + .component_only = true, .hsync_end = 64, .hblank_end = 122, .hblank_start = 842, .htotal = 857, @@ -662,7 +662,7 @@ static const struct tv_mode tv_modes[] = { .clock = 108000, .refresh = 50000, .oversample = 4, - .component_only = 1, + .component_only = true, .hsync_end = 64, .hblank_end = 139, .hblank_start = 859, .htotal = 863, @@ -686,7 +686,7 @@ static const struct tv_mode tv_modes[] = { .clock = 148500, .refresh = 60000, .oversample = 2, - .component_only = 1, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1649, @@ -710,7 +710,7 @@ static const struct tv_mode tv_modes[] = { .clock = 148500, .refresh = 50000, .oversample = 2, - .component_only = 1, + .component_only = true, .hsync_end = 80, .hblank_end = 300, .hblank_start = 1580, .htotal = 1979, @@ -735,7 +735,7 @@ static const struct tv_mode tv_modes[] = { .clock = 148500, .refresh = 50000, .oversample = 2, - .component_only = 1, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2639, @@ -761,7 +761,7 @@ static const struct tv_mode tv_modes[] = { .clock = 148500, .refresh = 60000, .oversample = 2, - .component_only = 1, + .component_only = true, .hsync_end = 88, .hblank_end = 235, .hblank_start = 2155, .htotal = 2199, From bda5f53206e56c69ac8b7c8d60e08ee97fd4618f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:51 +0200 Subject: [PATCH 261/539] drm/i915/tv: Nuke silly 0 initialzation of xpos/ypos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just assign the margin values directly to xpos/ypos instead of first initializing to zero and then adding the values. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-8-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 0536077cc955..0910eadc605a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -993,7 +993,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, const struct video_levels *video_levels; const struct color_conversion *color_conversion; bool burst_ena; - int xpos = 0x0, ypos = 0x0; + int xpos, ypos; unsigned int xsize, ysize; if (!tv_mode) @@ -1102,8 +1102,8 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, else ysize = 2 * (tv_mode->nbr_end + 1); - xpos += conn_state->tv.margins.left; - ypos += conn_state->tv.margins.top; + xpos = conn_state->tv.margins.left; + ypos = conn_state->tv.margins.top; xsize -= (conn_state->tv.margins.left + conn_state->tv.margins.right); ysize -= (conn_state->tv.margins.top + From 65ddf7f968b8d3c51b0fcefc43339c69f3bdd546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:53 +0200 Subject: [PATCH 262/539] drm/i915/tv: Deobfuscate preferred mode selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite the preferred mode selection to just check whether the TV modes is HD or SD. For SD TV modes we favor 480 line modes, for 720p we prefer 720 line modes, and for 1080i/p we prefer 1080 line modes. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-10-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 52 ++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 0910eadc605a..e5eef87117e9 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -859,6 +859,14 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_RANGE; } +static int +intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) +{ + if (tv_mode->progressive) + return tv_mode->nbr_end + 1; + else + return 2 * (tv_mode->nbr_end + 1); +} static void intel_tv_get_config(struct intel_encoder *encoder, @@ -1097,10 +1105,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, /* Filter ctl must be set before TV_WIN_SIZE */ I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); xsize = tv_mode->hblank_start - tv_mode->hblank_end; - if (tv_mode->progressive) - ysize = tv_mode->nbr_end + 1; - else - ysize = 2 * (tv_mode->nbr_end + 1); + ysize = intel_tv_mode_vdisplay(tv_mode); xpos = conn_state->tv.margins.left; ypos = conn_state->tv.margins.top; @@ -1319,22 +1324,28 @@ static const struct input_res { {"1920x1080", 1920, 1080}, }; -/* - * Chose preferred mode according to line number of TV format - */ -static void -intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, - struct drm_display_mode *mode_ptr) +/* Choose preferred mode according to line number of TV format */ +static bool +intel_tv_is_preferred_mode(const struct drm_display_mode *mode, + const struct tv_mode *tv_mode) { - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - else if (tv_mode->nbr_end > 480) { - if (tv_mode->progressive == true && tv_mode->nbr_end < 720) { - if (mode_ptr->vdisplay == 720) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } else if (mode_ptr->vdisplay == 1080) - mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; - } + int vdisplay = intel_tv_mode_vdisplay(tv_mode); + + /* prefer 480 line modes for all SD TV modes */ + if (vdisplay <= 576) + vdisplay = 480; + + return vdisplay == mode->vdisplay; +} + +static void +intel_tv_set_mode_type(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->type = DRM_MODE_TYPE_DRIVER; + + if (intel_tv_is_preferred_mode(mode, tv_mode)) + mode->type |= DRM_MODE_TYPE_PREFERRED; } static int @@ -1382,8 +1393,7 @@ intel_tv_get_modes(struct drm_connector *connector) tmp = div_u64(tmp, 1000000); mode_ptr->clock = (int) tmp; - mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_choose_preferred_modes(tv_mode, mode_ptr); + intel_tv_set_mode_type(mode_ptr, tv_mode); drm_mode_probed_add(connector, mode_ptr); count++; } From 5023520fd37293f48eeb08429ac5a344a9c19ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:54 +0200 Subject: [PATCH 263/539] drm/i915/tv: Use drm_mode_set_name() to name TV modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No point in storing the mode names in the array. drm_mode_set_name() will give us the same names without wasting space for these string constants. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-11-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index e5eef87117e9..89e60a8f0fd9 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1312,16 +1312,15 @@ intel_tv_detect(struct drm_connector *connector, } static const struct input_res { - const char *name; - int w, h; + u16 w, h; } input_res_table[] = { - {"640x480", 640, 480}, - {"800x600", 800, 600}, - {"1024x768", 1024, 768}, - {"1280x1024", 1280, 1024}, - {"848x480", 848, 480}, - {"1280x720", 1280, 720}, - {"1920x1080", 1920, 1080}, + { 640, 480 }, + { 800, 600 }, + { 1024, 768 }, + { 1280, 1024 }, + { 848, 480 }, + { 1280, 720 }, + { 1920, 1080 }, }; /* Choose preferred mode according to line number of TV format */ @@ -1372,7 +1371,6 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr = drm_mode_create(connector->dev); if (!mode_ptr) continue; - strlcpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN); mode_ptr->hdisplay = hactive_s; mode_ptr->hsync_start = hactive_s + 1; @@ -1394,6 +1392,9 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; intel_tv_set_mode_type(mode_ptr, tv_mode); + + drm_mode_set_name(mode_ptr); + drm_mode_probed_add(connector, mode_ptr); count++; } From e94390aadaf2bf0fb87c9b7b0c0dc9910f08356a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:55 +0200 Subject: [PATCH 264/539] drm/i915/tv: Make TV mode autoselection actually useable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code insists on picking a new TV mode when switching between component and non-component cables. That's super annoying. Let's just keep the current TV mode unless the new cable type actually disagrees with it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-12-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 89e60a8f0fd9..6279d6fed7e9 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1252,16 +1252,18 @@ static void intel_tv_find_better_format(struct drm_connector *connector) const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + /* Component supports everything so we can keep the current mode */ + if (intel_tv->type == DRM_MODE_CONNECTOR_Component) return; + /* If the current mode is fine don't change it */ + if (!tv_mode->component_only) + return; for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - tv_mode = tv_modes + i; + tv_mode = &tv_modes[i]; - if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == - tv_mode->component_only) + if (!tv_mode->component_only) break; } From 528132a341fca6181dde75a4587e3703d04568c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:56 +0200 Subject: [PATCH 265/539] drm/i915/tv: Nuke reported_modes[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the silly reported_modes[] array. I suppse once upon a time this actually had something to do with modes we reported to userspace. Now it is just the placeholder for the mode we use for load detection. We don't need it even for that, and instead we can just rely on the fallback mode in intel_get_load_detect_pipe(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-13-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 6279d6fed7e9..2cacb8165fb6 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1129,23 +1129,6 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, I915_WRITE(TV_CTL, tv_ctl); } -static const struct drm_display_mode reported_modes[] = { - { - .name = "NTSC 480i", - .clock = 108000, - .hdisplay = 1280, - .hsync_start = 1368, - .hsync_end = 1496, - .htotal = 1712, - - .vdisplay = 1024, - .vsync_start = 1027, - .vsync_end = 1034, - .vtotal = 1104, - .type = DRM_MODE_TYPE_DRIVER, - }, -}; - static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) @@ -1275,7 +1258,6 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { - struct drm_display_mode mode; struct intel_tv *intel_tv = intel_attached_tv(connector); enum drm_connector_status status; int type; @@ -1284,13 +1266,11 @@ intel_tv_detect(struct drm_connector *connector, connector->base.id, connector->name, force); - mode = reported_modes[0]; - if (force) { struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret < 0) return ret; From a0ff6779c75fe86a66b8c2eb439a40a12139eb02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:57 +0200 Subject: [PATCH 266/539] drm/i915/tv: Add 1080p30/50/60 TV modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing 1080p TV modes. On gen4 all of them work just fine, whereas on gen3 only the 30Hz mode actually works correctly. v2: s/IS_GEN3(dev_priv)/IS_GEN(dev_priv, 3)/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-14-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 90 +++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 2cacb8165fb6..421f71c222e5 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -782,6 +782,84 @@ static const struct tv_mode tv_modes[] = { .filter_table = filter_table, }, + + { + .name = "1080p@30Hz", + .clock = 148500, + .refresh = 30000, + .oversample = 2, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@50Hz", + .clock = 148500, + .refresh = 50000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2639, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, + + { + .name = "1080p@60Hz", + .clock = 148500, + .refresh = 60000, + .oversample = 1, + .component_only = true, + + .hsync_end = 88, .hblank_end = 235, + .hblank_start = 2155, .htotal = 2199, + + .progressive = true, .trilevel_sync = true, + + .vsync_start_f1 = 8, .vsync_start_f2 = 8, + .vsync_len = 10, + + .veq_ena = false, .veq_start_f1 = 0, + .veq_start_f2 = 0, .veq_len = 0, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 1079, + + .burst_ena = false, + + .filter_table = filter_table, + }, }; static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) @@ -1537,11 +1615,15 @@ intel_tv_init(struct drm_i915_private *dev_priv) connector->doublescan_allowed = false; /* Create TV properties then attach current values */ - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) + for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { + /* 1080p50/1080p60 not supported on gen3 */ + if (IS_GEN(dev_priv, 3) && + tv_modes[i].oversample == 1) + break; + tv_format_names[i] = tv_modes[i].name; - drm_mode_create_tv_properties(dev, - ARRAY_SIZE(tv_modes), - tv_format_names); + } + drm_mode_create_tv_properties(dev, i, tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, state->tv.mode); From e3bb355c7d8b2e537673066ee223a554457ff50d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:58 +0200 Subject: [PATCH 267/539] drm/i915/tv: Generate better pipe timings for TV encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make vblank timestamps work better with the TV encoder let's scale the pipe timings such that the relationship between the TV active and TV blanking periods is mirrored in the corresponding pipe timings. Note that in reality the pipe runs at a faster speed during the TV vblank, and correspondigly there are periods when the pipe is enitrely stopped. We pretend that this isn't the case and as such we incur some error in the vblank timestamps during the TV vblank. Further explanation of the issues in a big comment in the code. This makes the vblank timestamps good enough to make i965gm (which doesn't have a working frame counter with the TV encoder) report correct frame numbers. Previously you could get all kinds of nonsense which resulted in eg. glxgears reporting that it's running at twice the actual framerate in most cases. v2: s/IS_GEN4(dev_priv)/IS_GEN(dev_priv, 4)/ in the comment for consistency Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-15-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_tv.c | 322 +++++++++++++++++++++++++++----- 2 files changed, 278 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4e447437d75..1eca166d95bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4895,6 +4895,7 @@ enum { # define TV_OVERSAMPLE_NONE (2 << 18) /* Selects 8x oversampling */ # define TV_OVERSAMPLE_8X (3 << 18) +# define TV_OVERSAMPLE_MASK (3 << 18) /* Selects progressive mode rather than interlaced */ # define TV_PROGRESSIVE (1 << 17) /* Sets the colorburst to PAL mode. Required for non-M PAL modes. */ diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 421f71c222e5..6897977ac117 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -339,7 +339,6 @@ struct tv_mode { const struct video_levels *composite_levels, *svideo_levels; const struct color_conversion *composite_color, *svideo_color; const u32 *filter_table; - u16 max_srcw; }; @@ -728,7 +727,6 @@ static const struct tv_mode tv_modes[] = { .burst_ena = false, .filter_table = filter_table, - .max_srcw = 800 }, { .name = "1080i@50Hz", @@ -946,13 +944,183 @@ intel_tv_mode_vdisplay(const struct tv_mode *tv_mode) return 2 * (tv_mode->nbr_end + 1); } +static void +intel_tv_mode_to_mode(struct drm_display_mode *mode, + const struct tv_mode *tv_mode) +{ + mode->clock = tv_mode->clock / + (tv_mode->oversample >> !tv_mode->progressive); + + /* + * tv_mode horizontal timings: + * + * hsync_end + * | hblank_end + * | | hblank_start + * | | | htotal + * | _______ | + * ____/ \___ + * \__/ \ + */ + mode->hdisplay = + tv_mode->hblank_start - tv_mode->hblank_end; + mode->hsync_start = mode->hdisplay + + tv_mode->htotal - tv_mode->hblank_start; + mode->hsync_end = mode->hsync_start + + tv_mode->hsync_end; + mode->htotal = tv_mode->htotal + 1; + + /* + * tv_mode vertical timings: + * + * vsync_start + * | vsync_end + * | | vi_end nbr_end + * | | | | + * | | _______ + * \__ ____/ \ + * \__/ + */ + mode->vdisplay = intel_tv_mode_vdisplay(tv_mode); + if (tv_mode->progressive) { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1; + mode->vsync_end = mode->vsync_start + + tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1; + } else { + mode->vsync_start = mode->vdisplay + + tv_mode->vsync_start_f1 + 1 + + tv_mode->vsync_start_f2 + 1; + mode->vsync_end = mode->vsync_start + + 2 * tv_mode->vsync_len; + mode->vtotal = mode->vdisplay + + tv_mode->vi_end_f1 + 1 + + tv_mode->vi_end_f2 + 1; + } + + /* TV has it's own notion of sync and other mode flags, so clear them. */ + mode->flags = 0; + + mode->vrefresh = 0; + mode->vrefresh = drm_mode_vrefresh(mode); + + snprintf(mode->name, sizeof(mode->name), + "%dx%d%c (%s)", + mode->hdisplay, mode->vdisplay, + tv_mode->progressive ? 'p' : 'i', + tv_mode->name); +} + +static void intel_tv_scale_mode_horiz(struct drm_display_mode *mode, + int hdisplay, int left_margin, + int right_margin) +{ + int hsync_start = mode->hsync_start - mode->hdisplay + right_margin; + int hsync_end = mode->hsync_end - mode->hdisplay + right_margin; + int new_htotal = mode->htotal * hdisplay / + (mode->hdisplay - left_margin - right_margin); + + mode->clock = mode->clock * new_htotal / mode->htotal; + + mode->hdisplay = hdisplay; + mode->hsync_start = hdisplay + hsync_start * new_htotal / mode->htotal; + mode->hsync_end = hdisplay + hsync_end * new_htotal / mode->htotal; + mode->htotal = new_htotal; +} + +static void intel_tv_scale_mode_vert(struct drm_display_mode *mode, + int vdisplay, int top_margin, + int bottom_margin) +{ + int vsync_start = mode->vsync_start - mode->vdisplay + bottom_margin; + int vsync_end = mode->vsync_end - mode->vdisplay + bottom_margin; + int new_vtotal = mode->vtotal * vdisplay / + (mode->vdisplay - top_margin - bottom_margin); + + mode->clock = mode->clock * new_vtotal / mode->vtotal; + + mode->vdisplay = vdisplay; + mode->vsync_start = vdisplay + vsync_start * new_vtotal / mode->vtotal; + mode->vsync_end = vdisplay + vsync_end * new_vtotal / mode->vtotal; + mode->vtotal = new_vtotal; +} + static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + struct drm_display_mode mode = {}; + u32 tv_ctl, hctl1, hctl3, vctl1, vctl2, tmp; + struct tv_mode tv_mode = {}; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; + int xsize, ysize, xpos, ypos; + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; + tv_ctl = I915_READ(TV_CTL); + hctl1 = I915_READ(TV_H_CTL_1); + hctl3 = I915_READ(TV_H_CTL_3); + vctl1 = I915_READ(TV_V_CTL_1); + vctl2 = I915_READ(TV_V_CTL_2); + + tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; + tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; + + tv_mode.hblank_start = (hctl3 & TV_HBLANK_START_MASK) >> TV_HBLANK_START_SHIFT; + tv_mode.hblank_end = (hctl3 & TV_HSYNC_END_MASK) >> TV_HBLANK_END_SHIFT; + + tv_mode.nbr_end = (vctl1 & TV_NBR_END_MASK) >> TV_NBR_END_SHIFT; + tv_mode.vi_end_f1 = (vctl1 & TV_VI_END_F1_MASK) >> TV_VI_END_F1_SHIFT; + tv_mode.vi_end_f2 = (vctl1 & TV_VI_END_F2_MASK) >> TV_VI_END_F2_SHIFT; + + tv_mode.vsync_len = (vctl2 & TV_VSYNC_LEN_MASK) >> TV_VSYNC_LEN_SHIFT; + tv_mode.vsync_start_f1 = (vctl2 & TV_VSYNC_START_F1_MASK) >> TV_VSYNC_START_F1_SHIFT; + tv_mode.vsync_start_f2 = (vctl2 & TV_VSYNC_START_F2_MASK) >> TV_VSYNC_START_F2_SHIFT; + + tv_mode.clock = pipe_config->port_clock; + + tv_mode.progressive = tv_ctl & TV_PROGRESSIVE; + + switch (tv_ctl & TV_OVERSAMPLE_MASK) { + case TV_OVERSAMPLE_8X: + tv_mode.oversample = 8; + break; + case TV_OVERSAMPLE_4X: + tv_mode.oversample = 4; + break; + case TV_OVERSAMPLE_2X: + tv_mode.oversample = 2; + break; + default: + tv_mode.oversample = 1; + break; + } + + tmp = I915_READ(TV_WIN_POS); + xpos = tmp >> 16; + ypos = tmp & 0xffff; + + tmp = I915_READ(TV_WIN_SIZE); + xsize = tmp >> 16; + ysize = tmp & 0xffff; + + intel_tv_mode_to_mode(&mode, &tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(&mode); + + intel_tv_scale_mode_horiz(&mode, hdisplay, + xpos, mode.hdisplay - xsize - xpos); + intel_tv_scale_mode_vert(&mode, vdisplay, + ypos, mode.vdisplay - ysize - ypos); + + adjusted_mode->crtc_clock = mode.clock; } static int @@ -963,6 +1131,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int hdisplay = adjusted_mode->crtc_hdisplay; + int vdisplay = adjusted_mode->crtc_vdisplay; if (!tv_mode) return -EINVAL; @@ -971,17 +1141,90 @@ intel_tv_compute_config(struct intel_encoder *encoder, return -EINVAL; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - adjusted_mode->crtc_clock = tv_mode->clock; + DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; - /* TV has it's own notion of sync and other mode flags, so clear them. */ - adjusted_mode->flags = 0; + pipe_config->port_clock = tv_mode->clock; + + intel_tv_mode_to_mode(adjusted_mode, tv_mode); + + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(adjusted_mode); /* - * FIXME: We don't check whether the input mode is actually what we want - * or whether userspace is doing something stupid. + * The pipe scanline counter behaviour looks as follows when + * using the TV encoder: + * + * time -> + * + * dsl=vtotal-1 | | + * || || + * ___| | ___| | + * / | / | + * / | / | + * dsl=0 ___/ |_____/ | + * | | | | | | + * ^ ^ ^ ^ ^ + * | | | | pipe vblank/first part of tv vblank + * | | | bottom margin + * | | active + * | top margin + * remainder of tv vblank + * + * When the TV encoder is used the pipe wants to run faster + * than expected rate. During the active portion the TV + * encoder stalls the pipe every few lines to keep it in + * check. When the TV encoder reaches the bottom margin the + * pipe simply stops. Once we reach the TV vblank the pipe is + * no longer stalled and it runs at the max rate (apparently + * oversample clock on gen3, cdclk on gen4). Once the pipe + * reaches the pipe vtotal the pipe stops for the remainder + * of the TV vblank/top margin. The pipe starts up again when + * the TV encoder exits the top margin. + * + * To avoid huge hassles for vblank timestamping we scale + * the pipe timings as if the pipe always runs at the average + * rate it maintains during the active period. This also + * gives us a reasonable guesstimate as to the pixel rate. + * Due to the variation in the actual pipe speed the scanline + * counter will give us slightly erroneous results during the + * TV vblank/margins. But since vtotal was selected such that + * it matches the average rate of the pipe during the active + * portion the error shouldn't cause any serious grief to + * vblank timestamps. + * + * For posterity here is the empirically derived formula + * that gives us the maximum length of the pipe vblank + * we can use without causing display corruption. Following + * this would allow us to have a ticking scanline counter + * everywhere except during the bottom margin (there the + * pipe always stops). Ie. this would eliminate the second + * flat portion of the above graph. However this would also + * complicate vblank timestamping as the pipe vtotal would + * no longer match the average rate the pipe runs at during + * the active portion. Hence following this formula seems + * more trouble that it's worth. + * + * if (IS_GEN(dev_priv, 4)) { + * num = cdclk * (tv_mode->oversample >> !tv_mode->progressive); + * den = tv_mode->clock; + * } else { + * num = tv_mode->oversample >> !tv_mode->progressive; + * den = 1; + * } + * max_pipe_vblank_len ~= + * (num * tv_htotal * (tv_vblank_len + top_margin)) / + * (den * pipe_htotal); */ + intel_tv_scale_mode_horiz(adjusted_mode, hdisplay, + conn_state->tv.margins.left, + conn_state->tv.margins.right); + intel_tv_scale_mode_vert(adjusted_mode, vdisplay, + conn_state->tv.margins.top, + conn_state->tv.margins.bottom); + drm_mode_set_crtcinfo(adjusted_mode, 0); + adjusted_mode->name[0] = '\0'; return 0; } @@ -1410,52 +1653,41 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_display_mode *mode_ptr; const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int j, count = 0; - u64 tmp; + int i, count = 0; - for (j = 0; j < ARRAY_SIZE(input_res_table); - j++) { - const struct input_res *input = &input_res_table[j]; - unsigned int hactive_s = input->w; - unsigned int vactive_s = input->h; + for (i = 0; i < ARRAY_SIZE(input_res_table); i++) { + const struct input_res *input = &input_res_table[i]; + struct drm_display_mode *mode; - if (tv_mode->max_srcw && input->w > tv_mode->max_srcw) + if (input->w > 1024 && + !tv_mode->progressive && + !tv_mode->component_only) continue; - if (input->w > 1024 && (!tv_mode->progressive - && !tv_mode->component_only)) + mode = drm_mode_create(connector->dev); + if (!mode) continue; - mode_ptr = drm_mode_create(connector->dev); - if (!mode_ptr) - continue; + /* + * We take the TV mode and scale it to look + * like it had the expected h/vdisplay. This + * provides the most information to userspace + * about the actual timings of the mode. We + * do ignore the margins though. + */ + intel_tv_mode_to_mode(mode, tv_mode); + if (count == 0) { + DRM_DEBUG_KMS("TV mode:\n"); + drm_mode_debug_printmodeline(mode); + } + intel_tv_scale_mode_horiz(mode, input->w, 0, 0); + intel_tv_scale_mode_vert(mode, input->h, 0, 0); + intel_tv_set_mode_type(mode, tv_mode); - mode_ptr->hdisplay = hactive_s; - mode_ptr->hsync_start = hactive_s + 1; - mode_ptr->hsync_end = hactive_s + 64; - if (mode_ptr->hsync_end <= mode_ptr->hsync_start) - mode_ptr->hsync_end = mode_ptr->hsync_start + 1; - mode_ptr->htotal = hactive_s + 96; + drm_mode_set_name(mode); - mode_ptr->vdisplay = vactive_s; - mode_ptr->vsync_start = vactive_s + 1; - mode_ptr->vsync_end = vactive_s + 32; - if (mode_ptr->vsync_end <= mode_ptr->vsync_start) - mode_ptr->vsync_end = mode_ptr->vsync_start + 1; - mode_ptr->vtotal = vactive_s + 33; - - tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); - tmp *= mode_ptr->htotal; - tmp = div_u64(tmp, 1000000); - mode_ptr->clock = (int) tmp; - - intel_tv_set_mode_type(mode_ptr, tv_mode); - - drm_mode_set_name(mode_ptr); - - drm_mode_probed_add(connector, mode_ptr); + drm_mode_probed_add(connector, mode); count++; } From 690157f0a9e7e4e903111794fae42bd2ff1abb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 18:59:59 +0200 Subject: [PATCH 268/539] drm/i915/tv: Fix >1024 modes on gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gen3 we must disable the TV encoder vertical filter for >1024 pixel wide sources. Once that's done all we can is try to center the image on the screen. Naturally the TV mode vertical resolution must be equal or larger than the user mode vertical resolution or else we'd have to cut off part of the user mode. And while we may not be able to respect the user's choice of top and bottom borders exactly (or we'd have to reject he mode most likely), we can try to maintain the relative sizes of the top and bottom border with respect to each orher. Additionally we must configure the pipe as interlaced if the TV mode is interlaced. v2: Make +intel_tv_connector_duplicate_state() static and drop the badly copy pasted kerneldoc s/IS_GEN3(dev_priv/IS_GEN(dev_priv, 3)/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-16-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 91 ++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 6897977ac117..cb6829bc762f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -860,6 +860,35 @@ static const struct tv_mode tv_modes[] = { }, }; +struct intel_tv_connector_state { + struct drm_connector_state base; + + /* + * May need to override the user margins for + * gen3 >1024 wide source vertical centering. + */ + struct { + u16 top, bottom; + } margins; + + bool bypass_vfilter; +}; + +#define to_intel_tv_connector_state(x) container_of(x, struct intel_tv_connector_state, base) + +static struct drm_connector_state * +intel_tv_connector_duplicate_state(struct drm_connector *connector) +{ + struct intel_tv_connector_state *state; + + state = kmemdup(connector->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_connector_duplicate_state(connector, &state->base); + return &state->base; +} + static struct intel_tv *enc_to_tv(struct intel_encoder *encoder) { return container_of(encoder, struct intel_tv, base); @@ -1128,6 +1157,9 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; @@ -1148,6 +1180,43 @@ intel_tv_compute_config(struct intel_encoder *encoder, pipe_config->port_clock = tv_mode->clock; intel_tv_mode_to_mode(adjusted_mode, tv_mode); + drm_mode_set_crtcinfo(adjusted_mode, 0); + + if (IS_GEN(dev_priv, 3) && hdisplay > 1024) { + int extra, top, bottom; + + extra = adjusted_mode->crtc_vdisplay - vdisplay; + + if (extra < 0) { + DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); + return false; + } + + /* Need to turn off the vertical filter and center the image */ + + /* Attempt to maintain the relative sizes of the margins */ + top = conn_state->tv.margins.top; + bottom = conn_state->tv.margins.bottom; + + if (top + bottom) + top = extra * top / (top + bottom); + else + top = extra / 2; + bottom = extra - top; + + tv_conn_state->margins.top = top; + tv_conn_state->margins.bottom = bottom; + + tv_conn_state->bypass_vfilter = true; + + if (!tv_mode->progressive) + adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } else { + tv_conn_state->margins.top = conn_state->tv.margins.top; + tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; + + tv_conn_state->bypass_vfilter = false; + } DRM_DEBUG_KMS("TV mode:\n"); drm_mode_debug_printmodeline(adjusted_mode); @@ -1221,8 +1290,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, conn_state->tv.margins.left, conn_state->tv.margins.right); intel_tv_scale_mode_vert(adjusted_mode, vdisplay, - conn_state->tv.margins.top, - conn_state->tv.margins.bottom); + tv_conn_state->margins.top, + tv_conn_state->margins.bottom); drm_mode_set_crtcinfo(adjusted_mode, 0); adjusted_mode->name[0] = '\0'; @@ -1315,8 +1384,10 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); + const struct intel_tv_connector_state *tv_conn_state = + to_intel_tv_connector_state(conn_state); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); - u32 tv_ctl; + u32 tv_ctl, tv_filter_ctl; u32 scctl1, scctl2, scctl3; int i, j; const struct video_levels *video_levels; @@ -1424,16 +1495,20 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, assert_pipe_disabled(dev_priv, intel_crtc->pipe); /* Filter ctl must be set before TV_WIN_SIZE */ - I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE); + tv_filter_ctl = TV_AUTO_SCALE; + if (tv_conn_state->bypass_vfilter) + tv_filter_ctl |= TV_V_FILTER_BYPASS; + I915_WRITE(TV_FILTER_CTL_1, tv_filter_ctl); + xsize = tv_mode->hblank_start - tv_mode->hblank_end; ysize = intel_tv_mode_vdisplay(tv_mode); xpos = conn_state->tv.margins.left; - ypos = conn_state->tv.margins.top; + ypos = tv_conn_state->margins.top; xsize -= (conn_state->tv.margins.left + conn_state->tv.margins.right); - ysize -= (conn_state->tv.margins.top + - conn_state->tv.margins.bottom); + ysize -= (tv_conn_state->margins.top + + tv_conn_state->margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1700,7 +1775,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = { .destroy = intel_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_duplicate_state = intel_tv_connector_duplicate_state, }; static int intel_tv_atomic_check(struct drm_connector *connector, From 0bb1ffe4eaa40e953f700886515a1edb27dfc24c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Nov 2018 19:00:00 +0200 Subject: [PATCH 269/539] drm/i915/tv: Filter out >1024 wide modes that would need vertical scaling on gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since gen3 can't handle >1024 wide sources with vertical scaling let's not advertize such modes in the mode list. Less tempetation to the user to try out things that won't work. v2: s/IS_GEN3(dev_priv/IS_GEN(dev_priv, 3)/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181112170000.27531-17-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index cb6829bc762f..f0b9abda7720 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1728,6 +1728,7 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i, count = 0; @@ -1740,6 +1741,11 @@ intel_tv_get_modes(struct drm_connector *connector) !tv_mode->component_only) continue; + /* no vertical scaling with wide sources on gen3 */ + if (IS_GEN(dev_priv, 3) && input->w > 1024 && + input->h > intel_tv_mode_vdisplay(tv_mode)) + continue; + mode = drm_mode_create(connector->dev); if (!mode) continue; From 790cc9941b137e00aeb9e9cd6f44d05565c3f271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Jan 2019 19:08:11 +0200 Subject: [PATCH 270/539] drm/i915: Clean up intel_plane_atomic_check_with_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename some of the state variables in intel_plane_atomic_check_with_state() to make it less confusing. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190111170823.4441-2-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/intel_atomic_plane.c | 36 +++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 9a2fdc77ebcb..a1a263026574 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -110,41 +110,39 @@ intel_plane_destroy_state(struct drm_plane *plane, } int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, - struct intel_crtc_state *crtc_state, + struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, - struct intel_plane_state *intel_state) + struct intel_plane_state *new_plane_state) { - struct drm_plane *plane = intel_state->base.plane; - struct drm_plane_state *state = &intel_state->base; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); int ret; - crtc_state->active_planes &= ~BIT(intel_plane->id); - crtc_state->nv12_planes &= ~BIT(intel_plane->id); - intel_state->base.visible = false; + new_crtc_state->active_planes &= ~BIT(plane->id); + new_crtc_state->nv12_planes &= ~BIT(plane->id); + new_plane_state->base.visible = false; - /* If this is a cursor plane, no further checks are needed. */ - if (!intel_state->base.crtc && !old_plane_state->base.crtc) + if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) return 0; - ret = intel_plane->check_plane(crtc_state, intel_state); + ret = plane->check_plane(new_crtc_state, new_plane_state); if (ret) return ret; /* FIXME pre-g4x don't work like this */ - if (state->visible) - crtc_state->active_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible) + new_crtc_state->active_planes |= BIT(plane->id); - if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) - crtc_state->nv12_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible && + new_plane_state->base.fb->format->format == DRM_FORMAT_NV12) + new_crtc_state->nv12_planes |= BIT(plane->id); - if (state->visible || old_plane_state->base.visible) - crtc_state->update_planes |= BIT(intel_plane->id); + if (new_plane_state->base.visible || old_plane_state->base.visible) + new_crtc_state->update_planes |= BIT(plane->id); return intel_plane_atomic_calc_changes(old_crtc_state, - &crtc_state->base, + &new_crtc_state->base, old_plane_state, - state); + &new_plane_state->base); } static int intel_plane_atomic_check(struct drm_plane *plane, From 36eac4dd1f6ece0507cd2141d3e1b34ae589b896 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Tue, 20 Nov 2018 10:58:25 +0800 Subject: [PATCH 271/539] drm/amdpgu: add common functions for BACO feature in PP (v2) V2: squash in crash fix for non-register commands (Alex) Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/Makefile | 2 +- .../gpu/drm/amd/powerplay/hwmgr/common_baco.c | 101 ++++++++++++++++++ .../gpu/drm/amd/powerplay/hwmgr/common_baco.h | 50 +++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index ade8973b6f4d..5afec1a138ac 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -35,7 +35,7 @@ HARDWARE_MGR = hwmgr.o processpptables.o \ vega12_thermal.o \ pp_overdriver.o smu_helper.o \ vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \ - vega20_thermal.o + vega20_thermal.o common_baco.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c new file mode 100644 index 000000000000..9c57c1f67749 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c @@ -0,0 +1,101 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "common_baco.h" + + +static bool baco_wait_register(struct pp_hwmgr *hwmgr, u32 reg, u32 mask, u32 value) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + u32 timeout = 5000, data; + + do { + msleep(1); + data = RREG32(reg); + timeout--; + } while (value != (data & mask) && (timeout != 0)); + + if (timeout == 0) + return false; + + return true; +} + +static bool baco_cmd_handler(struct pp_hwmgr *hwmgr, u32 command, u32 reg, u32 mask, + u32 shift, u32 value, u32 timeout) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + u32 data; + bool ret = true; + + switch (command) { + case CMD_WRITE: + WREG32(reg, value << shift); + break; + case CMD_READMODIFYWRITE: + data = RREG32(reg); + data = (data & (~mask)) | (value << shift); + WREG32(reg, data); + break; + case CMD_WAITFOR: + ret = baco_wait_register(hwmgr, reg, mask, value); + break; + case CMD_DELAY_MS: + if (timeout) + /* Delay in milli Seconds */ + msleep(timeout); + break; + case CMD_DELAY_US: + if (timeout) + /* Delay in micro Seconds */ + udelay(timeout); + break; + + default: + dev_warn(adev->dev, "Invalid BACO command.\n"); + ret = false; + } + + return ret; +} + +bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr, + const struct soc15_baco_cmd_entry *entry, + const u32 array_size) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + u32 i, reg = 0; + + for (i = 0; i < array_size; i++) { + if ((entry[i].cmd == CMD_WRITE) || + (entry[i].cmd == CMD_READMODIFYWRITE) || + (entry[i].cmd == CMD_WAITFOR)) + reg = adev->reg_offset[entry[i].hwip][entry[i].inst][entry[i].seg] + + entry[i].reg_offset; + if (!baco_cmd_handler(hwmgr, entry[i].cmd, reg, entry[i].mask, + entry[i].shift, entry[i].val, entry[i].timeout)) + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h new file mode 100644 index 000000000000..95296c916f4e --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h @@ -0,0 +1,50 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __COMMON_BOCO_H__ +#define __COMMON_BOCO_H__ +#include "hwmgr.h" + + +enum baco_cmd_type { + CMD_WRITE = 0, + CMD_READMODIFYWRITE, + CMD_WAITFOR, + CMD_DELAY_MS, + CMD_DELAY_US, +}; + +struct soc15_baco_cmd_entry { + enum baco_cmd_type cmd; + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t mask; + uint32_t shift; + uint32_t timeout; + uint32_t val; +}; +extern bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr, + const struct soc15_baco_cmd_entry *entry, + const u32 array_size); +#endif From 2affc6234ff51f3e6c51270fd3260b03ad1d4094 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 7 Nov 2018 11:24:45 +0800 Subject: [PATCH 272/539] drm/amdgpu: add BACO interfaces for vega10 V2: delay 20ms before BACO out. V3: rename function to vega10_baco_xxx Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/Makefile | 2 +- .../gpu/drm/amd/powerplay/hwmgr/vega10_baco.c | 136 ++++++++++++++++++ .../gpu/drm/amd/powerplay/hwmgr/vega10_baco.h | 32 +++++ 3 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index 5afec1a138ac..e563811b2ebe 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -35,7 +35,7 @@ HARDWARE_MGR = hwmgr.o processpptables.o \ vega12_thermal.o \ pp_overdriver.o smu_helper.o \ vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \ - vega20_thermal.o common_baco.o + vega20_thermal.o common_baco.o vega10_baco.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c new file mode 100644 index 000000000000..f94dab27f486 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c @@ -0,0 +1,136 @@ +#include "amdgpu.h" +#include "soc15.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" +#include "soc15_common.h" +#include "vega10_inc.h" +#include "vega10_ppsmc.h" +#include "vega10_baco.h" + + + +static const struct soc15_baco_cmd_entry pre_baco_tbl[] = +{ + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIF_DOORBELL_CNTL), BIF_DOORBELL_CNTL__DOORBELL_MONITOR_EN_MASK, BIF_DOORBELL_CNTL__DOORBELL_MONITOR_EN__SHIFT, 0, 1}, + {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIF_FB_EN), 0, 0, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_DSTATE_BYPASS_MASK, BACO_CNTL__BACO_DSTATE_BYPASS__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_RST_INTR_MASK_MASK, BACO_CNTL__BACO_RST_INTR_MASK__SHIFT, 0, 1} +}; + +static const struct soc15_baco_cmd_entry enter_baco_tbl[] = +{ + {CMD_WAITFOR, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__SOC_DOMAIN_IDLE_MASK, THM_BACO_CNTL__SOC_DOMAIN_IDLE__SHIFT, 0xffffffff, 0x80000000}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_BIF_LCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_LCLK_SWITCH__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_DUMMY_EN_MASK, BACO_CNTL__BACO_DUMMY_EN__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SOC_VDCI_RESET_MASK, THM_BACO_CNTL__BACO_SOC_VDCI_RESET__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SMNCLK_MUX_MASK, THM_BACO_CNTL__BACO_SMNCLK_MUX__SHIFT,0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_ISO_EN_MASK, THM_BACO_CNTL__BACO_ISO_EN__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_AEB_ISO_EN_MASK, THM_BACO_CNTL__BACO_AEB_ISO_EN__SHIFT,0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_ANA_ISO_EN_MASK, THM_BACO_CNTL__BACO_ANA_ISO_EN__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SOC_REFCLK_OFF_MASK, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 1}, + {CMD_DELAY_MS, 0, 0, 0, 0, 0, 0, 5, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_RESET_EN_MASK, THM_BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_PWROKRAW_CNTL_MASK, THM_BACO_CNTL__BACO_PWROKRAW_CNTL__SHIFT, 0, 0}, + {CMD_WAITFOR, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_MODE_MASK, BACO_CNTL__BACO_MODE__SHIFT, 0xffffffff, 0x100} +}; + +static const struct soc15_baco_cmd_entry exit_baco_tbl[] = +{ + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0}, + {CMD_DELAY_MS, 0, 0, 0, 0, 0, 0, 10,0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SOC_REFCLK_OFF_MASK, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF__SHIFT, 0,0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_ANA_ISO_EN_MASK, THM_BACO_CNTL__BACO_ANA_ISO_EN__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_AEB_ISO_EN_MASK, THM_BACO_CNTL__BACO_AEB_ISO_EN__SHIFT,0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_ISO_EN_MASK, THM_BACO_CNTL__BACO_ISO_EN__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_PWROKRAW_CNTL_MASK, THM_BACO_CNTL__BACO_PWROKRAW_CNTL__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SMNCLK_MUX_MASK, THM_BACO_CNTL__BACO_SMNCLK_MUX__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SOC_VDCI_RESET_MASK, THM_BACO_CNTL__BACO_SOC_VDCI_RESET__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_EXIT_MASK, THM_BACO_CNTL__BACO_EXIT__SHIFT, 0, 1}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_RESET_EN_MASK, THM_BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0}, + {CMD_WAITFOR, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_EXIT_MASK, 0, 0xffffffff, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(THM, 0, mmTHM_BACO_CNTL), THM_BACO_CNTL__BACO_SB_AXI_FENCE_MASK, THM_BACO_CNTL__BACO_SB_AXI_FENCE__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_DUMMY_EN_MASK, BACO_CNTL__BACO_DUMMY_EN__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_BIF_LCLK_SWITCH_MASK ,BACO_CNTL__BACO_BIF_LCLK_SWITCH__SHIFT, 0, 0}, + {CMD_READMODIFYWRITE, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_EN_MASK , BACO_CNTL__BACO_EN__SHIFT, 0,0}, + {CMD_WAITFOR, SOC15_REG_ENTRY(NBIF, 0, mmBACO_CNTL), BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0} + }; + +static const struct soc15_baco_cmd_entry clean_baco_tbl[] = +{ + {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0}, + {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, +}; + +int vega10_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg, data; + + *cap = false; + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) + return 0; + + WREG32(0x12074, 0xFFF0003B); + data = RREG32(0x12075); + + if (data == 0x1) { + reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); + + if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) + *cap = true; + } + + return 0; +} + +int vega10_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg; + + reg = RREG32_SOC15(NBIF, 0, mmBACO_CNTL); + + if (reg & BACO_CNTL__BACO_MODE_MASK) + /* gfx has already entered BACO state */ + *state = BACO_STATE_IN; + else + *state = BACO_STATE_OUT; + return 0; +} + +int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) +{ + enum BACO_STATE cur_state; + + vega10_baco_get_state(hwmgr, &cur_state); + + if (cur_state == state) + /* aisc already in the target state */ + return 0; + + if (state == BACO_STATE_IN) { + if (soc15_baco_program_registers(hwmgr, pre_baco_tbl, + ARRAY_SIZE(pre_baco_tbl))) { + if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnterBaco)) + return -1; + + if (soc15_baco_program_registers(hwmgr, enter_baco_tbl, + ARRAY_SIZE(enter_baco_tbl))) + return 0; + } + } else if (state == BACO_STATE_OUT) { + /* HW requires at least 20ms between regulator off and on */ + msleep(20); + /* Execute Hardware BACO exit sequence */ + if (soc15_baco_program_registers(hwmgr, exit_baco_tbl, + ARRAY_SIZE(exit_baco_tbl))) { + if (soc15_baco_program_registers(hwmgr, clean_baco_tbl, + ARRAY_SIZE(clean_baco_tbl))) + return 0; + } + } + + return -1; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h new file mode 100644 index 000000000000..a93b1e6d1c66 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __VEGA10_BOCO_H__ +#define __VEGA10_BOCO_H__ +#include "hwmgr.h" +#include "common_baco.h" + +extern int vega10_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern int vega10_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); +extern int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); + +#endif From 425db2553e43c7a40f3a9d8c066977493825339c Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 9 Jan 2019 16:42:05 +0800 Subject: [PATCH 273/539] drm/amdgpu: expose BACO interfaces to upper level from PP Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 54 +++++++++++++++++++ .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++ 2 files changed, 58 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 9bc27f468d5b..5d8b5d3c2453 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1404,6 +1404,57 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return ret; } +static int pp_get_asic_baco_capability(void *handle, bool *cap) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_capability) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_get_asic_baco_state(void *handle, int *state) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_state) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->get_asic_baco_state(hwmgr, (enum BACO_STATE *)state); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_asic_baco_state(void *handle, int state) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_state) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_asic_baco_state(hwmgr, (enum BACO_STATE)state); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1454,4 +1505,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .set_min_deep_sleep_dcefclk = pp_set_min_deep_sleep_dcefclk, .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq, .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq, + .get_asic_baco_capability = pp_get_asic_baco_capability, + .get_asic_baco_state = pp_get_asic_baco_state, + .set_asic_baco_state = pp_set_asic_baco_state, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 91e3bbe6d61d..d1e262844619 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -48,6 +48,7 @@ #include "ppinterrupt.h" #include "pp_overdriver.h" #include "pp_thermal.h" +#include "vega10_baco.h" #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" @@ -4980,6 +4981,9 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .set_power_limit = vega10_set_power_limit, .odn_edit_dpm_table = vega10_odn_edit_dpm_table, .get_performance_level = vega10_get_performance_level, + .get_asic_baco_capability = vega10_baco_get_capability, + .get_asic_baco_state = vega10_baco_get_state, + .set_asic_baco_state = vega10_baco_set_state, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) From f5d9e9b9c15c67dc86354b606e077a5a6b6381ed Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Thu, 8 Nov 2018 18:21:05 +0800 Subject: [PATCH 274/539] drm/amdgpu: update NBIO v7.4 to support BACO Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h | 2 ++ drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h index e932213f87f0..994e796a28d7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h @@ -2567,6 +2567,8 @@ // addressBlock: nbio_nbif0_rcc_strap_BIFDEC1 // base address: 0x0 +#define mmRCC_BIF_STRAP0 0x0000 +#define mmRCC_BIF_STRAP0_BASE_IDX 2 #define mmRCC_DEV0_EPF0_STRAP0 0x0011 #define mmRCC_DEV0_EPF0_STRAP0_BASE_IDX 2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h index d3704b438f2d..d467b939c971 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h @@ -19690,6 +19690,9 @@ // addressBlock: nbio_nbif0_rcc_strap_BIFDEC1 +//RCC_BIF_STRAP0 +#define RCC_BIF_STRAP0__STRAP_PX_CAPABLE__SHIFT 0x7 +#define RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK 0x00000080L //RCC_DEV0_EPF0_STRAP0 #define RCC_DEV0_EPF0_STRAP0__STRAP_DEVICE_ID_DEV0_F0__SHIFT 0x0 #define RCC_DEV0_EPF0_STRAP0__STRAP_MAJOR_REV_ID_DEV0_F0__SHIFT 0x10 From d1882ab2c297de6ee1cea436bee3a1f1cd843e2f Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Thu, 8 Nov 2018 18:26:22 +0800 Subject: [PATCH 275/539] drm/amdgpu: also include NBIO v7.4 register mask header Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_inc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_inc.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_inc.h index 6738bad53602..613cb1989b3d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_inc.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_inc.h @@ -31,5 +31,6 @@ #include "asic_reg/mp/mp_9_0_sh_mask.h" #include "asic_reg/nbio/nbio_7_4_offset.h" +#include "asic_reg/nbio/nbio_7_4_sh_mask.h" #endif From 6a789aa8d5f4b4a165de2fb590511f22b1e81c1e Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Thu, 8 Nov 2018 18:32:41 +0800 Subject: [PATCH 276/539] drm/amdgpu: update THM IP register header to support BACO Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h index a9eb57a53e59..a485526f3a51 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h @@ -46,4 +46,7 @@ #define mmTHM_TCON_THERM_TRIP 0x0002 #define mmTHM_TCON_THERM_TRIP_BASE_IDX 0 +#define mmTHM_BACO_CNTL 0x0081 +#define mmTHM_BACO_CNTL_BASE_IDX 0 + #endif From 3177b3c52f62e262c6a8eff19358ca128c26d518 Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Thu, 8 Nov 2018 13:02:00 +0800 Subject: [PATCH 277/539] drm/amdgpu/powerper: add vega20 BACO functions Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/Makefile | 2 +- .../gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 81 +++++++++++++++++++ .../gpu/drm/amd/powerplay/hwmgr/vega20_baco.h | 32 ++++++++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 5 ++ 4 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c create mode 100644 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index e563811b2ebe..0b3c6d1d52e4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -35,7 +35,7 @@ HARDWARE_MGR = hwmgr.o processpptables.o \ vega12_thermal.o \ pp_overdriver.o smu_helper.o \ vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \ - vega20_thermal.o common_baco.o vega10_baco.o + vega20_thermal.o common_baco.o vega10_baco.o vega20_baco.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c new file mode 100644 index 000000000000..0d883b358df2 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -0,0 +1,81 @@ +#include "amdgpu.h" +#include "soc15.h" +#include "soc15_hw_ip.h" +#include "soc15_common.h" +#include "vega20_inc.h" +#include "vega20_ppsmc.h" +#include "vega20_baco.h" + + + +static const struct soc15_baco_cmd_entry clean_baco_tbl[] = +{ + {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0}, + {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, +}; + +int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg; + + *cap = false; + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) + return 0; + + if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) { + reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); + + if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) + *cap = true; + } + + return 0; +} + +int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg; + + reg = RREG32_SOC15(NBIF, 0, mmBACO_CNTL); + + if (reg & BACO_CNTL__BACO_MODE_MASK) + /* gfx has already entered BACO state */ + *state = BACO_STATE_IN; + else + *state = BACO_STATE_OUT; + return 0; +} + +int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + enum BACO_STATE cur_state; + uint32_t data; + + vega20_baco_get_state(hwmgr, &cur_state); + + if (cur_state == state) + /* aisc already in the target state */ + return 0; + + if (state == BACO_STATE_IN) { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + + + if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) + return -1; + + } else if (state == BACO_STATE_OUT) { + if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco)) + return -1; + if (!soc15_baco_program_registers(hwmgr, clean_baco_tbl, + ARRAY_SIZE(clean_baco_tbl))) + return -1; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h new file mode 100644 index 000000000000..c51988a9ed77 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __VEGA20_BOCO_H__ +#define __VEGA20_BOCO_H__ +#include "hwmgr.h" +#include "common_baco.h" + +extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); +extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 82935a3bd950..2ba387b0f27c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -47,6 +47,7 @@ #include "pp_overdriver.h" #include "pp_thermal.h" #include "soc15_common.h" +#include "vega20_baco.h" #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_4_sh_mask.h" @@ -3591,6 +3592,10 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* smu memory related */ .notify_cac_buffer_info = vega20_notify_cac_buffer_info, .enable_mgpu_fan_boost = vega20_enable_mgpu_fan_boost, + /* BACO related */ + .get_asic_baco_capability = vega20_baco_get_capability, + .get_asic_baco_state = vega20_baco_get_state, + .set_asic_baco_state = vega20_baco_set_state, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) From e2b6d053b1077d6eef025e7869690c630ab29d0c Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 7 Nov 2018 12:29:39 +0800 Subject: [PATCH 278/539] drm/amdgpu: use BACO reset if platform support (v2) It will fall back to use mode1 reset if platform does not support BACO feature. v2: squash in warning fix (Alex) Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 61 ++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 483867f49154..97eb0a38b77a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -389,14 +389,13 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, } - -static int soc15_asic_reset(struct amdgpu_device *adev) +static int soc15_asic_mode1_reset(struct amdgpu_device *adev) { u32 i; amdgpu_atombios_scratch_regs_engine_hung(adev, true); - dev_info(adev->dev, "GPU reset\n"); + dev_info(adev->dev, "GPU mode1 reset\n"); /* disable BM */ pci_clear_master(adev->pdev); @@ -421,6 +420,62 @@ static int soc15_asic_reset(struct amdgpu_device *adev) return 0; } +static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { + *cap = false; + return -1; + } + + return pp_funcs->get_asic_baco_capability(pp_handle, cap); +} + +static int soc15_asic_baco_reset(struct amdgpu_device *adev) +{ + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) + return -1; + + /* enter BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 1)) + return -1; + + /* exit BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 0)) + return -1; + + dev_info(adev->dev, "GPU BACO reset\n"); + + return 0; +} + +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + int ret; + bool baco_reset; + + switch (adev->asic_type) { + case CHIP_VEGA10: + soc15_asic_get_baco_capability(adev, &baco_reset); + break; + default: + baco_reset = false; + break; + } + + if (baco_reset) + ret = soc15_asic_baco_reset(adev); + else + ret = soc15_asic_mode1_reset(adev); + + return ret; +} + /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { From 2172b89e7c94605380d8c0dedf543c93f0a0b27c Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Thu, 8 Nov 2018 14:07:22 +0800 Subject: [PATCH 279/539] drm/amdgpu: use BACO reset on vega20 if platform support Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 97eb0a38b77a..5248b03df5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -461,6 +461,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA20: soc15_asic_get_baco_capability(adev, &baco_reset); break; default: From 02d35a67f498f661b282d8e4175ea6f80fedde82 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 11 Jan 2019 16:34:31 -0500 Subject: [PATCH 280/539] drm/amd/display: Skip create new stream if crtc state doesn't change Need to check if crtc state is changed so that mode set is required before trying to create new stream. It deals with the MST hotplug use case when plug back to the same connector where the failure to create new stream for the inactive crtc on the old connector. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ff2fcb99ac63..feb1b24efaaa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5509,6 +5509,9 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_new_conn_state = to_dm_connector_state(drm_new_conn_state); dm_old_conn_state = to_dm_connector_state(drm_old_conn_state); + if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) + goto skip_modeset; + new_stream = create_stream_for_sink(aconnector, &new_crtc_state->mode, dm_new_conn_state, @@ -5538,6 +5541,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, } } + /* mode_changed flag may get updated above, need to check again */ if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto skip_modeset; From 229f7b1d6344ea35fff0b113e4d91128921f8937 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 7 Jan 2019 14:06:00 +0100 Subject: [PATCH 281/539] drm/amd: fix typo Fix spelling mistake: "lenght" -> "length" Signed-off-by: Matteo Croce Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/atombios.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 7931502fa54f..8ba21747b40a 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -4106,7 +4106,7 @@ typedef struct _ATOM_LCD_MODE_CONTROL_CAP typedef struct _ATOM_FAKE_EDID_PATCH_RECORD { UCHAR ucRecordType; - UCHAR ucFakeEDIDLength; // = 128 means EDID lenght is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 + UCHAR ucFakeEDIDLength; // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128 UCHAR ucFakeEDIDString[1]; // This actually has ucFakeEdidLength elements. } ATOM_FAKE_EDID_PATCH_RECORD; From 0982a8fc2a85ecac36901a27b7d8e29b2885d939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 13 Jan 2019 14:24:07 +0100 Subject: [PATCH 282/539] drm/amd/display: Include names of all PP clock types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes printing clock names in cases like: [ 5.352311] [drm] DM_PPLIB: values for Invalid clock [ 5.352313] [drm] DM_PPLIB: 400000 in kHz [ 5.352313] [drm] DM_PPLIB: 933000 in kHz [ 5.352314] [drm] DM_PPLIB: 1067000 in kHz [ 5.352315] [drm] DM_PPLIB: 1200000 in kHz [ 5.352317] [drm] DM_PPLIB: values for Invalid clock [ 5.352318] [drm] DM_PPLIB: 300000 in kHz [ 5.352318] [drm] DM_PPLIB: 600000 in kHz [ 5.352319] [drm] DM_PPLIB: 626000 in kHz [ 5.352320] [drm] DM_PPLIB: 654000 in kHz (source: HP EliteBook 745 G5 w. RAVEN 0x1002:0x15DD 0x103C:0x83D5 0xD1) On my system above "Invalid" names got replaced by "F" and "DCF". The same problem was occurring on Huawei Matebook D with just 667000 kHz instead of 400000 kHz. Signed-off-by: Rafał Miłecki Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dm_services_types.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index 1af8c777b3ac..9afd36a031a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -82,7 +82,15 @@ enum dm_pp_clock_type { #define DC_DECODE_PP_CLOCK_TYPE(clk_type) \ (clk_type) == DM_PP_CLOCK_TYPE_DISPLAY_CLK ? "Display" : \ (clk_type) == DM_PP_CLOCK_TYPE_ENGINE_CLK ? "Engine" : \ - (clk_type) == DM_PP_CLOCK_TYPE_MEMORY_CLK ? "Memory" : "Invalid" + (clk_type) == DM_PP_CLOCK_TYPE_MEMORY_CLK ? "Memory" : \ + (clk_type) == DM_PP_CLOCK_TYPE_DCFCLK ? "DCF" : \ + (clk_type) == DM_PP_CLOCK_TYPE_DCEFCLK ? "DCEF" : \ + (clk_type) == DM_PP_CLOCK_TYPE_SOCCLK ? "SoC" : \ + (clk_type) == DM_PP_CLOCK_TYPE_PIXELCLK ? "Pixel" : \ + (clk_type) == DM_PP_CLOCK_TYPE_DISPLAYPHYCLK ? "Display PHY" : \ + (clk_type) == DM_PP_CLOCK_TYPE_DPPCLK ? "DPP" : \ + (clk_type) == DM_PP_CLOCK_TYPE_FCLK ? "F" : \ + "Invalid" #define DM_PP_MAX_CLOCK_LEVELS 8 From 1f46df61a148da7263165079b4874d61525c8ae1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Jan 2019 14:56:42 -0500 Subject: [PATCH 283/539] drm/amdgpu/soc15: return proper error codes in baco reset Rather than just -1. Reviewed-by: JimQu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 5248b03df5fa..7130a4c8dd5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -427,7 +427,7 @@ static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { *cap = false; - return -1; + return -ENOENT; } return pp_funcs->get_asic_baco_capability(pp_handle, cap); @@ -439,15 +439,15 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -1; + return -ENOENT; /* enter BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -1; + return -EIO; /* exit BACO state */ if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -1; + return -EIO; dev_info(adev->dev, "GPU BACO reset\n"); From fd4855409f6ebe015406cd2b2ffa4fee4cd1f4a7 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 17 Dec 2018 19:42:58 -0600 Subject: [PATCH 284/539] drm/amdgpu: Add per device sdma_doorbell_range field Different ASIC has different sdma doorbell range. Add a per device sdma_doorbell_range field and initialize it. Signed-off-by: Oak Zeng Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 2 ++ drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 1 + drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 35a0c05f454c..1cfec06f81d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -72,6 +72,8 @@ struct amdgpu_doorbell_index { } uvd_vce; }; uint32_t max_assignment; + /* Per engine SDMA doorbell size in dword */ + uint32_t sdma_doorbell_range; }; typedef enum _AMDGPU_DOORBELL_ASSIGNMENT diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index b75d17ba59e2..4b5d60ea3e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -83,5 +83,6 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; /* In unit of dword doorbell */ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 4; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 63c542cfabfa..53716c593b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -86,5 +86,6 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 20; } From 8987e2e256be3551436a5abc3c48d0eb4d81084e Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 17 Dec 2018 20:03:45 -0600 Subject: [PATCH 285/539] drm/amdgpu: Fix sdma doorbell range setting Different ASIC has different SDMA queue number so different SDMA doorbell range. Introduce an extra parameter to sdma_doorbell_range function and set sdma doorbell range correctly. Signed-off-by: Oak Zeng Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 88431ea88344..9efa681d0878 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -639,7 +639,7 @@ struct amdgpu_nbio_funcs { void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index); + bool use_doorbell, int doorbell_index, int doorbell_size); void (*enable_doorbell_aperture)(struct amdgpu_device *adev, bool enable); void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 1965756348bf..cc967dbfd631 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -68,7 +68,7 @@ static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) } static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index) + bool use_doorbell, int doorbell_index, int doorbell_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); @@ -77,7 +77,7 @@ static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instan if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 38291c5014be..1cdb98ad2db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -67,7 +67,7 @@ static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) } static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index) + bool use_doorbell, int doorbell_index, int doorbell_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); @@ -76,7 +76,7 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 0a613098337a..e347b407bd03 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -65,7 +65,7 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) } static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index) + bool use_doorbell, int doorbell_index, int doorbell_size) { u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); @@ -74,7 +74,7 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 59638b800212..658e92233958 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -835,7 +835,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index); + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); sdma_v4_0_ring_set_wptr(ring); From 7c94bc828ee7e7e64f31e40bb967b6417d7e6382 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 14 Jan 2019 16:32:53 -0600 Subject: [PATCH 286/539] drm/amdgpu: Setting doorbell range registers earlier HW doorbell writing routing policy: writing to doorbell not in SDMA/IH/MM/ACV doorbell range will be routed to CP. So CP doorbell routing depends on doorbell range setting of above blocks. Setting doorbell range of above blocks earlier (soc15_common_hw_init) to make sure CP doorbell writing be routed to CP block. Signed-off-by: Oak Zeng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 -- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 658e92233958..48a166ba9fed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -834,9 +834,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) OFFSET, ring->doorbell_index); WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); sdma_v4_0_ring_set_wptr(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7130a4c8dd5f..671f78124cb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -966,6 +966,22 @@ static int soc15_common_sw_fini(void *handle) return 0; } +static void soc15_doorbell_range_init(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_ring *ring; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio_funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } + + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); +} + static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -978,6 +994,12 @@ static int soc15_common_hw_init(void *handle) adev->nbio_funcs->init_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); + /* HW doorbell routing policy: doorbell writing not + * in SDMA/IH/MM/ACV range will be routed to CP. So + * we need to init SDMA/IH/MM/ACV doorbell range prior + * to CP ip block init and ring test. + */ + soc15_doorbell_range_init(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 562701939d3e..877b4a6d21f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -140,8 +140,6 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ENABLE, 0); } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, - adev->irq.ih.doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, From 466bcb75b0791ba301817cdadeed20398f2224fe Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 14 Jan 2019 16:08:32 +0800 Subject: [PATCH 287/539] drm/amdgpu/psp: ignore psp response status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, psp response status is not 0 even there is no problem while the command is submitted. Some version of PSP FW doesn't write 0 to that field. So here we would like to only print a warning instead of an error during psp initialization to avoid breaking hw_init and it doesn't return -EINVAL. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Reviewed-by: Xiangliang Yu Acked-by: Christian König Reviewed-by: Feifei Xu Reviewed-by: Paul Menzel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 53c2d6069fa0..f26d8faa7507 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -140,14 +140,19 @@ psp_cmd_submit_buf(struct psp_context *psp, while (*((unsigned int *)psp->fence_buf) != index) msleep(1); - /* the status field must be 0 after psp command completion */ + /* In some cases, psp response status is not 0 even there is no + * problem while the command is submitted. Some version of PSP FW + * doesn't write 0 to that field. + * So here we would like to only print a warning instead of an error + * during psp initialization to avoid breaking hw_init and it doesn't + * return -EINVAL. + */ if (psp->cmd_buf_mem->resp.status) { if (ucode) - DRM_ERROR("failed to load ucode id (%d) ", + DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_ERROR("psp command failed and response status is (%d)\n", + DRM_WARN("psp command failed and response status is (%d)\n", psp->cmd_buf_mem->resp.status); - return -EINVAL; } /* get xGMI session id from response buffer */ From ae6d343541bb75958e9535d056adaf4ff6a66d6a Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 10 Jan 2019 17:56:39 +0800 Subject: [PATCH 288/539] drm/ttm: add lru notify to bo driver v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit allow driver do somethings when lru changed. v2: address Michel's comments. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 11 +++++++---- include/drm/ttm/ttm_bo_driver.h | 9 +++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0ec08394e17a..de088c8070fb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -198,19 +198,22 @@ static void ttm_bo_ref_bug(struct kref *list_kref) void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { + struct ttm_bo_device *bdev = bo->bdev; + bool notify = false; + if (!list_empty(&bo->swap)) { list_del_init(&bo->swap); kref_put(&bo->list_kref, ttm_bo_ref_bug); + notify = true; } if (!list_empty(&bo->lru)) { list_del_init(&bo->lru); kref_put(&bo->list_kref, ttm_bo_ref_bug); + notify = true; } - /* - * TODO: Add a driver hook to delete from - * driver-specific LRU's here. - */ + if (notify && bdev->driver->del_from_lru_notify) + bdev->driver->del_from_lru_notify(bo); } void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1021106438b2..15829b24277c 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -381,6 +381,15 @@ struct ttm_bo_driver { */ int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write); + + /** + * struct ttm_bo_driver member del_from_lru_notify + * + * @bo: the buffer object deleted from lru + * + * notify driver that a BO was deleted from LRU. + */ + void (*del_from_lru_notify)(struct ttm_buffer_object *bo); }; /** From b61857b5e365889d67a6296c413df396032d374d Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 10 Jan 2019 15:49:54 +0800 Subject: [PATCH 289/539] drm/amdgpu: set bulk_moveable to false when lru changed v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit if lru is changed, we cannot do bulk moving. v2: root bo isn't in bulk moving, skip its change. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c91ec3101d00..b852abb9db0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1546,7 +1546,8 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, - .access_memory = &amdgpu_ttm_access_memory + .access_memory = &amdgpu_ttm_access_memory, + .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ccffcadfd73b..ffffe021cccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -623,6 +623,28 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } +void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) +{ + struct amdgpu_bo *abo; + struct amdgpu_vm_bo_base *bo_base; + + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return; + + if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) + return; + + abo = ttm_to_amdgpu_bo(bo); + if (!abo->parent) + return; + for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { + struct amdgpu_vm *vm = bo_base->vm; + + if (abo->tbo.resv == vm->root.base.bo->tbo.resv) + vm->bulk_moveable = false; + } + +} /** * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index e8dcfd59fc93..81ff8177f092 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -363,4 +363,6 @@ int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); +void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); + #endif From 7ca881a8651bdeffd99ba8e0010160f9bf60673e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 14 Jan 2019 14:06:54 +0800 Subject: [PATCH 290/539] drm/amd/powerplay: support enabled ppfeatures retrieving and setting V3 User can use "ppfeatures" sysfs interface to retrieve and set enabled powerplay features. V2: expose this feature for Vega10 and later dGPUs V3: squash in removal of unused variable (Alex) Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 8 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 71 ++++++++++++ .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 42 +++++++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 104 ++++++++++++++++++ drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 2 + 6 files changed, 229 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index f972cd156795..2f61e9edb1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -364,6 +364,14 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\ (adev)->powerplay.pp_handle)) +#define amdgpu_dpm_get_ppfeature_status(adev, buf) \ + ((adev)->powerplay.pp_funcs->get_ppfeature_status(\ + (adev)->powerplay.pp_handle, (buf))) + +#define amdgpu_dpm_set_ppfeature_status(adev, ppfeatures) \ + ((adev)->powerplay.pp_funcs->set_ppfeature_status(\ + (adev)->powerplay.pp_handle, (ppfeatures))) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 51eb2cf42b81..f21f294e6735 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -625,6 +625,60 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } +/** + * DOC: ppfeatures + * + * The amdgpu driver provides a sysfs API for adjusting what powerplay + * features to be enabled. The file ppfeatures is used for this. And + * this is only available for Vega10 and later dGPUs. + * + * Reading back the file will show you the followings: + * - Current ppfeature masks + * - List of the all supported powerplay features with their naming, + * bitmasks and enablement status('Y'/'N' means "enabled"/"disabled"). + * + * To manually enable or disable a specific feature, just set or clear + * the corresponding bit from original ppfeature masks and input the + * new ppfeature masks. + */ +static ssize_t amdgpu_set_ppfeature_status(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t featuremask; + int ret; + + ret = kstrtou64(buf, 0, &featuremask); + if (ret) + return -EINVAL; + + pr_debug("featuremask = 0x%llx\n", featuremask); + + if (adev->powerplay.pp_funcs->set_ppfeature_status) { + ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); + if (ret) + return -EINVAL; + } + + return count; +} + +static ssize_t amdgpu_get_ppfeature_status(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->powerplay.pp_funcs->get_ppfeature_status) + return amdgpu_dpm_get_ppfeature_status(adev, buf); + + return snprintf(buf, PAGE_SIZE, "\n"); +} + /** * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie * @@ -1051,6 +1105,9 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); +static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, + amdgpu_get_ppfeature_status, + amdgpu_set_ppfeature_status); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -2241,6 +2298,17 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } + if ((adev->asic_type >= CHIP_VEGA10) && + !(adev->flags & AMD_IS_APU)) { + ret = device_create_file(adev->dev, + &dev_attr_ppfeatures); + if (ret) { + DRM_ERROR("failed to create device file " + "ppfeatures\n"); + return ret; + } + } + adev->pm.sysfs_initialized = true; return 0; @@ -2276,6 +2344,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); if (adev->flags & !AMD_IS_APU) device_remove_file(adev->dev, &dev_attr_pcie_bw); + if ((adev->asic_type >= CHIP_VEGA10) && + !(adev->flags & AMD_IS_APU)) + device_remove_file(adev->dev, &dev_attr_ppfeatures); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index a2ea4c933360..1130f293c4ee 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -284,6 +284,8 @@ struct amd_pm_funcs { int (*get_asic_baco_capability)(void *handle, bool *cap); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); + int (*get_ppfeature_status)(void *handle, char *buf); + int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 5d8b5d3c2453..3f73f7cd18b9 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1455,6 +1455,46 @@ static int pp_set_asic_baco_state(void *handle, int state) return 0; } +static int pp_get_ppfeature_status(void *handle, char *buf) +{ + struct pp_hwmgr *hwmgr = handle; + int ret = 0; + + if (!hwmgr || !hwmgr->pm_en || !buf) + return -EINVAL; + + if (hwmgr->hwmgr_func->get_ppfeature_status == NULL) { + pr_info_ratelimited("%s was not implemented.\n", __func__); + return -EINVAL; + } + + mutex_lock(&hwmgr->smu_lock); + ret = hwmgr->hwmgr_func->get_ppfeature_status(hwmgr, buf); + mutex_unlock(&hwmgr->smu_lock); + + return ret; +} + +static int pp_set_ppfeature_status(void *handle, uint64_t ppfeature_masks) +{ + struct pp_hwmgr *hwmgr = handle; + int ret = 0; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_ppfeature_status == NULL) { + pr_info_ratelimited("%s was not implemented.\n", __func__); + return -EINVAL; + } + + mutex_lock(&hwmgr->smu_lock); + ret = hwmgr->hwmgr_func->set_ppfeature_status(hwmgr, ppfeature_masks); + mutex_unlock(&hwmgr->smu_lock); + + return ret; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1508,4 +1548,6 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_asic_baco_capability = pp_get_asic_baco_capability, .get_asic_baco_state = pp_get_asic_baco_state, .set_asic_baco_state = pp_set_asic_baco_state, + .get_ppfeature_status = pp_get_ppfeature_status, + .set_ppfeature_status = pp_set_ppfeature_status, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 2ba387b0f27c..3e97b9d6f450 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2776,6 +2776,108 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, return 0; } +static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) +{ + static const char *ppfeature_name[] = { + "DPM_PREFETCHER", + "GFXCLK_DPM", + "UCLK_DPM", + "SOCCLK_DPM", + "UVD_DPM", + "VCE_DPM", + "ULV", + "MP0CLK_DPM", + "LINK_DPM", + "DCEFCLK_DPM", + "GFXCLK_DS", + "SOCCLK_DS", + "LCLK_DS", + "PPT", + "TDC", + "THERMAL", + "GFX_PER_CU_CG", + "RM", + "DCEFCLK_DS", + "ACDC", + "VR0HOT", + "VR1HOT", + "FW_CTF", + "LED_DISPLAY", + "FAN_CONTROL", + "GFX_EDC", + "GFXOFF", + "CG", + "FCLK_DPM", + "FCLK_DS", + "MP1CLK_DS", + "MP0CLK_DS", + "XGMI"}; + static const char *output_title[] = { + "FEATURES", + "BITMASK", + "ENABLEMENT"}; + uint64_t features_enabled; + int i; + int ret = 0; + int size = 0; + + ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled); + PP_ASSERT_WITH_CODE(!ret, + "[EnableAllSmuFeatures] Failed to get enabled smc features!", + return ret); + + size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sprintf(buf + size, "%-19s %-22s %s\n", + output_title[0], + output_title[1], + output_title[2]); + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + ppfeature_name[i], + 1ULL << i, + (features_enabled & (1ULL << i)) ? "Y" : "N"); + } + + return size; +} + +static int vega20_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfeature_masks) +{ + uint64_t features_enabled; + uint64_t features_to_enable; + uint64_t features_to_disable; + int ret = 0; + + if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) + return -EINVAL; + + ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled); + if (ret) + return ret; + + features_to_disable = + (features_enabled ^ new_ppfeature_masks) & features_enabled; + features_to_enable = + (features_enabled ^ new_ppfeature_masks) ^ features_to_disable; + + pr_debug("features_to_disable 0x%llx\n", features_to_disable); + pr_debug("features_to_enable 0x%llx\n", features_to_enable); + + if (features_to_disable) { + ret = vega20_enable_smc_features(hwmgr, false, features_to_disable); + if (ret) + return ret; + } + + if (features_to_enable) { + ret = vega20_enable_smc_features(hwmgr, true, features_to_enable); + if (ret) + return ret; + } + + return 0; +} + static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -3572,6 +3674,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .force_clock_level = vega20_force_clock_level, .print_clock_levels = vega20_print_clock_levels, .read_sensor = vega20_read_sensor, + .get_ppfeature_status = vega20_get_ppfeature_status, + .set_ppfeature_status = vega20_set_ppfeature_status, /* powergate related */ .powergate_uvd = vega20_power_gate_uvd, .powergate_vce = vega20_power_gate_vce, diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 577cec90aef1..b1cd70dcd6e7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -341,6 +341,8 @@ struct pp_hwmgr_func { int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap); int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); + int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf); + int (*set_ppfeature_status)(struct pp_hwmgr *hwmgr, uint64_t ppfeature_masks); }; struct pp_table_func { From d7337ca2640cde21ff178bd78f01d94cd5ea2e08 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 14 Jan 2019 14:45:47 +0800 Subject: [PATCH 291/539] drm/amd/powerplay: support retrieving and adjusting SOC clock power levels V2 User can use "pp_dpm_socclk" to retrieve and adjust SOC clock power levels. V2: expose this interface for Vega10 and later ASICs only Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 54 ++++++++++++++++++- .../gpu/drm/amd/include/kgd_pp_interface.h | 1 + .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 45 ++++++++++++++++ 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index f21f294e6735..4bdb1d082bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -680,11 +680,13 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, - * and pp_dpm_pcie are used for this. + * pp_dpm_socclk and pp_dpm_pcie are used for this. + * + * pp_dpm_socclk interface is only available for Vega10 and later ASICs. * * Reading back the files will show you the available power levels within * the power state and the clock information for those levels. @@ -804,6 +806,42 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return count; } +static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->powerplay.pp_funcs->print_clock_levels) + return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); + else + return snprintf(buf, PAGE_SIZE, "\n"); +} + +static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + uint32_t mask = 0; + + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; + + if (adev->powerplay.pp_funcs->force_clock_level) + ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); + + if (ret) + return -EINVAL; + + return count; +} + static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct device_attribute *attr, char *buf) @@ -1087,6 +1125,9 @@ static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_mclk, amdgpu_set_pp_dpm_mclk); +static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_socclk, + amdgpu_set_pp_dpm_socclk); static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_pcie, amdgpu_set_pp_dpm_pcie); @@ -2246,6 +2287,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_mclk\n"); return ret; } + if (adev->asic_type >= CHIP_VEGA10) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_socclk\n"); + return ret; + } + } ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); if (ret) { DRM_ERROR("failed to create device file pp_dpm_pcie\n"); @@ -2333,6 +2381,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); + if (adev->asic_type >= CHIP_VEGA10) + device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 1130f293c4ee..f5ec25a6ab54 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -92,6 +92,7 @@ enum pp_clock_type { PP_SCLK, PP_MCLK, PP_PCIE, + PP_SOCCLK, OD_SCLK, OD_MCLK, OD_VDDC_CURVE, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 3e97b9d6f450..e7c890f036fa 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2296,6 +2296,34 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, break; + case PP_SOCCLK: + soft_min_level = mask ? (ffs(mask) - 1) : 0; + soft_max_level = mask ? (fls(mask) - 1) : 0; + + if (soft_max_level >= data->dpm_table.soc_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, + data->dpm_table.soc_table.count - 1); + return -EINVAL; + } + + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_levels[soft_min_level].value; + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_SOCCLK_MASK); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload boot level to lowest!", + return ret); + + ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_SOCCLK_MASK); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload dpm max level to highest!", + return ret); + + break; + case PP_PCIE: soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; @@ -2931,6 +2959,23 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; + case PP_SOCCLK: + ret = vega20_get_current_clk_freq(hwmgr, PPCLK_SOCCLK, &now); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to get current socclk freq Failed!", + return ret); + + ret = vega20_get_socclocks(hwmgr, &clocks); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to get soc clk levels Failed!", + return ret); + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); + break; + case PP_PCIE: gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) From 828e37efe802ba8c868922af23099638fde5b7b4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 14 Jan 2019 15:44:44 +0800 Subject: [PATCH 292/539] drm/amd/powerplay: support retrieving and adjusting fclock power levels V2 User can use "pp_dpm_fclk" to retrieve and adjust fclock power levels. V2: expose this interface for Vega20 and later ASICs only Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 53 ++++++++++++++- .../gpu/drm/amd/include/kgd_pp_interface.h | 1 + .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 64 +++++++++++++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 4bdb1d082bea..23eb6f61da13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -680,13 +680,14 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, - * pp_dpm_socclk and pp_dpm_pcie are used for this. + * pp_dpm_socclk, pp_dpm_fclk and pp_dpm_pcie are used for this. * * pp_dpm_socclk interface is only available for Vega10 and later ASICs. + * pp_dpm_fclk interface is only available for Vega20 and later ASICs. * * Reading back the files will show you the available power levels within * the power state and the clock information for those levels. @@ -842,6 +843,42 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return count; } +static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->powerplay.pp_funcs->print_clock_levels) + return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); + else + return snprintf(buf, PAGE_SIZE, "\n"); +} + +static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + uint32_t mask = 0; + + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; + + if (adev->powerplay.pp_funcs->force_clock_level) + ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); + + if (ret) + return -EINVAL; + + return count; +} + static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct device_attribute *attr, char *buf) @@ -1128,6 +1165,9 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_socclk, amdgpu_set_pp_dpm_socclk); +static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_fclk, + amdgpu_set_pp_dpm_fclk); static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_pcie, amdgpu_set_pp_dpm_pcie); @@ -2294,6 +2334,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->asic_type >= CHIP_VEGA20) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_fclk\n"); + return ret; + } + } ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); if (ret) { DRM_ERROR("failed to create device file pp_dpm_pcie\n"); @@ -2384,6 +2431,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_VEGA10) device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (adev->asic_type >= CHIP_VEGA20) + device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index f5ec25a6ab54..f82de14f6560 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -93,6 +93,7 @@ enum pp_clock_type { PP_MCLK, PP_PCIE, PP_SOCCLK, + PP_FCLK, OD_SCLK, OD_MCLK, OD_VDDC_CURVE, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index e7c890f036fa..4e7399c310e2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1736,6 +1736,17 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_ return ret); } + if (data->smu_features[GNLD_DPM_FCLK].enabled && + (feature_mask & FEATURE_DPM_FCLK_MASK)) { + min_freq = data->dpm_table.fclk_table.dpm_state.soft_min_level; + + PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter( + hwmgr, PPSMC_MSG_SetSoftMinByFreq, + (PPCLK_FCLK << 16) | (min_freq & 0xffff))), + "Failed to set soft min fclk!", + return ret); + } + return ret; } @@ -1808,6 +1819,17 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_ return ret); } + if (data->smu_features[GNLD_DPM_FCLK].enabled && + (feature_mask & FEATURE_DPM_FCLK_MASK)) { + max_freq = data->dpm_table.fclk_table.dpm_state.soft_max_level; + + PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter( + hwmgr, PPSMC_MSG_SetSoftMaxByFreq, + (PPCLK_FCLK << 16) | (max_freq & 0xffff))), + "Failed to set soft max fclk!", + return ret); + } + return ret; } @@ -2324,6 +2346,34 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, break; + case PP_FCLK: + soft_min_level = mask ? (ffs(mask) - 1) : 0; + soft_max_level = mask ? (fls(mask) - 1) : 0; + + if (soft_max_level >= data->dpm_table.fclk_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, + data->dpm_table.fclk_table.count - 1); + return -EINVAL; + } + + data->dpm_table.fclk_table.dpm_state.soft_min_level = + data->dpm_table.fclk_table.dpm_levels[soft_min_level].value; + data->dpm_table.fclk_table.dpm_state.soft_max_level = + data->dpm_table.fclk_table.dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_FCLK_MASK); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload boot level to lowest!", + return ret); + + ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_FCLK_MASK); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload dpm max level to highest!", + return ret); + + break; + case PP_PCIE: soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; @@ -2920,6 +2970,8 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, PPTable_t *pptable = (PPTable_t *)pptable_information->smc_pptable; struct amdgpu_device *adev = hwmgr->adev; struct pp_clock_levels_with_latency clocks; + struct vega20_single_dpm_table *fclk_dpm_table = + &(data->dpm_table.fclk_table); int i, now, size = 0; int ret = 0; uint32_t gen_speed, lane_width; @@ -2976,6 +3028,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; + case PP_FCLK: + ret = vega20_get_current_clk_freq(hwmgr, PPCLK_FCLK, &now); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to get current fclk freq Failed!", + return ret); + + for (i = 0; i < fclk_dpm_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, fclk_dpm_table->dpm_levels[i].value, + fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : ""); + break; + case PP_PCIE: gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) From d7e28e2d6b518af760c81ba578ecf4e4b0c0f401 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 14 Jan 2019 17:37:26 +0800 Subject: [PATCH 293/539] drm/amd/powerplay: support retrieving and adjusting dcefclock power levels V2 User can use "pp_dpm_dcefclk" to retrieve and adjust dcefclock power levels. V2: expose this interface for Vega10 and later ASICs only Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 59 +++++++++++++++++-- .../gpu/drm/amd/include/kgd_pp_interface.h | 1 + .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 52 +++++++++++++++- 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 23eb6f61da13..4099c6b97cb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -680,13 +680,16 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk + * pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, - * pp_dpm_socclk, pp_dpm_fclk and pp_dpm_pcie are used for this. + * pp_dpm_socclk, pp_dpm_fclk, pp_dpm_dcefclk and pp_dpm_pcie are used for + * this. * - * pp_dpm_socclk interface is only available for Vega10 and later ASICs. + * pp_dpm_socclk and pp_dpm_dcefclk interfaces are only available for + * Vega10 and later ASICs. * pp_dpm_fclk interface is only available for Vega20 and later ASICs. * * Reading back the files will show you the available power levels within @@ -697,6 +700,8 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, * Secondly,Enter a new value for each level by inputing a string that * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. + * + * NOTE: change to the dcefclk max dpm level is not supported now */ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, @@ -879,6 +884,42 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return count; } +static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->powerplay.pp_funcs->print_clock_levels) + return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); + else + return snprintf(buf, PAGE_SIZE, "\n"); +} + +static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + uint32_t mask = 0; + + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; + + if (adev->powerplay.pp_funcs->force_clock_level) + ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); + + if (ret) + return -EINVAL; + + return count; +} + static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct device_attribute *attr, char *buf) @@ -1168,6 +1209,9 @@ static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_fclk, amdgpu_set_pp_dpm_fclk); +static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_dcefclk, + amdgpu_set_pp_dpm_dcefclk); static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, amdgpu_get_pp_dpm_pcie, amdgpu_set_pp_dpm_pcie); @@ -2333,6 +2377,11 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_socclk\n"); return ret; } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); + return ret; + } } if (adev->asic_type >= CHIP_VEGA20) { ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk); @@ -2428,8 +2477,10 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); - if (adev->asic_type >= CHIP_VEGA10) + if (adev->asic_type >= CHIP_VEGA10) { device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); + device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + } device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); if (adev->asic_type >= CHIP_VEGA20) device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index f82de14f6560..2b579ba9b685 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -94,6 +94,7 @@ enum pp_clock_type { PP_PCIE, PP_SOCCLK, PP_FCLK, + PP_DCEFCLK, OD_SCLK, OD_MCLK, OD_VDDC_CURVE, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 4e7399c310e2..8c1fa985c7d4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1747,6 +1747,17 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_ return ret); } + if (data->smu_features[GNLD_DPM_DCEFCLK].enabled && + (feature_mask & FEATURE_DPM_DCEFCLK_MASK)) { + min_freq = data->dpm_table.dcef_table.dpm_state.hard_min_level; + + PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter( + hwmgr, PPSMC_MSG_SetHardMinByFreq, + (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff))), + "Failed to set hard min dcefclk!", + return ret); + } + return ret; } @@ -2259,7 +2270,7 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - uint32_t soft_min_level, soft_max_level; + uint32_t soft_min_level, soft_max_level, hard_min_level; int ret = 0; switch (type) { @@ -2374,6 +2385,28 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr, break; + case PP_DCEFCLK: + hard_min_level = mask ? (ffs(mask) - 1) : 0; + + if (hard_min_level >= data->dpm_table.dcef_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + hard_min_level, + data->dpm_table.dcef_table.count - 1); + return -EINVAL; + } + + data->dpm_table.dcef_table.dpm_state.hard_min_level = + data->dpm_table.dcef_table.dpm_levels[hard_min_level].value; + + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_DCEFCLK_MASK); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload boot level to lowest!", + return ret); + + //TODO: Setting DCEFCLK max dpm level is not supported + + break; + case PP_PCIE: soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; @@ -3040,6 +3073,23 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : ""); break; + case PP_DCEFCLK: + ret = vega20_get_current_clk_freq(hwmgr, PPCLK_DCEFCLK, &now); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to get current dcefclk freq Failed!", + return ret); + + ret = vega20_get_dcefclocks(hwmgr, &clocks); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to get dcefclk levels Failed!", + return ret); + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); + break; + case PP_PCIE: gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) From 5d50fcbda7b0acd301bb1fc3d828df0aa29237b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 11 Jan 2019 14:12:58 +0100 Subject: [PATCH 294/539] drm/ttm: stop always moving BOs on the LRU on page fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the BO on the LRU only when it is actually moved by a DMA operation. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Tested-And-Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a1d977fbade5..e86a29a1e51f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -71,7 +71,7 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, ttm_bo_get(bo); up_read(&vmf->vma->vm_mm->mmap_sem); (void) dma_fence_wait(bo->moving, true); - ttm_bo_unreserve(bo); + reservation_object_unlock(bo->resv); ttm_bo_put(bo); goto out_unlock; } @@ -131,11 +131,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) * for reserve, and if it fails, retry the fault after waiting * for the buffer to become unreserved. */ - err = ttm_bo_reserve(bo, true, true, NULL); - if (unlikely(err != 0)) { - if (err != -EBUSY) - return VM_FAULT_NOPAGE; - + if (unlikely(!reservation_object_trylock(bo->resv))) { if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { ttm_bo_get(bo); @@ -165,6 +161,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) } if (bdev->driver->fault_reserve_notify) { + struct dma_fence *moving = dma_fence_get(bo->moving); + err = bdev->driver->fault_reserve_notify(bo); switch (err) { case 0: @@ -177,6 +175,13 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; goto out_unlock; } + + if (bo->moving != moving) { + spin_lock(&bdev->glob->lru_lock); + ttm_bo_move_to_lru_tail(bo, NULL); + spin_unlock(&bdev->glob->lru_lock); + } + dma_fence_put(moving); } /* @@ -291,7 +296,7 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) out_io_unlock: ttm_mem_io_unlock(man); out_unlock: - ttm_bo_unreserve(bo); + reservation_object_unlock(bo->resv); return ret; } From e95b93ce4116780285f11f08a2bf78f48b23ed11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 12 Dec 2018 15:13:10 +0100 Subject: [PATCH 295/539] drm/amdgpu: set the executable flag on unused Vega10 PTEs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we run into a non-retry fault on access. It seems to be a hardware bug that the executable bit has higher priority than the valid bit. v2: handle clears as well Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 ++++++++++++++++++-------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ffffe021cccf..0bc6f553dc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -821,9 +821,16 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, addr += ats_entries * 8; } - if (entries) + if (entries) { + uint64_t value = 0; + + /* Workaround for fault priority problem on GMC9 */ + if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10) + value = AMDGPU_PTE_EXECUTABLE; + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - entries, 0, 0); + entries, 0, value); + } amdgpu_ring_pad_ib(ring, &job->ibs[0]); @@ -1525,20 +1532,27 @@ error: } /** - * amdgpu_vm_update_huge - figure out parameters for PTE updates + * amdgpu_vm_update_flags - figure out flags for PTE updates * * Make sure to set the right flags for the PTEs at the desired level. */ -static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, unsigned level, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) +static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, unsigned level, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) { if (level != AMDGPU_VM_PTB) { flags |= AMDGPU_PDE_PTE; amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + + } else if (params->adev->asic_type >= CHIP_VEGA10 && + !(flags & AMDGPU_PTE_VALID) && + !(flags & AMDGPU_PTE_PRT)) { + + /* Workaround for fault priority problem on GMC9 */ + flags |= AMDGPU_PTE_EXECUTABLE; } amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); @@ -1695,9 +1709,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t upd_end = min(entry_end, frag_end); unsigned nptes = (upd_end - frag_start) >> shift; - amdgpu_vm_update_huge(params, pt, cursor.level, - pe_start, dst, nptes, incr, - flags | AMDGPU_PTE_FRAG(frag)); + amdgpu_vm_update_flags(params, pt, cursor.level, + pe_start, dst, nptes, incr, + flags | AMDGPU_PTE_FRAG(frag)); pe_start += nptes * 8; dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift; From ad710812b5385d0128f870bece22dad25f71d756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 Sep 2018 14:51:35 +0200 Subject: [PATCH 296/539] drm/amdgpu: enable IH ring 1 and ring 2 v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The entries are ignored for now, but it at least stops crashing the hardware when somebody tries to push something to the other IH rings. v2: limit ring size, add TODO comment v3: only program rings if they are actually allocated v4: limit the ring init to Vega10 Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 4 +- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 145 ++++++++++++++++++++---- 2 files changed, 123 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index f6ce171cb8aa..7e06fa64321a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -87,8 +87,8 @@ struct amdgpu_irq { /* status, etc. */ bool msi_enabled; /* msi enabled */ - /* interrupt ring */ - struct amdgpu_ih_ring ih; + /* interrupt rings */ + struct amdgpu_ih_ring ih, ih1, ih2; const struct amdgpu_ih_funcs *ih_funcs; /* gen irq stuff */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 877b4a6d21f7..7553d91d10e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -50,6 +50,22 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); adev->irq.ih.enabled = true; + + if (adev->irq.ih1.ring_size) { + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + adev->irq.ih1.enabled = true; + } + + if (adev->irq.ih2.ring_size) { + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, + RB_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + adev->irq.ih2.enabled = true; + } } /** @@ -71,6 +87,53 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); adev->irq.ih.enabled = false; adev->irq.ih.rptr = 0; + + if (adev->irq.ih1.ring_size) { + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_ENABLE, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); + adev->irq.ih1.enabled = false; + adev->irq.ih1.rptr = 0; + } + + if (adev->irq.ih2.ring_size) { + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, + RB_ENABLE, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); + adev->irq.ih2.enabled = false; + adev->irq.ih2.rptr = 0; + } +} + +static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 1 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; } /** @@ -86,9 +149,8 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) */ static int vega10_ih_irq_init(struct amdgpu_device *adev) { - struct amdgpu_ih_ring *ih = &adev->irq.ih; + struct amdgpu_ih_ring *ih; int ret = 0; - int rb_bufsz; u32 ih_rb_cntl, ih_doorbell_rtpr; u32 tmp; @@ -97,26 +159,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) adev->nbio_funcs->ih_control(adev); - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih = &adev->irq.ih; /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, - (adev->irq.ih.gpu_addr >> 40) & 0xff); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, - ih->use_bus_addr ? 1 : 4); - rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); - /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */ - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); - - if (adev->irq.msi_enabled) - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, + !!adev->irq.msi_enabled); WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); /* set the writeback address whether it's enabled or not */ @@ -131,16 +182,49 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); if (adev->irq.ih.use_doorbell) { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, - OFFSET, adev->irq.ih.doorbell_index); - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + adev->irq.ih.doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 1); } else { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 0); } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + ih = &adev->irq.ih1; + if (ih->ring_size) { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING1, ih->gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING1, + (ih->gpu_addr >> 40) & 0xff); + + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); + } + + ih = &adev->irq.ih2; + if (ih->ring_size) { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING2, ih->gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2, + (ih->gpu_addr >> 40) & 0xff); + + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); + } + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, CLIENT18_IS_STORM_CLIENT, 1); @@ -297,6 +381,17 @@ static int vega10_ih_sw_init(void *handle) if (r) return r; + if (adev->asic_type == CHIP_VEGA10) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); + if (r) + return r; + } + + /* TODO add doorbell for IH1 & IH2 as well */ adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; @@ -310,6 +405,8 @@ static int vega10_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih2); + amdgpu_ih_ring_fini(adev, &adev->irq.ih1); amdgpu_ih_ring_fini(adev, &adev->irq.ih); return 0; From 9dd60c4e5918e4d30ac91415f08127d1056e51ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 13:45:38 +0200 Subject: [PATCH 297/539] drm/amdgpu: add support for processing IH ring 1 & 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we only added the ring buffer memory, now add the handling as well. Signed-off-by: Christian König Acked-by: Alex Deucher Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 33 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 4 ++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b8e543e23166..8bfb3dab46f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -176,6 +176,36 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) return ret; } +/** + * amdgpu_irq_handle_ih1 - kick of processing for IH1 + * + * @work: work structure in struct amdgpu_irq + * + * Kick of processing IH ring 1. + */ +static void amdgpu_irq_handle_ih1(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + irq.ih1_work); + + amdgpu_ih_process(adev, &adev->irq.ih1, amdgpu_irq_callback); +} + +/** + * amdgpu_irq_handle_ih2 - kick of processing for IH2 + * + * @work: work structure in struct amdgpu_irq + * + * Kick of processing IH ring 2. + */ +static void amdgpu_irq_handle_ih2(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + irq.ih2_work); + + amdgpu_ih_process(adev, &adev->irq.ih2, amdgpu_irq_callback); +} + /** * amdgpu_msi_ok - check whether MSI functionality is enabled * @@ -240,6 +270,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev) amdgpu_hotplug_work_func); } + INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1); + INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); + adev->irq.installed = true; r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 7e06fa64321a..c27decfda494 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -89,7 +89,9 @@ struct amdgpu_irq { /* interrupt rings */ struct amdgpu_ih_ring ih, ih1, ih2; - const struct amdgpu_ih_funcs *ih_funcs; + const struct amdgpu_ih_funcs *ih_funcs; + struct work_struct ih1_work, ih2_work; + struct amdgpu_irq_src self_irq; /* gen irq stuff */ struct irq_domain *domain; /* GPU irq controller domain */ From cf67950e2241ab88b4ff1ee49bbe8716d6a283f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 14:15:21 +0200 Subject: [PATCH 298/539] drm/amdgpu: add support for self irq on Vega10 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This finally enables processing of ring 1 & 2. v2: fix copy&paste error Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 80 ++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 7553d91d10e3..6d1f804277f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -270,7 +270,7 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev) static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - u32 wptr, tmp; + u32 wptr, reg, tmp; wptr = le32_to_cpu(*ih->wptr_cpu); @@ -278,7 +278,17 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, goto out; /* Double check that the overflow wasn't already cleared. */ - wptr = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR)); + + if (ih == &adev->irq.ih) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + else if (ih == &adev->irq.ih1) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); + else if (ih == &adev->irq.ih2) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); + else + BUG(); + + wptr = RREG32_NO_KIQ(reg); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; @@ -294,9 +304,18 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, wptr, ih->rptr, tmp); ih->rptr = tmp; - tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + if (ih == &adev->irq.ih) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + else if (ih == &adev->irq.ih1) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1); + else if (ih == &adev->irq.ih2) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); + else + BUG(); + + tmp = RREG32_NO_KIQ(reg); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); + WREG32_NO_KIQ(reg, tmp); out: return (wptr & ih->ptr_mask); @@ -359,23 +378,72 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); - } else { + } else if (ih == &adev->irq.ih) { WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); + } else if (ih == &adev->irq.ih1) { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, ih->rptr); + } else if (ih == &adev->irq.ih2) { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, ih->rptr); } } +/** + * vega10_ih_self_irq - dispatch work for ring 1 and 2 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int vega10_ih_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + case 2: + *adev->irq.ih2.wptr_cpu = wptr; + schedule_work(&adev->irq.ih2_work); + break; + default: break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs vega10_ih_self_irq_funcs = { + .process = vega10_ih_self_irq, +}; + +static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs; +} + static int vega10_ih_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; vega10_ih_set_interrupt_funcs(adev); + vega10_ih_set_self_irq_funcs(adev); return 0; } static int vega10_ih_sw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + if (r) + return r; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); if (r) From fe96b99dc72b16e3c8406ab17fe9e73e0aef1bd9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Jan 2019 23:22:02 -0600 Subject: [PATCH 299/539] drm/amdgpu: Replace kzalloc with kcalloc Replace kzalloc() function with its 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a, b, gfp) Also, improve the coding style and the use of sizeof during allocation by changing sizeof(struct dc_surface_update) and sizeof(struct dc_plane_state) to sizeof(*updates) and sizeof(*surfaces), correspondingly. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index feb1b24efaaa..d5b3b7a215be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5835,11 +5835,14 @@ dm_determine_update_type_for_commit(struct dc *dc, struct dm_crtc_state *new_dm_crtc_state, *old_dm_crtc_state; struct dc_stream_status *status = NULL; - struct dc_surface_update *updates = kzalloc(MAX_SURFACES * sizeof(struct dc_surface_update), GFP_KERNEL); - struct dc_plane_state *surface = kzalloc(MAX_SURFACES * sizeof(struct dc_plane_state), GFP_KERNEL); + struct dc_surface_update *updates; + struct dc_plane_state *surface; struct dc_stream_update stream_update; enum surface_update_type update_type = UPDATE_TYPE_FAST; + updates = kcalloc(MAX_SURFACES, sizeof(*updates), GFP_KERNEL); + surface = kcalloc(MAX_SURFACES, sizeof(*surface), GFP_KERNEL); + if (!updates || !surface) { DRM_ERROR("Plane or surface update failed to allocate"); /* Set type to FULL to avoid crashing in DC*/ From 3680b2a5b6d92747b3293ccc6783da29574147f1 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Jan 2019 13:57:29 +0800 Subject: [PATCH 300/539] drm/amdgpu: check PSP support before adding the ip block So that we do not need to check this in every internal function. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 24 ------------------------ drivers/gpu/drm/amd/amdgpu/soc15.c | 13 ++++++++----- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f26d8faa7507..d87f165e3a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -67,9 +67,6 @@ static int psp_sw_init(void *handle) psp->adev = adev; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); @@ -83,9 +80,6 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -721,10 +715,6 @@ static int psp_hw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - mutex_lock(&adev->firmware.mutex); /* * This sequence is just used on hw_init only once, no need on @@ -754,9 +744,6 @@ static int psp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - if (adev->gmc.xgmi.num_physical_nodes > 1 && psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); @@ -785,9 +772,6 @@ static int psp_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - if (adev->gmc.xgmi.num_physical_nodes > 1 && psp->xgmi_context.initialized == 1) { ret = psp_xgmi_terminate(psp); @@ -812,9 +796,6 @@ static int psp_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - return 0; - DRM_INFO("PSP is resuming...\n"); mutex_lock(&adev->firmware.mutex); @@ -850,11 +831,6 @@ static bool psp_check_fw_loading_status(struct amdgpu_device *adev, { struct amdgpu_firmware_info *ucode = NULL; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - DRM_INFO("firmware is not loaded by PSP\n"); - return true; - } - if (!adev->firmware.fw_size) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 671f78124cb0..62d272b4be19 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -595,10 +595,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (!amdgpu_sriov_vf(adev)) @@ -620,7 +622,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); From 0208a105eead974acc6043dec027c08da3740e17 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 16 Jan 2019 14:11:50 +0800 Subject: [PATCH 301/539] drm/amdgpu: fix wrong APU judgement Fix the APU judgement to make it really work as expected. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 4099c6b97cb1..250be1ef1719 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2429,7 +2429,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } /* PCIe Perf counters won't work on APU nodes */ - if (adev->flags & !AMD_IS_APU) { + if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); if (ret) { DRM_ERROR("failed to create device file pcie_bw\n"); @@ -2492,7 +2492,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); - if (adev->flags & !AMD_IS_APU) + if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) From 24ba59501257f35b4e1adde9c643f3d149e44084 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jan 2019 12:09:09 -0500 Subject: [PATCH 302/539] drm/amdgpu: Add APTX quirk for Lenovo laptop Needs ATPX rather than _PR3 for dGPU power control. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=202263 Reviewed-by: Jim Qu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index a028661d9e20..92b11de19581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -576,6 +576,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; From f14899fd2a560796450e9af383dfaee6ce557d6b Mon Sep 17 00:00:00 2001 From: wentalou Date: Thu, 17 Jan 2019 17:38:33 +0800 Subject: [PATCH 303/539] drm/amdgpu: sriov should skip asic_reset in device_init sriov would meet guest driver load failure, if calling amdgpu_asic_reset in amdgpu_device_init. sriov should skip asic_reset in device_init. Signed-off-by: Wentao Lou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8a61764e64cf..e20dce438d37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2553,7 +2553,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* check if we need to reset the asic * E.g., driver was not cleanly unloaded previously, etc. */ - if (amdgpu_asic_need_reset_on_init(adev)) { + if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { r = amdgpu_asic_reset(adev); if (r) { dev_err(adev->dev, "asic reset on init failed\n"); From a7cd97718166be64b3359f586bbe0a6bb64a6ba4 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 18 Jan 2019 18:08:19 +0800 Subject: [PATCH 304/539] drm/amd/powerplay: OD setting fix on Vega10 gfxclk for OD setting is limited to 1980M for non-acg ASICs of Vega10 Signed-off-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../powerplay/hwmgr/vega10_processpptables.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b8747a5c9204..99d596dc0e89 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -32,6 +32,7 @@ #include "vega10_pptable.h" #define NUM_DSPCLK_LEVELS 8 +#define VEGA10_ENGINECLOCK_HARDMAX 198000 static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable, enum phm_platform_caps cap) @@ -258,7 +259,26 @@ static int init_over_drive_limits( struct pp_hwmgr *hwmgr, const ATOM_Vega10_POWERPLAYTABLE *powerplay_table) { - hwmgr->platform_descriptor.overdriveLimit.engineClock = + const ATOM_Vega10_GFXCLK_Dependency_Table *gfxclk_dep_table = + (const ATOM_Vega10_GFXCLK_Dependency_Table *) + (((unsigned long) powerplay_table) + + le16_to_cpu(powerplay_table->usGfxclkDependencyTableOffset)); + bool is_acg_enabled = false; + ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_v2; + + if (gfxclk_dep_table->ucRevId == 1) { + patom_record_v2 = + (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries; + is_acg_enabled = + (bool)patom_record_v2[gfxclk_dep_table->ucNumEntries-1].ucACGEnable; + } + + if (powerplay_table->ulMaxODEngineClock > VEGA10_ENGINECLOCK_HARDMAX && + !is_acg_enabled) + hwmgr->platform_descriptor.overdriveLimit.engineClock = + VEGA10_ENGINECLOCK_HARDMAX; + else + hwmgr->platform_descriptor.overdriveLimit.engineClock = le32_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = le32_to_cpu(powerplay_table->ulMaxODMemoryClock); From c4c905ec7b8aaeb06331ee875d3cc9b73dfa481f Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Fri, 18 Jan 2019 18:13:36 +0800 Subject: [PATCH 305/539] drm/amdgpu: add flags to emit_ib interface v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the last bool type parameter with a general flags parameter, to make the last parameter be able to contain more information. v2: drop setting need_ctx_switch = false Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Jack Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 6 +++--- 20 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c48207b377bc..0b8ef2d27d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -202,12 +202,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_asic_flush_hdp(adev, ring); } + if (need_ctx_switch) + status |= AMDGPU_HAVE_CTX_SWITCH; + skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { - if (need_ctx_switch) - status |= AMDGPU_HAVE_CTX_SWITCH; status |= job->preamble_status; - amdgpu_ring_emit_cntxcntl(ring, status); } @@ -221,8 +221,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch); - need_ctx_switch = false; + amdgpu_ring_emit_ib(ring, job, ib, status); + status &= ~AMDGPU_HAVE_CTX_SWITCH; } if (ring->funcs->emit_tmz) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d87e828a084b..d7fae2676269 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -131,7 +131,7 @@ struct amdgpu_ring_funcs { void (*emit_ib)(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch); + uint32_t flags); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -229,7 +229,7 @@ struct amdgpu_ring { #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c))) +#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags))) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 98a1b2ce2b9d..c021b114c8a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -1035,7 +1035,7 @@ out: void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 50293652af14..30ea54dd9117 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -66,7 +66,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, - struct amdgpu_ib *ib, bool ctx_switch); + struct amdgpu_ib *ib, uint32_t flags); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 45795191de1f..189599b694e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -220,7 +220,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 extra_bits = vmid & 0xf; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 1dc3013ea1d5..305276c7e4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1842,13 +1842,13 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (ctx_switch) { + if (flags & AMDGPU_HAVE_CTX_SWITCH) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 3a9fb6018c16..7984292f9282 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2228,13 +2228,13 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (ctx_switch) { + if (flags & AMDGPU_HAVE_CTX_SWITCH) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2259,7 +2259,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 57cb3a51bda7..a26747681ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6047,7 +6047,7 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; @@ -6079,7 +6079,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fbca0494f871..262ee3cf6f1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3972,7 +3972,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; @@ -4005,7 +4005,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9f3cb2aec7c2..cca3552b36ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -247,7 +247,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 06c5a277aa76..0ce8331baeb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -421,7 +421,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 48a166ba9fed..127b85983e8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -500,7 +500,7 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index b6e473134e19..f15f196684ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -63,7 +63,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index d69c8f6daaf8..c4fb58667fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -511,7 +511,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ee8cd06ddc38..52bd8a654734 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -526,7 +526,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index d4f4a66f8324..c9edddf9f88a 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -977,7 +977,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1003,7 +1003,7 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index aef924026a28..dc461df48da0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1272,7 +1272,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1303,7 +1303,7 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 2668effadd27..6ec65cf11112 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -834,7 +834,7 @@ out: static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 9fb34b7d8e03..aadc3e66ebd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -947,7 +947,7 @@ static int vce_v4_0_set_powergating_state(void *handle, #endif static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, - struct amdgpu_ib *ib, bool ctx_switch) + struct amdgpu_ib *ib, uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 89bb2fef90eb..3dbc51f9d3b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1371,7 +1371,7 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1531,7 +1531,7 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1736,7 +1736,7 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, - bool ctx_switch) + uint32_t flags) { struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); From a0e4fa2f2889c2d02448a9c20d3f24f2fd13e01c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 17 Jan 2019 17:48:30 +0800 Subject: [PATCH 306/539] drm/amd/powerplay: avoid unnecessary dpm level setting No dpm level setting is needed when the request level is actually same as current. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c index 56437866d120..68f3dcaa8070 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c @@ -278,7 +278,8 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip, phm_notify_smc_display_config_after_ps_adjustment(hwmgr); - if (!phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level)) + if ((hwmgr->request_dpm_level != hwmgr->dpm_level) && + !phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level)) hwmgr->dpm_level = hwmgr->request_dpm_level; if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { From 921935dc64040b56c785a02b2f39fd5d28932c78 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 17 Jan 2019 17:52:41 +0800 Subject: [PATCH 307/539] drm/amd/powerplay: enforce display related settings only on needed No display related settings are needed on dpm level change. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 10 +++++----- drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c | 11 +++++------ drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.h | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 310b102a9292..6cd6497c6fc2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -273,7 +273,7 @@ int hwmgr_hw_fini(struct pp_hwmgr *hwmgr) phm_stop_thermal_controller(hwmgr); psm_set_boot_states(hwmgr); - psm_adjust_power_state_dynamic(hwmgr, false, NULL); + psm_adjust_power_state_dynamic(hwmgr, true, NULL); phm_disable_dynamic_state_management(hwmgr); phm_disable_clock_power_gatings(hwmgr); @@ -295,7 +295,7 @@ int hwmgr_suspend(struct pp_hwmgr *hwmgr) ret = psm_set_boot_states(hwmgr); if (ret) return ret; - ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL); + ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL); if (ret) return ret; ret = phm_power_down_asic(hwmgr); @@ -325,7 +325,7 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr) if (ret) return ret; - ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL); + ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL); return ret; } @@ -379,12 +379,12 @@ int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id, ret = psm_set_user_performance_state(hwmgr, requested_ui_label, &requested_ps); if (ret) return ret; - ret = psm_adjust_power_state_dynamic(hwmgr, false, requested_ps); + ret = psm_adjust_power_state_dynamic(hwmgr, true, requested_ps); break; } case AMD_PP_TASK_COMPLETE_INIT: case AMD_PP_TASK_READJUST_POWER_STATE: - ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL); + ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL); break; default: break; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c index 68f3dcaa8070..ce177d7f04cb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c @@ -256,16 +256,14 @@ static void power_state_management(struct pp_hwmgr *hwmgr, } } -int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip, +int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip_display_settings, struct pp_power_state *new_ps) { uint32_t index; long workload; - if (skip) - return 0; - - phm_display_configuration_changed(hwmgr); + if (!skip_display_settings) + phm_display_configuration_changed(hwmgr); if (hwmgr->ps) power_state_management(hwmgr, new_ps); @@ -276,7 +274,8 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip, */ phm_apply_clock_adjust_rules(hwmgr); - phm_notify_smc_display_config_after_ps_adjustment(hwmgr); + if (!skip_display_settings) + phm_notify_smc_display_config_after_ps_adjustment(hwmgr); if ((hwmgr->request_dpm_level != hwmgr->dpm_level) && !phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.h b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.h index fa1b6825036a..b62d55f1f289 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.h @@ -34,7 +34,7 @@ int psm_set_user_performance_state(struct pp_hwmgr *hwmgr, enum PP_StateUILabel label_id, struct pp_power_state **state); int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, - bool skip, + bool skip_display_settings, struct pp_power_state *new_ps); #endif From c3e5bb04cc2aa6c9caeac626948619233b2d89f4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 21 Jan 2019 14:05:37 +0800 Subject: [PATCH 308/539] drm/amd/powerplay: fit the SOC clock also to the new performance level The SOC clock needs also to fit the new performance level. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 8c1fa985c7d4..60a22d8da7f0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2170,6 +2170,12 @@ static int vega20_force_dpm_highest(struct pp_hwmgr *hwmgr) data->dpm_table.mem_table.dpm_state.soft_max_level = data->dpm_table.mem_table.dpm_levels[soft_level].value; + soft_level = vega20_find_highest_dpm_level(&(data->dpm_table.soc_table)); + + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_level].value; + ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); PP_ASSERT_WITH_CODE(!ret, "Failed to upload boot level to highest!", @@ -2202,6 +2208,12 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr) data->dpm_table.mem_table.dpm_state.soft_max_level = data->dpm_table.mem_table.dpm_levels[soft_level].value; + soft_level = vega20_find_lowest_dpm_level(&(data->dpm_table.soc_table)); + + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_level].value; + ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); PP_ASSERT_WITH_CODE(!ret, "Failed to upload boot level to highest!", @@ -2218,8 +2230,32 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr) static int vega20_unforce_dpm_levels(struct pp_hwmgr *hwmgr) { + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + uint32_t soft_min_level, soft_max_level; int ret = 0; + soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.gfx_table)); + soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.gfx_table)); + data->dpm_table.gfx_table.dpm_state.soft_min_level = + data->dpm_table.gfx_table.dpm_levels[soft_min_level].value; + data->dpm_table.gfx_table.dpm_state.soft_max_level = + data->dpm_table.gfx_table.dpm_levels[soft_max_level].value; + + soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.mem_table)); + soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.mem_table)); + data->dpm_table.mem_table.dpm_state.soft_min_level = + data->dpm_table.mem_table.dpm_levels[soft_min_level].value; + data->dpm_table.mem_table.dpm_state.soft_max_level = + data->dpm_table.mem_table.dpm_levels[soft_max_level].value; + + soft_min_level = vega20_find_lowest_dpm_level(&(data->dpm_table.soc_table)); + soft_max_level = vega20_find_highest_dpm_level(&(data->dpm_table.soc_table)); + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_levels[soft_min_level].value; + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_max_level].value; + ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); PP_ASSERT_WITH_CODE(!ret, "Failed to upload DPM Bootup Levels!", @@ -2457,6 +2493,7 @@ static int vega20_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, return ret; vega20_force_clock_level(hwmgr, PP_SCLK, 1 << sclk_mask); vega20_force_clock_level(hwmgr, PP_MCLK, 1 << mclk_mask); + vega20_force_clock_level(hwmgr, PP_SOCCLK, 1 << soc_mask); break; case AMD_DPM_FORCED_LEVEL_MANUAL: From 8ce84d4341e3abf8bc651c74f6312153c353bca8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 21 Jan 2019 14:39:26 +0800 Subject: [PATCH 309/539] drm/amd/powerplay: run btc before enabling all SMU features BTC is needed before enabling all SMU features. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 60a22d8da7f0..5085b3636f8e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -804,6 +804,11 @@ static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr) return 0; } +static int vega20_run_btc(struct pp_hwmgr *hwmgr) +{ + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunBtc); +} + static int vega20_run_btc_afll(struct pp_hwmgr *hwmgr) { return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAfllBtc); @@ -1565,6 +1570,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to initialize SMC table!", return result); + result = vega20_run_btc(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to run btc!", + return result); + result = vega20_run_btc_afll(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to run btc afll!", From 0bcaefa6bfc2e11b6d32675c872d765ba69cd9d0 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 21 Jan 2019 17:57:29 +0800 Subject: [PATCH 310/539] drm/amd/display: change the max clock level to 16 As the gfxclk for SMU11 can have at most 16 discrete levels. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dm_services_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index 9afd36a031a9..77200711abbe 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -92,7 +92,7 @@ enum dm_pp_clock_type { (clk_type) == DM_PP_CLOCK_TYPE_FCLK ? "F" : \ "Invalid" -#define DM_PP_MAX_CLOCK_LEVELS 8 +#define DM_PP_MAX_CLOCK_LEVELS 16 struct dm_pp_clock_levels { uint32_t num_levels; From 84d3245599f527138c4d4b87deed14a7e85cd81b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Jan 2019 22:19:28 -0500 Subject: [PATCH 311/539] drm/amdgpu: Add missing power attribute to APU check Add missing power_average to visible check for power attributes for APUs. Was missed before. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 250be1ef1719..a7adb7b6bd98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1981,7 +1981,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; if ((adev->flags & AMD_IS_APU) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + (attr == &sensor_dev_attr_power1_average.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; From 089888c46841cef767927c9e17e4c9c037d24c4e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jan 2019 12:05:16 -0500 Subject: [PATCH 312/539] drm/radeon: check if device is root before getting pci speed caps Check if the device is root rather before attempting to see what speeds the pcie port supports. Fixes a crash with pci passthrough in a VM. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109366 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 5 +++-- drivers/gpu/drm/radeon/si_dpm.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index d587779a80b4..a97294ac96d5 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev) u16 data_offset, size; u8 frev, crev; struct ci_power_info *pi; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8fb60b3af015..0a785ef0ab66 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev) struct ni_power_info *ni_pi; struct si_power_info *si_pi; struct atom_clock_dividers dividers; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev) eg_pi = &ni_pi->eg; pi = &eg_pi->rv7xx; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { si_pi->sys_pcie_mask = 0; } else { From bc4b539e38508839a6c85d5c5ff52d32f6471fab Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 16 Jan 2019 11:55:04 -0500 Subject: [PATCH 313/539] drm/amdgpu: remove old CI DPM implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power smu7 powerplay code is much more robust and has been the default for a while now. Remove the old code. Acked-by: Christian König Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6844 -------------------------- drivers/gpu/drm/amd/amdgpu/ci_dpm.h | 349 -- drivers/gpu/drm/amd/amdgpu/ci_smc.c | 279 -- drivers/gpu/drm/amd/amdgpu/cik.c | 10 +- drivers/gpu/drm/amd/amdgpu/cik_dpm.h | 1 - 6 files changed, 3 insertions(+), 7482 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdgpu/ci_dpm.c delete mode 100644 drivers/gpu/drm/amd/amdgpu/ci_dpm.h delete mode 100644 drivers/gpu/drm/amd/amdgpu/ci_smc.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f76bcb9c45e4..466da5954a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -57,7 +57,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ - ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o + dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c deleted file mode 100644 index 86e14c754dd4..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ /dev/null @@ -1,6844 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include -#include -#include "amdgpu.h" -#include "amdgpu_pm.h" -#include "amdgpu_ucode.h" -#include "cikd.h" -#include "amdgpu_dpm.h" -#include "ci_dpm.h" -#include "gfx_v7_0.h" -#include "atom.h" -#include "amd_pcie.h" -#include - -#include "smu/smu_7_0_1_d.h" -#include "smu/smu_7_0_1_sh_mask.h" - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" - -#include "bif/bif_4_1_d.h" -#include "bif/bif_4_1_sh_mask.h" - -#include "gca/gfx_7_2_d.h" -#include "gca/gfx_7_2_sh_mask.h" - -#include "gmc/gmc_7_1_d.h" -#include "gmc/gmc_7_1_sh_mask.h" - -MODULE_FIRMWARE("amdgpu/bonaire_smc.bin"); -MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin"); -MODULE_FIRMWARE("amdgpu/hawaii_smc.bin"); -MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin"); - -#define MC_CG_ARB_FREQ_F0 0x0a -#define MC_CG_ARB_FREQ_F1 0x0b -#define MC_CG_ARB_FREQ_F2 0x0c -#define MC_CG_ARB_FREQ_F3 0x0d - -#define SMC_RAM_END 0x40000 - -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 - -static const struct amd_pm_funcs ci_dpm_funcs; - -static const struct ci_pt_defaults defaults_hawaii_xt = -{ - 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000, - { 0x2E, 0x00, 0x00, 0x88, 0x00, 0x00, 0x72, 0x60, 0x51, 0xA7, 0x79, 0x6B, 0x90, 0xBD, 0x79 }, - { 0x217, 0x217, 0x217, 0x242, 0x242, 0x242, 0x269, 0x269, 0x269, 0x2A1, 0x2A1, 0x2A1, 0x2C9, 0x2C9, 0x2C9 } -}; - -static const struct ci_pt_defaults defaults_hawaii_pro = -{ - 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0x65062, - { 0x2E, 0x00, 0x00, 0x88, 0x00, 0x00, 0x72, 0x60, 0x51, 0xA7, 0x79, 0x6B, 0x90, 0xBD, 0x79 }, - { 0x217, 0x217, 0x217, 0x242, 0x242, 0x242, 0x269, 0x269, 0x269, 0x2A1, 0x2A1, 0x2A1, 0x2C9, 0x2C9, 0x2C9 } -}; - -static const struct ci_pt_defaults defaults_bonaire_xt = -{ - 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, - { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, - { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } -}; - -#if 0 -static const struct ci_pt_defaults defaults_bonaire_pro = -{ - 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062, - { 0x8C, 0x23F, 0x244, 0xA6, 0x83, 0x85, 0x86, 0x86, 0x83, 0xDB, 0xDB, 0xDA, 0x67, 0x60, 0x5F }, - { 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB } -}; -#endif - -static const struct ci_pt_defaults defaults_saturn_xt = -{ - 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x70000, - { 0x8C, 0x247, 0x249, 0xA6, 0x80, 0x81, 0x8B, 0x89, 0x86, 0xC9, 0xCA, 0xC9, 0x4D, 0x4D, 0x4D }, - { 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 } -}; - -#if 0 -static const struct ci_pt_defaults defaults_saturn_pro = -{ - 1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000, - { 0x96, 0x21D, 0x23B, 0xA1, 0x85, 0x87, 0x83, 0x84, 0x81, 0xE6, 0xE6, 0xE6, 0x71, 0x6A, 0x6A }, - { 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 } -}; -#endif - -static const struct ci_pt_config_reg didt_config_ci[] = -{ - { 0x10, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x10, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x10, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x10, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x11, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x11, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x11, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x11, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x12, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x12, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x12, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x12, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x2, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x2, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x2, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x1, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x1, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x0, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x30, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x30, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x30, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x30, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x31, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x31, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x31, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x31, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x32, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x32, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x32, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x32, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x22, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x22, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x22, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x21, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x21, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x20, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x50, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x50, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x50, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x50, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x51, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x51, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x51, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x51, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x52, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x52, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x52, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x52, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x42, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x42, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x42, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x41, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x41, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x40, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x70, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x70, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x70, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x70, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x71, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x71, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x71, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x71, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x72, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x72, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x72, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x72, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x62, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x62, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x62, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x61, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x61, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND }, - { 0x60, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND }, - { 0xFFFFFFFF } -}; - -static u8 ci_get_memory_module_index(struct amdgpu_device *adev) -{ - return (u8) ((RREG32(mmBIOS_SCRATCH_4) >> 16) & 0xff); -} - -#define MC_CG_ARB_FREQ_F0 0x0a -#define MC_CG_ARB_FREQ_F1 0x0b -#define MC_CG_ARB_FREQ_F2 0x0c -#define MC_CG_ARB_FREQ_F3 0x0d - -static int ci_copy_and_switch_arb_sets(struct amdgpu_device *adev, - u32 arb_freq_src, u32 arb_freq_dest) -{ - u32 mc_arb_dram_timing; - u32 mc_arb_dram_timing2; - u32 burst_time; - u32 mc_cg_config; - - switch (arb_freq_src) { - case MC_CG_ARB_FREQ_F0: - mc_arb_dram_timing = RREG32(mmMC_ARB_DRAM_TIMING); - mc_arb_dram_timing2 = RREG32(mmMC_ARB_DRAM_TIMING2); - burst_time = (RREG32(mmMC_ARB_BURST_TIME) & MC_ARB_BURST_TIME__STATE0_MASK) >> - MC_ARB_BURST_TIME__STATE0__SHIFT; - break; - case MC_CG_ARB_FREQ_F1: - mc_arb_dram_timing = RREG32(mmMC_ARB_DRAM_TIMING_1); - mc_arb_dram_timing2 = RREG32(mmMC_ARB_DRAM_TIMING2_1); - burst_time = (RREG32(mmMC_ARB_BURST_TIME) & MC_ARB_BURST_TIME__STATE1_MASK) >> - MC_ARB_BURST_TIME__STATE1__SHIFT; - break; - default: - return -EINVAL; - } - - switch (arb_freq_dest) { - case MC_CG_ARB_FREQ_F0: - WREG32(mmMC_ARB_DRAM_TIMING, mc_arb_dram_timing); - WREG32(mmMC_ARB_DRAM_TIMING2, mc_arb_dram_timing2); - WREG32_P(mmMC_ARB_BURST_TIME, (burst_time << MC_ARB_BURST_TIME__STATE0__SHIFT), - ~MC_ARB_BURST_TIME__STATE0_MASK); - break; - case MC_CG_ARB_FREQ_F1: - WREG32(mmMC_ARB_DRAM_TIMING_1, mc_arb_dram_timing); - WREG32(mmMC_ARB_DRAM_TIMING2_1, mc_arb_dram_timing2); - WREG32_P(mmMC_ARB_BURST_TIME, (burst_time << MC_ARB_BURST_TIME__STATE1__SHIFT), - ~MC_ARB_BURST_TIME__STATE1_MASK); - break; - default: - return -EINVAL; - } - - mc_cg_config = RREG32(mmMC_CG_CONFIG) | 0x0000000F; - WREG32(mmMC_CG_CONFIG, mc_cg_config); - WREG32_P(mmMC_ARB_CG, (arb_freq_dest) << MC_ARB_CG__CG_ARB_REQ__SHIFT, - ~MC_ARB_CG__CG_ARB_REQ_MASK); - - return 0; -} - -static u8 ci_get_ddr3_mclk_frequency_ratio(u32 memory_clock) -{ - u8 mc_para_index; - - if (memory_clock < 10000) - mc_para_index = 0; - else if (memory_clock >= 80000) - mc_para_index = 0x0f; - else - mc_para_index = (u8)((memory_clock - 10000) / 5000 + 1); - return mc_para_index; -} - -static u8 ci_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode) -{ - u8 mc_para_index; - - if (strobe_mode) { - if (memory_clock < 12500) - mc_para_index = 0x00; - else if (memory_clock > 47500) - mc_para_index = 0x0f; - else - mc_para_index = (u8)((memory_clock - 10000) / 2500); - } else { - if (memory_clock < 65000) - mc_para_index = 0x00; - else if (memory_clock > 135000) - mc_para_index = 0x0f; - else - mc_para_index = (u8)((memory_clock - 60000) / 5000); - } - return mc_para_index; -} - -static void ci_trim_voltage_table_to_fit_state_table(struct amdgpu_device *adev, - u32 max_voltage_steps, - struct atom_voltage_table *voltage_table) -{ - unsigned int i, diff; - - if (voltage_table->count <= max_voltage_steps) - return; - - diff = voltage_table->count - max_voltage_steps; - - for (i = 0; i < max_voltage_steps; i++) - voltage_table->entries[i] = voltage_table->entries[i + diff]; - - voltage_table->count = max_voltage_steps; -} - -static int ci_get_std_voltage_value_sidd(struct amdgpu_device *adev, - struct atom_voltage_table_entry *voltage_table, - u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd); -static int ci_set_power_limit(struct amdgpu_device *adev, u32 n); -static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev, - u32 target_tdp); -static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate); -static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev); - -static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, - PPSMC_Msg msg, u32 parameter); -static void ci_thermal_start_smc_fan_control(struct amdgpu_device *adev); -static void ci_fan_ctrl_set_default_mode(struct amdgpu_device *adev); - -static struct ci_power_info *ci_get_pi(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = adev->pm.dpm.priv; - - return pi; -} - -static struct ci_ps *ci_get_ps(struct amdgpu_ps *rps) -{ - struct ci_ps *ps = rps->ps_priv; - - return ps; -} - -static void ci_initialize_powertune_defaults(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - switch (adev->pdev->device) { - case 0x6649: - case 0x6650: - case 0x6651: - case 0x6658: - case 0x665C: - case 0x665D: - default: - pi->powertune_defaults = &defaults_bonaire_xt; - break; - case 0x6640: - case 0x6641: - case 0x6646: - case 0x6647: - pi->powertune_defaults = &defaults_saturn_xt; - break; - case 0x67B8: - case 0x67B0: - pi->powertune_defaults = &defaults_hawaii_xt; - break; - case 0x67BA: - case 0x67B1: - pi->powertune_defaults = &defaults_hawaii_pro; - break; - case 0x67A0: - case 0x67A1: - case 0x67A2: - case 0x67A8: - case 0x67A9: - case 0x67AA: - case 0x67B9: - case 0x67BE: - pi->powertune_defaults = &defaults_bonaire_xt; - break; - } - - pi->dte_tj_offset = 0; - - pi->caps_power_containment = true; - pi->caps_cac = false; - pi->caps_sq_ramping = false; - pi->caps_db_ramping = false; - pi->caps_td_ramping = false; - pi->caps_tcp_ramping = false; - - if (pi->caps_power_containment) { - pi->caps_cac = true; - if (adev->asic_type == CHIP_HAWAII) - pi->enable_bapm_feature = false; - else - pi->enable_bapm_feature = true; - pi->enable_tdc_limit_feature = true; - pi->enable_pkg_pwr_tracking_feature = true; - } -} - -static u8 ci_convert_to_vid(u16 vddc) -{ - return (6200 - (vddc * VOLTAGE_SCALE)) / 25; -} - -static int ci_populate_bapm_vddc_vid_sidd(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; - u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; - u8 *hi2_vid = pi->smc_powertune_table.BapmVddCVidHiSidd2; - u32 i; - - if (adev->pm.dpm.dyn_state.cac_leakage_table.entries == NULL) - return -EINVAL; - if (adev->pm.dpm.dyn_state.cac_leakage_table.count > 8) - return -EINVAL; - if (adev->pm.dpm.dyn_state.cac_leakage_table.count != - adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count) - return -EINVAL; - - for (i = 0; i < adev->pm.dpm.dyn_state.cac_leakage_table.count; i++) { - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { - lo_vid[i] = ci_convert_to_vid(adev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1); - hi_vid[i] = ci_convert_to_vid(adev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2); - hi2_vid[i] = ci_convert_to_vid(adev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3); - } else { - lo_vid[i] = ci_convert_to_vid(adev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc); - hi_vid[i] = ci_convert_to_vid((u16)adev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage); - } - } - return 0; -} - -static int ci_populate_vddc_vid(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u8 *vid = pi->smc_powertune_table.VddCVid; - u32 i; - - if (pi->vddc_voltage_table.count > 8) - return -EINVAL; - - for (i = 0; i < pi->vddc_voltage_table.count; i++) - vid[i] = ci_convert_to_vid(pi->vddc_voltage_table.entries[i].value); - - return 0; -} - -static int ci_populate_svi_load_line(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; - - pi->smc_powertune_table.SviLoadLineEn = pt_defaults->svi_load_line_en; - pi->smc_powertune_table.SviLoadLineVddC = pt_defaults->svi_load_line_vddc; - pi->smc_powertune_table.SviLoadLineTrimVddC = 3; - pi->smc_powertune_table.SviLoadLineOffsetVddC = 0; - - return 0; -} - -static int ci_populate_tdc_limit(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; - u16 tdc_limit; - - tdc_limit = adev->pm.dpm.dyn_state.cac_tdp_table->tdc * 256; - pi->smc_powertune_table.TDC_VDDC_PkgLimit = cpu_to_be16(tdc_limit); - pi->smc_powertune_table.TDC_VDDC_ThrottleReleaseLimitPerc = - pt_defaults->tdc_vddc_throttle_release_limit_perc; - pi->smc_powertune_table.TDC_MAWt = pt_defaults->tdc_mawt; - - return 0; -} - -static int ci_populate_dw8(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; - int ret; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, PmFuseTable) + - offsetof(SMU7_Discrete_PmFuses, TdcWaterfallCtl), - (u32 *)&pi->smc_powertune_table.TdcWaterfallCtl, - pi->sram_end); - if (ret) - return -EINVAL; - else - pi->smc_powertune_table.TdcWaterfallCtl = pt_defaults->tdc_waterfall_ctl; - - return 0; -} - -static int ci_populate_fuzzy_fan(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if ((adev->pm.dpm.fan.fan_output_sensitivity & (1 << 15)) || - (adev->pm.dpm.fan.fan_output_sensitivity == 0)) - adev->pm.dpm.fan.fan_output_sensitivity = - adev->pm.dpm.fan.default_fan_output_sensitivity; - - pi->smc_powertune_table.FuzzyFan_PwmSetDelta = - cpu_to_be16(adev->pm.dpm.fan.fan_output_sensitivity); - - return 0; -} - -static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd; - u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd; - int i, min, max; - - min = max = hi_vid[0]; - for (i = 0; i < 8; i++) { - if (0 != hi_vid[i]) { - if (min > hi_vid[i]) - min = hi_vid[i]; - if (max < hi_vid[i]) - max = hi_vid[i]; - } - - if (0 != lo_vid[i]) { - if (min > lo_vid[i]) - min = lo_vid[i]; - if (max < lo_vid[i]) - max = lo_vid[i]; - } - } - - if ((min == 0) || (max == 0)) - return -EINVAL; - pi->smc_powertune_table.GnbLPMLMaxVid = (u8)max; - pi->smc_powertune_table.GnbLPMLMinVid = (u8)min; - - return 0; -} - -static int ci_populate_bapm_vddc_base_leakage_sidd(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u16 hi_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd; - u16 lo_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd; - struct amdgpu_cac_tdp_table *cac_tdp_table = - adev->pm.dpm.dyn_state.cac_tdp_table; - - hi_sidd = cac_tdp_table->high_cac_leakage / 100 * 256; - lo_sidd = cac_tdp_table->low_cac_leakage / 100 * 256; - - pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd = cpu_to_be16(hi_sidd); - pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd = cpu_to_be16(lo_sidd); - - return 0; -} - -static int ci_populate_bapm_parameters_in_dpm_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults; - SMU7_Discrete_DpmTable *dpm_table = &pi->smc_state_table; - struct amdgpu_cac_tdp_table *cac_tdp_table = - adev->pm.dpm.dyn_state.cac_tdp_table; - struct amdgpu_ppm_table *ppm = adev->pm.dpm.dyn_state.ppm_table; - int i, j, k; - const u16 *def1; - const u16 *def2; - - dpm_table->DefaultTdp = cac_tdp_table->tdp * 256; - dpm_table->TargetTdp = cac_tdp_table->configurable_tdp * 256; - - dpm_table->DTETjOffset = (u8)pi->dte_tj_offset; - dpm_table->GpuTjMax = - (u8)(pi->thermal_temp_setting.temperature_high / 1000); - dpm_table->GpuTjHyst = 8; - - dpm_table->DTEAmbientTempBase = pt_defaults->dte_ambient_temp_base; - - if (ppm) { - dpm_table->PPM_PkgPwrLimit = cpu_to_be16((u16)ppm->dgpu_tdp * 256 / 1000); - dpm_table->PPM_TemperatureLimit = cpu_to_be16((u16)ppm->tj_max * 256); - } else { - dpm_table->PPM_PkgPwrLimit = cpu_to_be16(0); - dpm_table->PPM_TemperatureLimit = cpu_to_be16(0); - } - - dpm_table->BAPM_TEMP_GRADIENT = cpu_to_be32(pt_defaults->bapm_temp_gradient); - def1 = pt_defaults->bapmti_r; - def2 = pt_defaults->bapmti_rc; - - for (i = 0; i < SMU7_DTE_ITERATIONS; i++) { - for (j = 0; j < SMU7_DTE_SOURCES; j++) { - for (k = 0; k < SMU7_DTE_SINKS; k++) { - dpm_table->BAPMTI_R[i][j][k] = cpu_to_be16(*def1); - dpm_table->BAPMTI_RC[i][j][k] = cpu_to_be16(*def2); - def1++; - def2++; - } - } - } - - return 0; -} - -static int ci_populate_pm_base(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 pm_fuse_table_offset; - int ret; - - if (pi->caps_power_containment) { - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, PmFuseTable), - &pm_fuse_table_offset, pi->sram_end); - if (ret) - return ret; - ret = ci_populate_bapm_vddc_vid_sidd(adev); - if (ret) - return ret; - ret = ci_populate_vddc_vid(adev); - if (ret) - return ret; - ret = ci_populate_svi_load_line(adev); - if (ret) - return ret; - ret = ci_populate_tdc_limit(adev); - if (ret) - return ret; - ret = ci_populate_dw8(adev); - if (ret) - return ret; - ret = ci_populate_fuzzy_fan(adev); - if (ret) - return ret; - ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(adev); - if (ret) - return ret; - ret = ci_populate_bapm_vddc_base_leakage_sidd(adev); - if (ret) - return ret; - ret = amdgpu_ci_copy_bytes_to_smc(adev, pm_fuse_table_offset, - (u8 *)&pi->smc_powertune_table, - sizeof(SMU7_Discrete_PmFuses), pi->sram_end); - if (ret) - return ret; - } - - return 0; -} - -static void ci_do_enable_didt(struct amdgpu_device *adev, const bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 data; - - if (pi->caps_sq_ramping) { - data = RREG32_DIDT(ixDIDT_SQ_CTRL0); - if (enable) - data |= DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK; - else - data &= ~DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK; - WREG32_DIDT(ixDIDT_SQ_CTRL0, data); - } - - if (pi->caps_db_ramping) { - data = RREG32_DIDT(ixDIDT_DB_CTRL0); - if (enable) - data |= DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK; - else - data &= ~DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK; - WREG32_DIDT(ixDIDT_DB_CTRL0, data); - } - - if (pi->caps_td_ramping) { - data = RREG32_DIDT(ixDIDT_TD_CTRL0); - if (enable) - data |= DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK; - else - data &= ~DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK; - WREG32_DIDT(ixDIDT_TD_CTRL0, data); - } - - if (pi->caps_tcp_ramping) { - data = RREG32_DIDT(ixDIDT_TCP_CTRL0); - if (enable) - data |= DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK; - else - data &= ~DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK; - WREG32_DIDT(ixDIDT_TCP_CTRL0, data); - } -} - -static int ci_program_pt_config_registers(struct amdgpu_device *adev, - const struct ci_pt_config_reg *cac_config_regs) -{ - const struct ci_pt_config_reg *config_regs = cac_config_regs; - u32 data; - u32 cache = 0; - - if (config_regs == NULL) - return -EINVAL; - - while (config_regs->offset != 0xFFFFFFFF) { - if (config_regs->type == CISLANDS_CONFIGREG_CACHE) { - cache |= ((config_regs->value << config_regs->shift) & config_regs->mask); - } else { - switch (config_regs->type) { - case CISLANDS_CONFIGREG_SMC_IND: - data = RREG32_SMC(config_regs->offset); - break; - case CISLANDS_CONFIGREG_DIDT_IND: - data = RREG32_DIDT(config_regs->offset); - break; - default: - data = RREG32(config_regs->offset); - break; - } - - data &= ~config_regs->mask; - data |= ((config_regs->value << config_regs->shift) & config_regs->mask); - data |= cache; - - switch (config_regs->type) { - case CISLANDS_CONFIGREG_SMC_IND: - WREG32_SMC(config_regs->offset, data); - break; - case CISLANDS_CONFIGREG_DIDT_IND: - WREG32_DIDT(config_regs->offset, data); - break; - default: - WREG32(config_regs->offset, data); - break; - } - cache = 0; - } - config_regs++; - } - return 0; -} - -static int ci_enable_didt(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - - if (pi->caps_sq_ramping || pi->caps_db_ramping || - pi->caps_td_ramping || pi->caps_tcp_ramping) { - amdgpu_gfx_rlc_enter_safe_mode(adev); - - if (enable) { - ret = ci_program_pt_config_registers(adev, didt_config_ci); - if (ret) { - amdgpu_gfx_rlc_exit_safe_mode(adev); - return ret; - } - } - - ci_do_enable_didt(adev, enable); - - amdgpu_gfx_rlc_exit_safe_mode(adev); - } - - return 0; -} - -static int ci_enable_power_containment(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - int ret = 0; - - if (enable) { - pi->power_containment_features = 0; - if (pi->caps_power_containment) { - if (pi->enable_bapm_feature) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableDTE); - if (smc_result != PPSMC_Result_OK) - ret = -EINVAL; - else - pi->power_containment_features |= POWERCONTAINMENT_FEATURE_BAPM; - } - - if (pi->enable_tdc_limit_feature) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_TDCLimitEnable); - if (smc_result != PPSMC_Result_OK) - ret = -EINVAL; - else - pi->power_containment_features |= POWERCONTAINMENT_FEATURE_TDCLimit; - } - - if (pi->enable_pkg_pwr_tracking_feature) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PkgPwrLimitEnable); - if (smc_result != PPSMC_Result_OK) { - ret = -EINVAL; - } else { - struct amdgpu_cac_tdp_table *cac_tdp_table = - adev->pm.dpm.dyn_state.cac_tdp_table; - u32 default_pwr_limit = - (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); - - pi->power_containment_features |= POWERCONTAINMENT_FEATURE_PkgPwrLimit; - - ci_set_power_limit(adev, default_pwr_limit); - } - } - } - } else { - if (pi->caps_power_containment && pi->power_containment_features) { - if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_TDCLimitDisable); - - if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DisableDTE); - - if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PkgPwrLimitDisable); - pi->power_containment_features = 0; - } - } - - return ret; -} - -static int ci_enable_smc_cac(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - int ret = 0; - - if (pi->caps_cac) { - if (enable) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableCac); - if (smc_result != PPSMC_Result_OK) { - ret = -EINVAL; - pi->cac_enabled = false; - } else { - pi->cac_enabled = true; - } - } else if (pi->cac_enabled) { - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DisableCac); - pi->cac_enabled = false; - } - } - - return ret; -} - -static int ci_enable_thermal_based_sclk_dpm(struct amdgpu_device *adev, - bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result = PPSMC_Result_OK; - - if (pi->thermal_sclk_dpm_enabled) { - if (enable) - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_ENABLE_THERMAL_DPM); - else - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DISABLE_THERMAL_DPM); - } - - if (smc_result == PPSMC_Result_OK) - return 0; - else - return -EINVAL; -} - -static int ci_power_control_set_level(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_cac_tdp_table *cac_tdp_table = - adev->pm.dpm.dyn_state.cac_tdp_table; - s32 adjust_percent; - s32 target_tdp; - int ret = 0; - bool adjust_polarity = false; /* ??? */ - - if (pi->caps_power_containment) { - adjust_percent = adjust_polarity ? - adev->pm.dpm.tdp_adjustment : (-1 * adev->pm.dpm.tdp_adjustment); - target_tdp = ((100 + adjust_percent) * - (s32)cac_tdp_table->configurable_tdp) / 100; - - ret = ci_set_overdrive_target_tdp(adev, (u32)target_tdp); - } - - return ret; -} - -static void ci_dpm_powergate_uvd(void *handle, bool gate) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - pi->uvd_power_gated = gate; - - if (gate) { - /* stop the UVD block */ - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); - ci_update_uvd_dpm(adev, gate); - } else { - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); - ci_update_uvd_dpm(adev, gate); - } -} - -static bool ci_dpm_vblank_too_short(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); - u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; - - /* disable mclk switching if the refresh is >120Hz, even if the - * blanking period would allow it - */ - if (amdgpu_dpm_get_vrefresh(adev) > 120) - return true; - - if (vblank_time < switch_limit) - return true; - else - return false; - -} - -static void ci_apply_state_adjust_rules(struct amdgpu_device *adev, - struct amdgpu_ps *rps) -{ - struct ci_ps *ps = ci_get_ps(rps); - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_clock_and_voltage_limits *max_limits; - bool disable_mclk_switching; - u32 sclk, mclk; - int i; - - if (rps->vce_active) { - rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk; - rps->ecclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].ecclk; - } else { - rps->evclk = 0; - rps->ecclk = 0; - } - - if ((adev->pm.dpm.new_active_crtc_count > 1) || - ci_dpm_vblank_too_short(adev)) - disable_mclk_switching = true; - else - disable_mclk_switching = false; - - if ((rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) - pi->battery_state = true; - else - pi->battery_state = false; - - if (adev->pm.ac_power) - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - else - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - - if (adev->pm.ac_power == false) { - for (i = 0; i < ps->performance_level_count; i++) { - if (ps->performance_levels[i].mclk > max_limits->mclk) - ps->performance_levels[i].mclk = max_limits->mclk; - if (ps->performance_levels[i].sclk > max_limits->sclk) - ps->performance_levels[i].sclk = max_limits->sclk; - } - } - - /* XXX validate the min clocks required for display */ - - if (disable_mclk_switching) { - mclk = ps->performance_levels[ps->performance_level_count - 1].mclk; - sclk = ps->performance_levels[0].sclk; - } else { - mclk = ps->performance_levels[0].mclk; - sclk = ps->performance_levels[0].sclk; - } - - if (adev->pm.pm_display_cfg.min_core_set_clock > sclk) - sclk = adev->pm.pm_display_cfg.min_core_set_clock; - - if (adev->pm.pm_display_cfg.min_mem_set_clock > mclk) - mclk = adev->pm.pm_display_cfg.min_mem_set_clock; - - if (rps->vce_active) { - if (sclk < adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].sclk) - sclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].sclk; - if (mclk < adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].mclk) - mclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].mclk; - } - - ps->performance_levels[0].sclk = sclk; - ps->performance_levels[0].mclk = mclk; - - if (ps->performance_levels[1].sclk < ps->performance_levels[0].sclk) - ps->performance_levels[1].sclk = ps->performance_levels[0].sclk; - - if (disable_mclk_switching) { - if (ps->performance_levels[0].mclk < ps->performance_levels[1].mclk) - ps->performance_levels[0].mclk = ps->performance_levels[1].mclk; - } else { - if (ps->performance_levels[1].mclk < ps->performance_levels[0].mclk) - ps->performance_levels[1].mclk = ps->performance_levels[0].mclk; - } -} - -static int ci_thermal_set_temperature_range(struct amdgpu_device *adev, - int min_temp, int max_temp) -{ - int low_temp = 0 * 1000; - int high_temp = 255 * 1000; - u32 tmp; - - if (low_temp < min_temp) - low_temp = min_temp; - if (high_temp > max_temp) - high_temp = max_temp; - if (high_temp < low_temp) { - DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); - return -EINVAL; - } - - tmp = RREG32_SMC(ixCG_THERMAL_INT); - tmp &= ~(CG_THERMAL_INT__DIG_THERM_INTH_MASK | CG_THERMAL_INT__DIG_THERM_INTL_MASK); - tmp |= ((high_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTH__SHIFT) | - ((low_temp / 1000)) << CG_THERMAL_INT__DIG_THERM_INTL__SHIFT; - WREG32_SMC(ixCG_THERMAL_INT, tmp); - -#if 0 - /* XXX: need to figure out how to handle this properly */ - tmp = RREG32_SMC(ixCG_THERMAL_CTRL); - tmp &= DIG_THERM_DPM_MASK; - tmp |= DIG_THERM_DPM(high_temp / 1000); - WREG32_SMC(ixCG_THERMAL_CTRL, tmp); -#endif - - adev->pm.dpm.thermal.min_temp = low_temp; - adev->pm.dpm.thermal.max_temp = high_temp; - return 0; -} - -static int ci_thermal_enable_alert(struct amdgpu_device *adev, - bool enable) -{ - u32 thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - PPSMC_Result result; - - if (enable) { - thermal_int &= ~(CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK | - CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK); - WREG32_SMC(ixCG_THERMAL_INT, thermal_int); - result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_Thermal_Cntl_Enable); - if (result != PPSMC_Result_OK) { - DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); - return -EINVAL; - } - } else { - thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK | - CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; - WREG32_SMC(ixCG_THERMAL_INT, thermal_int); - result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_Thermal_Cntl_Disable); - if (result != PPSMC_Result_OK) { - DRM_DEBUG_KMS("Could not disable thermal interrupts.\n"); - return -EINVAL; - } - } - - return 0; -} - -static void ci_fan_ctrl_set_static_mode(struct amdgpu_device *adev, u32 mode) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - - if (pi->fan_ctrl_is_in_default_mode) { - tmp = (RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK) - >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; - pi->fan_ctrl_default_mode = tmp; - tmp = (RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__TMIN_MASK) - >> CG_FDO_CTRL2__TMIN__SHIFT; - pi->t_min = tmp; - pi->fan_ctrl_is_in_default_mode = false; - } - - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; - tmp |= 0 << CG_FDO_CTRL2__TMIN__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL2, tmp); - - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; - tmp |= mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL2, tmp); -} - -static int ci_thermal_setup_fan_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - SMU7_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; - u32 duty100; - u32 t_diff1, t_diff2, pwm_diff1, pwm_diff2; - u16 fdo_min, slope1, slope2; - u32 reference_clock, tmp; - int ret; - u64 tmp64; - - if (!pi->fan_table_start) { - adev->pm.dpm.fan.ucode_fan_control = false; - return 0; - } - - duty100 = (RREG32_SMC(ixCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) - >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; - - if (duty100 == 0) { - adev->pm.dpm.fan.ucode_fan_control = false; - return 0; - } - - tmp64 = (u64)adev->pm.dpm.fan.pwm_min * duty100; - do_div(tmp64, 10000); - fdo_min = (u16)tmp64; - - t_diff1 = adev->pm.dpm.fan.t_med - adev->pm.dpm.fan.t_min; - t_diff2 = adev->pm.dpm.fan.t_high - adev->pm.dpm.fan.t_med; - - pwm_diff1 = adev->pm.dpm.fan.pwm_med - adev->pm.dpm.fan.pwm_min; - pwm_diff2 = adev->pm.dpm.fan.pwm_high - adev->pm.dpm.fan.pwm_med; - - slope1 = (u16)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); - slope2 = (u16)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); - - fan_table.TempMin = cpu_to_be16((50 + adev->pm.dpm.fan.t_min) / 100); - fan_table.TempMed = cpu_to_be16((50 + adev->pm.dpm.fan.t_med) / 100); - fan_table.TempMax = cpu_to_be16((50 + adev->pm.dpm.fan.t_max) / 100); - - fan_table.Slope1 = cpu_to_be16(slope1); - fan_table.Slope2 = cpu_to_be16(slope2); - - fan_table.FdoMin = cpu_to_be16(fdo_min); - - fan_table.HystDown = cpu_to_be16(adev->pm.dpm.fan.t_hyst); - - fan_table.HystUp = cpu_to_be16(1); - - fan_table.HystSlope = cpu_to_be16(1); - - fan_table.TempRespLim = cpu_to_be16(5); - - reference_clock = amdgpu_asic_get_xclk(adev); - - fan_table.RefreshPeriod = cpu_to_be32((adev->pm.dpm.fan.cycle_delay * - reference_clock) / 1600); - - fan_table.FdoMax = cpu_to_be16((u16)duty100); - - tmp = (RREG32_SMC(ixCG_MULT_THERMAL_CTRL) & CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK) - >> CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT; - fan_table.TempSrc = (uint8_t)tmp; - - ret = amdgpu_ci_copy_bytes_to_smc(adev, - pi->fan_table_start, - (u8 *)(&fan_table), - sizeof(fan_table), - pi->sram_end); - - if (ret) { - DRM_ERROR("Failed to load fan table to the SMC."); - adev->pm.dpm.fan.ucode_fan_control = false; - } - - return 0; -} - -static int ci_fan_ctrl_start_smc_fan_control(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result ret; - - if (pi->caps_od_fuzzy_fan_control_support) { - ret = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_StartFanControl, - FAN_CONTROL_FUZZY); - if (ret != PPSMC_Result_OK) - return -EINVAL; - ret = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_SetFanPwmMax, - adev->pm.dpm.fan.default_max_fan_pwm); - if (ret != PPSMC_Result_OK) - return -EINVAL; - } else { - ret = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_StartFanControl, - FAN_CONTROL_TABLE); - if (ret != PPSMC_Result_OK) - return -EINVAL; - } - - pi->fan_is_controlled_by_smc = true; - return 0; -} - - -static int ci_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev) -{ - PPSMC_Result ret; - struct ci_power_info *pi = ci_get_pi(adev); - - ret = amdgpu_ci_send_msg_to_smc(adev, PPSMC_StopFanControl); - if (ret == PPSMC_Result_OK) { - pi->fan_is_controlled_by_smc = false; - return 0; - } else { - return -EINVAL; - } -} - -static int ci_dpm_get_fan_speed_percent(void *handle, - u32 *speed) -{ - u32 duty, duty100; - u64 tmp64; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->pm.no_fan) - return -ENOENT; - - duty100 = (RREG32_SMC(ixCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) - >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; - duty = (RREG32_SMC(ixCG_THERMAL_STATUS) & CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK) - >> CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT; - - if (duty100 == 0) - return -EINVAL; - - tmp64 = (u64)duty * 100; - do_div(tmp64, duty100); - *speed = (u32)tmp64; - - if (*speed > 100) - *speed = 100; - - return 0; -} - -static int ci_dpm_set_fan_speed_percent(void *handle, - u32 speed) -{ - u32 tmp; - u32 duty, duty100; - u64 tmp64; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (adev->pm.no_fan) - return -ENOENT; - - if (pi->fan_is_controlled_by_smc) - return -EINVAL; - - if (speed > 100) - return -EINVAL; - - duty100 = (RREG32_SMC(ixCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) - >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; - - if (duty100 == 0) - return -EINVAL; - - tmp64 = (u64)speed * duty100; - do_div(tmp64, 100); - duty = (u32)tmp64; - - tmp = RREG32_SMC(ixCG_FDO_CTRL0) & ~CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK; - tmp |= duty << CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL0, tmp); - - return 0; -} - -static void ci_dpm_set_fan_control_mode(void *handle, u32 mode) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (mode) { - case AMD_FAN_CTRL_NONE: - if (adev->pm.dpm.fan.ucode_fan_control) - ci_fan_ctrl_stop_smc_fan_control(adev); - ci_dpm_set_fan_speed_percent(adev, 100); - break; - case AMD_FAN_CTRL_MANUAL: - if (adev->pm.dpm.fan.ucode_fan_control) - ci_fan_ctrl_stop_smc_fan_control(adev); - break; - case AMD_FAN_CTRL_AUTO: - if (adev->pm.dpm.fan.ucode_fan_control) - ci_thermal_start_smc_fan_control(adev); - break; - default: - break; - } -} - -static u32 ci_dpm_get_fan_control_mode(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (pi->fan_is_controlled_by_smc) - return AMD_FAN_CTRL_AUTO; - else - return AMD_FAN_CTRL_MANUAL; -} - -#if 0 -static int ci_fan_ctrl_get_fan_speed_rpm(struct amdgpu_device *adev, - u32 *speed) -{ - u32 tach_period; - u32 xclk = amdgpu_asic_get_xclk(adev); - - if (adev->pm.no_fan) - return -ENOENT; - - if (adev->pm.fan_pulses_per_revolution == 0) - return -ENOENT; - - tach_period = (RREG32_SMC(ixCG_TACH_STATUS) & CG_TACH_STATUS__TACH_PERIOD_MASK) - >> CG_TACH_STATUS__TACH_PERIOD__SHIFT; - if (tach_period == 0) - return -ENOENT; - - *speed = 60 * xclk * 10000 / tach_period; - - return 0; -} - -static int ci_fan_ctrl_set_fan_speed_rpm(struct amdgpu_device *adev, - u32 speed) -{ - u32 tach_period, tmp; - u32 xclk = amdgpu_asic_get_xclk(adev); - - if (adev->pm.no_fan) - return -ENOENT; - - if (adev->pm.fan_pulses_per_revolution == 0) - return -ENOENT; - - if ((speed < adev->pm.fan_min_rpm) || - (speed > adev->pm.fan_max_rpm)) - return -EINVAL; - - if (adev->pm.dpm.fan.ucode_fan_control) - ci_fan_ctrl_stop_smc_fan_control(adev); - - tach_period = 60 * xclk * 10000 / (8 * speed); - tmp = RREG32_SMC(ixCG_TACH_CTRL) & ~CG_TACH_CTRL__TARGET_PERIOD_MASK; - tmp |= tach_period << CG_TACH_CTRL__TARGET_PERIOD__SHIFT; - WREG32_SMC(CG_TACH_CTRL, tmp); - - ci_fan_ctrl_set_static_mode(adev, FDO_PWM_MODE_STATIC_RPM); - - return 0; -} -#endif - -static void ci_fan_ctrl_set_default_mode(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - - if (!pi->fan_ctrl_is_in_default_mode) { - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; - tmp |= pi->fan_ctrl_default_mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL2, tmp); - - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; - tmp |= pi->t_min << CG_FDO_CTRL2__TMIN__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL2, tmp); - pi->fan_ctrl_is_in_default_mode = true; - } -} - -static void ci_thermal_start_smc_fan_control(struct amdgpu_device *adev) -{ - if (adev->pm.dpm.fan.ucode_fan_control) { - ci_fan_ctrl_start_smc_fan_control(adev); - ci_fan_ctrl_set_static_mode(adev, FDO_PWM_MODE_STATIC); - } -} - -static void ci_thermal_initialize(struct amdgpu_device *adev) -{ - u32 tmp; - - if (adev->pm.fan_pulses_per_revolution) { - tmp = RREG32_SMC(ixCG_TACH_CTRL) & ~CG_TACH_CTRL__EDGE_PER_REV_MASK; - tmp |= (adev->pm.fan_pulses_per_revolution - 1) - << CG_TACH_CTRL__EDGE_PER_REV__SHIFT; - WREG32_SMC(ixCG_TACH_CTRL, tmp); - } - - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK; - tmp |= 0x28 << CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT; - WREG32_SMC(ixCG_FDO_CTRL2, tmp); -} - -static int ci_thermal_start_thermal_controller(struct amdgpu_device *adev) -{ - int ret; - - ci_thermal_initialize(adev); - ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN, CISLANDS_TEMP_RANGE_MAX); - if (ret) - return ret; - ret = ci_thermal_enable_alert(adev, true); - if (ret) - return ret; - if (adev->pm.dpm.fan.ucode_fan_control) { - ret = ci_thermal_setup_fan_table(adev); - if (ret) - return ret; - ci_thermal_start_smc_fan_control(adev); - } - - return 0; -} - -static void ci_thermal_stop_thermal_controller(struct amdgpu_device *adev) -{ - if (!adev->pm.no_fan) - ci_fan_ctrl_set_default_mode(adev); -} - -static int ci_read_smc_soft_register(struct amdgpu_device *adev, - u16 reg_offset, u32 *value) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - return amdgpu_ci_read_smc_sram_dword(adev, - pi->soft_regs_start + reg_offset, - value, pi->sram_end); -} - -static int ci_write_smc_soft_register(struct amdgpu_device *adev, - u16 reg_offset, u32 value) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - return amdgpu_ci_write_smc_sram_dword(adev, - pi->soft_regs_start + reg_offset, - value, pi->sram_end); -} - -static void ci_init_fps_limits(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - SMU7_Discrete_DpmTable *table = &pi->smc_state_table; - - if (pi->caps_fps) { - u16 tmp; - - tmp = 45; - table->FpsHighT = cpu_to_be16(tmp); - - tmp = 30; - table->FpsLowT = cpu_to_be16(tmp); - } -} - -static int ci_update_sclk_t(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret = 0; - u32 low_sclk_interrupt_t = 0; - - if (pi->caps_sclk_throttle_low_notification) { - low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t); - - ret = amdgpu_ci_copy_bytes_to_smc(adev, - pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, LowSclkInterruptT), - (u8 *)&low_sclk_interrupt_t, - sizeof(u32), pi->sram_end); - - } - - return ret; -} - -static void ci_get_leakage_voltages(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u16 leakage_id, virtual_voltage_id; - u16 vddc, vddci; - int i; - - pi->vddc_leakage.count = 0; - pi->vddci_leakage.count = 0; - - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { - for (i = 0; i < CISLANDS_MAX_LEAKAGE_COUNT; i++) { - virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; - if (amdgpu_atombios_get_voltage_evv(adev, virtual_voltage_id, &vddc) != 0) - continue; - if (vddc != 0 && vddc != virtual_voltage_id) { - pi->vddc_leakage.actual_voltage[pi->vddc_leakage.count] = vddc; - pi->vddc_leakage.leakage_id[pi->vddc_leakage.count] = virtual_voltage_id; - pi->vddc_leakage.count++; - } - } - } else if (amdgpu_atombios_get_leakage_id_from_vbios(adev, &leakage_id) == 0) { - for (i = 0; i < CISLANDS_MAX_LEAKAGE_COUNT; i++) { - virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; - if (amdgpu_atombios_get_leakage_vddc_based_on_leakage_params(adev, &vddc, &vddci, - virtual_voltage_id, - leakage_id) == 0) { - if (vddc != 0 && vddc != virtual_voltage_id) { - pi->vddc_leakage.actual_voltage[pi->vddc_leakage.count] = vddc; - pi->vddc_leakage.leakage_id[pi->vddc_leakage.count] = virtual_voltage_id; - pi->vddc_leakage.count++; - } - if (vddci != 0 && vddci != virtual_voltage_id) { - pi->vddci_leakage.actual_voltage[pi->vddci_leakage.count] = vddci; - pi->vddci_leakage.leakage_id[pi->vddci_leakage.count] = virtual_voltage_id; - pi->vddci_leakage.count++; - } - } - } - } -} - -static void ci_set_dpm_event_sources(struct amdgpu_device *adev, u32 sources) -{ - struct ci_power_info *pi = ci_get_pi(adev); - bool want_thermal_protection; - enum amdgpu_dpm_event_src dpm_event_src; - u32 tmp; - - switch (sources) { - case 0: - default: - want_thermal_protection = false; - break; - case (1 << AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL): - want_thermal_protection = true; - dpm_event_src = AMDGPU_DPM_EVENT_SRC_DIGITAL; - break; - case (1 << AMDGPU_DPM_AUTO_THROTTLE_SRC_EXTERNAL): - want_thermal_protection = true; - dpm_event_src = AMDGPU_DPM_EVENT_SRC_EXTERNAL; - break; - case ((1 << AMDGPU_DPM_AUTO_THROTTLE_SRC_EXTERNAL) | - (1 << AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL)): - want_thermal_protection = true; - dpm_event_src = AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL; - break; - } - - if (want_thermal_protection) { -#if 0 - /* XXX: need to figure out how to handle this properly */ - tmp = RREG32_SMC(ixCG_THERMAL_CTRL); - tmp &= DPM_EVENT_SRC_MASK; - tmp |= DPM_EVENT_SRC(dpm_event_src); - WREG32_SMC(ixCG_THERMAL_CTRL, tmp); -#endif - - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - if (pi->thermal_protection) - tmp &= ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK; - else - tmp |= GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - } else { - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - tmp |= GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - } -} - -static void ci_enable_auto_throttle_source(struct amdgpu_device *adev, - enum amdgpu_dpm_auto_throttle_src source, - bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (enable) { - if (!(pi->active_auto_throttle_sources & (1 << source))) { - pi->active_auto_throttle_sources |= 1 << source; - ci_set_dpm_event_sources(adev, pi->active_auto_throttle_sources); - } - } else { - if (pi->active_auto_throttle_sources & (1 << source)) { - pi->active_auto_throttle_sources &= ~(1 << source); - ci_set_dpm_event_sources(adev, pi->active_auto_throttle_sources); - } - } -} - -static void ci_enable_vr_hot_gpio_interrupt(struct amdgpu_device *adev) -{ - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REGULATOR_HOT) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableVRHotGPIOInterrupt); -} - -static int ci_unfreeze_sclk_mclk_dpm(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - - if (!pi->need_update_smu7_dpm_table) - return 0; - - if ((!pi->sclk_dpm_key_disabled) && - (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_SCLKDPM_UnfreezeLevel); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - if ((!pi->mclk_dpm_key_disabled) && - (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MCLKDPM_UnfreezeLevel); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - pi->need_update_smu7_dpm_table = 0; - return 0; -} - -static int ci_enable_sclk_mclk_dpm(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - - if (enable) { - if (!pi->sclk_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DPM_Enable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - if (!pi->mclk_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MCLKDPM_Enable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - - WREG32_P(mmMC_SEQ_CNTL_3, MC_SEQ_CNTL_3__CAC_EN_MASK, - ~MC_SEQ_CNTL_3__CAC_EN_MASK); - - WREG32_SMC(ixLCAC_MC0_CNTL, 0x05); - WREG32_SMC(ixLCAC_MC1_CNTL, 0x05); - WREG32_SMC(ixLCAC_CPL_CNTL, 0x100005); - - udelay(10); - - WREG32_SMC(ixLCAC_MC0_CNTL, 0x400005); - WREG32_SMC(ixLCAC_MC1_CNTL, 0x400005); - WREG32_SMC(ixLCAC_CPL_CNTL, 0x500005); - } - } else { - if (!pi->sclk_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DPM_Disable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - if (!pi->mclk_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MCLKDPM_Disable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - } - - return 0; -} - -static int ci_start_dpm(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - int ret; - u32 tmp; - - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - tmp |= GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - - tmp = RREG32_SMC(ixSCLK_PWRMGT_CNTL); - tmp |= SCLK_PWRMGT_CNTL__DYNAMIC_PM_EN_MASK; - WREG32_SMC(ixSCLK_PWRMGT_CNTL, tmp); - - ci_write_smc_soft_register(adev, offsetof(SMU7_SoftRegisters, VoltageChangeTimeout), 0x1000); - - WREG32_P(mmBIF_LNCNT_RESET, 0, ~BIF_LNCNT_RESET__RESET_LNCNT_EN_MASK); - - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_Voltage_Cntl_Enable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - - ret = ci_enable_sclk_mclk_dpm(adev, true); - if (ret) - return ret; - - if (!pi->pcie_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_Enable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_freeze_sclk_mclk_dpm(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - - if (!pi->need_update_smu7_dpm_table) - return 0; - - if ((!pi->sclk_dpm_key_disabled) && - (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_SCLKDPM_FreezeLevel); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - if ((!pi->mclk_dpm_key_disabled) && - (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MCLKDPM_FreezeLevel); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_stop_dpm(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result smc_result; - int ret; - u32 tmp; - - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - tmp &= ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - - tmp = RREG32_SMC(ixSCLK_PWRMGT_CNTL); - tmp &= ~SCLK_PWRMGT_CNTL__DYNAMIC_PM_EN_MASK; - WREG32_SMC(ixSCLK_PWRMGT_CNTL, tmp); - - if (!pi->pcie_dpm_key_disabled) { - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_Disable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - ret = ci_enable_sclk_mclk_dpm(adev, false); - if (ret) - return ret; - - smc_result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_Voltage_Cntl_Disable); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - - return 0; -} - -static void ci_enable_sclk_control(struct amdgpu_device *adev, bool enable) -{ - u32 tmp = RREG32_SMC(ixSCLK_PWRMGT_CNTL); - - if (enable) - tmp &= ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK; - else - tmp |= SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK; - WREG32_SMC(ixSCLK_PWRMGT_CNTL, tmp); -} - -#if 0 -static int ci_notify_hw_of_power_source(struct amdgpu_device *adev, - bool ac_power) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_cac_tdp_table *cac_tdp_table = - adev->pm.dpm.dyn_state.cac_tdp_table; - u32 power_limit; - - if (ac_power) - power_limit = (u32)(cac_tdp_table->maximum_power_delivery_limit * 256); - else - power_limit = (u32)(cac_tdp_table->battery_power_limit * 256); - - ci_set_power_limit(adev, power_limit); - - if (pi->caps_automatic_dc_transition) { - if (ac_power) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_RunningOnAC); - else - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_Remove_DC_Clamp); - } - - return 0; -} -#endif - -static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, - PPSMC_Msg msg, u32 parameter) -{ - WREG32(mmSMC_MSG_ARG_0, parameter); - return amdgpu_ci_send_msg_to_smc(adev, msg); -} - -static PPSMC_Result amdgpu_ci_send_msg_to_smc_return_parameter(struct amdgpu_device *adev, - PPSMC_Msg msg, u32 *parameter) -{ - PPSMC_Result smc_result; - - smc_result = amdgpu_ci_send_msg_to_smc(adev, msg); - - if ((smc_result == PPSMC_Result_OK) && parameter) - *parameter = RREG32(mmSMC_MSG_ARG_0); - - return smc_result; -} - -static int ci_dpm_force_state_sclk(struct amdgpu_device *adev, u32 n) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi->sclk_dpm_key_disabled) { - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_SCLKDPM_SetEnabledMask, 1 << n); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_dpm_force_state_mclk(struct amdgpu_device *adev, u32 n) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi->mclk_dpm_key_disabled) { - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_MCLKDPM_SetEnabledMask, 1 << n); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_dpm_force_state_pcie(struct amdgpu_device *adev, u32 n) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi->pcie_dpm_key_disabled) { - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_PCIeDPM_ForceLevel, n); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_set_power_limit(struct amdgpu_device *adev, u32 n) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) { - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_PkgPwrSetLimit, n); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - - return 0; -} - -static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev, - u32 target_tdp) -{ - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_OverDriveSetTargetTdp, target_tdp); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - return 0; -} - -#if 0 -static int ci_set_boot_state(struct amdgpu_device *adev) -{ - return ci_enable_sclk_mclk_dpm(adev, false); -} -#endif - -static u32 ci_get_average_sclk_freq(struct amdgpu_device *adev) -{ - u32 sclk_freq; - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_return_parameter(adev, - PPSMC_MSG_API_GetSclkFrequency, - &sclk_freq); - if (smc_result != PPSMC_Result_OK) - sclk_freq = 0; - - return sclk_freq; -} - -static u32 ci_get_average_mclk_freq(struct amdgpu_device *adev) -{ - u32 mclk_freq; - PPSMC_Result smc_result = - amdgpu_ci_send_msg_to_smc_return_parameter(adev, - PPSMC_MSG_API_GetMclkFrequency, - &mclk_freq); - if (smc_result != PPSMC_Result_OK) - mclk_freq = 0; - - return mclk_freq; -} - -static void ci_dpm_start_smc(struct amdgpu_device *adev) -{ - int i; - - amdgpu_ci_program_jump_on_start(adev); - amdgpu_ci_start_smc_clock(adev); - amdgpu_ci_start_smc(adev); - for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32_SMC(ixFIRMWARE_FLAGS) & FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) - break; - } -} - -static void ci_dpm_stop_smc(struct amdgpu_device *adev) -{ - amdgpu_ci_reset_smc(adev); - amdgpu_ci_stop_smc_clock(adev); -} - -static int ci_process_firmware_header(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - int ret; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, DpmTable), - &tmp, pi->sram_end); - if (ret) - return ret; - - pi->dpm_table_start = tmp; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, SoftRegisters), - &tmp, pi->sram_end); - if (ret) - return ret; - - pi->soft_regs_start = tmp; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, mcRegisterTable), - &tmp, pi->sram_end); - if (ret) - return ret; - - pi->mc_reg_table_start = tmp; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, FanTable), - &tmp, pi->sram_end); - if (ret) - return ret; - - pi->fan_table_start = tmp; - - ret = amdgpu_ci_read_smc_sram_dword(adev, - SMU7_FIRMWARE_HEADER_LOCATION + - offsetof(SMU7_Firmware_Header, mcArbDramTimingTable), - &tmp, pi->sram_end); - if (ret) - return ret; - - pi->arb_table_start = tmp; - - return 0; -} - -static void ci_read_clock_registers(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - pi->clock_registers.cg_spll_func_cntl = - RREG32_SMC(ixCG_SPLL_FUNC_CNTL); - pi->clock_registers.cg_spll_func_cntl_2 = - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_2); - pi->clock_registers.cg_spll_func_cntl_3 = - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_3); - pi->clock_registers.cg_spll_func_cntl_4 = - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_4); - pi->clock_registers.cg_spll_spread_spectrum = - RREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM); - pi->clock_registers.cg_spll_spread_spectrum_2 = - RREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM_2); - pi->clock_registers.dll_cntl = RREG32(mmDLL_CNTL); - pi->clock_registers.mclk_pwrmgt_cntl = RREG32(mmMCLK_PWRMGT_CNTL); - pi->clock_registers.mpll_ad_func_cntl = RREG32(mmMPLL_AD_FUNC_CNTL); - pi->clock_registers.mpll_dq_func_cntl = RREG32(mmMPLL_DQ_FUNC_CNTL); - pi->clock_registers.mpll_func_cntl = RREG32(mmMPLL_FUNC_CNTL); - pi->clock_registers.mpll_func_cntl_1 = RREG32(mmMPLL_FUNC_CNTL_1); - pi->clock_registers.mpll_func_cntl_2 = RREG32(mmMPLL_FUNC_CNTL_2); - pi->clock_registers.mpll_ss1 = RREG32(mmMPLL_SS1); - pi->clock_registers.mpll_ss2 = RREG32(mmMPLL_SS2); -} - -static void ci_init_sclk_t(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - pi->low_sclk_interrupt_t = 0; -} - -static void ci_enable_thermal_protection(struct amdgpu_device *adev, - bool enable) -{ - u32 tmp = RREG32_SMC(ixGENERAL_PWRMGT); - - if (enable) - tmp &= ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK; - else - tmp |= GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); -} - -static void ci_enable_acpi_power_management(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixGENERAL_PWRMGT); - - tmp |= GENERAL_PWRMGT__STATIC_PM_EN_MASK; - - WREG32_SMC(ixGENERAL_PWRMGT, tmp); -} - -#if 0 -static int ci_enter_ulp_state(struct amdgpu_device *adev) -{ - - WREG32(mmSMC_MESSAGE_0, PPSMC_MSG_SwitchToMinimumPower); - - udelay(25000); - - return 0; -} - -static int ci_exit_ulp_state(struct amdgpu_device *adev) -{ - int i; - - WREG32(mmSMC_MESSAGE_0, PPSMC_MSG_ResumeFromMinimumPower); - - udelay(7000); - - for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(mmSMC_RESP_0) == 1) - break; - udelay(1000); - } - - return 0; -} -#endif - -static int ci_notify_smc_display_change(struct amdgpu_device *adev, - bool has_display) -{ - PPSMC_Msg msg = has_display ? PPSMC_MSG_HasDisplay : PPSMC_MSG_NoDisplay; - - return (amdgpu_ci_send_msg_to_smc(adev, msg) == PPSMC_Result_OK) ? 0 : -EINVAL; -} - -static int ci_enable_ds_master_switch(struct amdgpu_device *adev, - bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (enable) { - if (pi->caps_sclk_ds) { - if (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MASTER_DeepSleep_ON) != PPSMC_Result_OK) - return -EINVAL; - } else { - if (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) - return -EINVAL; - } - } else { - if (pi->caps_sclk_ds) { - if (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK) - return -EINVAL; - } - } - - return 0; -} - -static void ci_program_display_gap(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixCG_DISPLAY_GAP_CNTL); - u32 pre_vbi_time_in_us; - u32 frame_time_in_us; - u32 ref_clock = adev->clock.spll.reference_freq; - u32 refresh_rate = amdgpu_dpm_get_vrefresh(adev); - u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); - - tmp &= ~CG_DISPLAY_GAP_CNTL__DISP_GAP_MASK; - if (adev->pm.dpm.new_active_crtc_count > 0) - tmp |= (AMDGPU_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP_GAP__SHIFT); - else - tmp |= (AMDGPU_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP_GAP__SHIFT); - WREG32_SMC(ixCG_DISPLAY_GAP_CNTL, tmp); - - if (refresh_rate == 0) - refresh_rate = 60; - if (vblank_time == 0xffffffff) - vblank_time = 500; - frame_time_in_us = 1000000 / refresh_rate; - pre_vbi_time_in_us = - frame_time_in_us - 200 - vblank_time; - tmp = pre_vbi_time_in_us * (ref_clock / 100); - - WREG32_SMC(ixCG_DISPLAY_GAP_CNTL2, tmp); - ci_write_smc_soft_register(adev, offsetof(SMU7_SoftRegisters, PreVBlankGap), 0x64); - ci_write_smc_soft_register(adev, offsetof(SMU7_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); - - - ci_notify_smc_display_change(adev, (adev->pm.dpm.new_active_crtc_count == 1)); - -} - -static void ci_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - - if (enable) { - if (pi->caps_sclk_ss_support) { - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - tmp |= GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - } - } else { - tmp = RREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM); - tmp &= ~CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK; - WREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM, tmp); - - tmp = RREG32_SMC(ixGENERAL_PWRMGT); - tmp &= ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); - } -} - -static void ci_program_sstp(struct amdgpu_device *adev) -{ - WREG32_SMC(ixCG_STATIC_SCREEN_PARAMETER, - ((CISLANDS_SSTU_DFLT << CG_STATIC_SCREEN_PARAMETER__STATIC_SCREEN_THRESHOLD_UNIT__SHIFT) | - (CISLANDS_SST_DFLT << CG_STATIC_SCREEN_PARAMETER__STATIC_SCREEN_THRESHOLD__SHIFT))); -} - -static void ci_enable_display_gap(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixCG_DISPLAY_GAP_CNTL); - - tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP_GAP_MASK | - CG_DISPLAY_GAP_CNTL__DISP_GAP_MCHG_MASK); - tmp |= ((AMDGPU_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP_GAP__SHIFT) | - (AMDGPU_PM_DISPLAY_GAP_VBLANK << CG_DISPLAY_GAP_CNTL__DISP_GAP_MCHG__SHIFT)); - - WREG32_SMC(ixCG_DISPLAY_GAP_CNTL, tmp); -} - -static void ci_program_vc(struct amdgpu_device *adev) -{ - u32 tmp; - - tmp = RREG32_SMC(ixSCLK_PWRMGT_CNTL); - tmp &= ~(SCLK_PWRMGT_CNTL__RESET_SCLK_CNT_MASK | SCLK_PWRMGT_CNTL__RESET_BUSY_CNT_MASK); - WREG32_SMC(ixSCLK_PWRMGT_CNTL, tmp); - - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_0, CISLANDS_VRC_DFLT0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_1, CISLANDS_VRC_DFLT1); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_2, CISLANDS_VRC_DFLT2); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_3, CISLANDS_VRC_DFLT3); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_4, CISLANDS_VRC_DFLT4); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_5, CISLANDS_VRC_DFLT5); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_6, CISLANDS_VRC_DFLT6); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_7, CISLANDS_VRC_DFLT7); -} - -static void ci_clear_vc(struct amdgpu_device *adev) -{ - u32 tmp; - - tmp = RREG32_SMC(ixSCLK_PWRMGT_CNTL); - tmp |= (SCLK_PWRMGT_CNTL__RESET_SCLK_CNT_MASK | SCLK_PWRMGT_CNTL__RESET_BUSY_CNT_MASK); - WREG32_SMC(ixSCLK_PWRMGT_CNTL, tmp); - - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_0, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_1, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_2, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_3, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_4, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_5, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_6, 0); - WREG32_SMC(ixCG_FREQ_TRAN_VOTING_7, 0); -} - -static int ci_upload_firmware(struct amdgpu_device *adev) -{ - int i, ret; - - if (amdgpu_ci_is_smc_running(adev)) { - DRM_INFO("smc is running, no need to load smc firmware\n"); - return 0; - } - - for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32_SMC(ixRCU_UC_EVENTS) & RCU_UC_EVENTS__boot_seq_done_MASK) - break; - } - WREG32_SMC(ixSMC_SYSCON_MISC_CNTL, 1); - - amdgpu_ci_stop_smc_clock(adev); - amdgpu_ci_reset_smc(adev); - - ret = amdgpu_ci_load_smc_ucode(adev, SMC_RAM_END); - - return ret; - -} - -static int ci_get_svi2_voltage_table(struct amdgpu_device *adev, - struct amdgpu_clock_voltage_dependency_table *voltage_dependency_table, - struct atom_voltage_table *voltage_table) -{ - u32 i; - - if (voltage_dependency_table == NULL) - return -EINVAL; - - voltage_table->mask_low = 0; - voltage_table->phase_delay = 0; - - voltage_table->count = voltage_dependency_table->count; - for (i = 0; i < voltage_table->count; i++) { - voltage_table->entries[i].value = voltage_dependency_table->entries[i].v; - voltage_table->entries[i].smio_low = 0; - } - - return 0; -} - -static int ci_construct_voltage_tables(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - - if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { - ret = amdgpu_atombios_get_voltage_table(adev, VOLTAGE_TYPE_VDDC, - VOLTAGE_OBJ_GPIO_LUT, - &pi->vddc_voltage_table); - if (ret) - return ret; - } else if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { - ret = ci_get_svi2_voltage_table(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_mclk, - &pi->vddc_voltage_table); - if (ret) - return ret; - } - - if (pi->vddc_voltage_table.count > SMU7_MAX_LEVELS_VDDC) - ci_trim_voltage_table_to_fit_state_table(adev, SMU7_MAX_LEVELS_VDDC, - &pi->vddc_voltage_table); - - if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { - ret = amdgpu_atombios_get_voltage_table(adev, VOLTAGE_TYPE_VDDCI, - VOLTAGE_OBJ_GPIO_LUT, - &pi->vddci_voltage_table); - if (ret) - return ret; - } else if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { - ret = ci_get_svi2_voltage_table(adev, - &adev->pm.dpm.dyn_state.vddci_dependency_on_mclk, - &pi->vddci_voltage_table); - if (ret) - return ret; - } - - if (pi->vddci_voltage_table.count > SMU7_MAX_LEVELS_VDDCI) - ci_trim_voltage_table_to_fit_state_table(adev, SMU7_MAX_LEVELS_VDDCI, - &pi->vddci_voltage_table); - - if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) { - ret = amdgpu_atombios_get_voltage_table(adev, VOLTAGE_TYPE_MVDDC, - VOLTAGE_OBJ_GPIO_LUT, - &pi->mvdd_voltage_table); - if (ret) - return ret; - } else if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { - ret = ci_get_svi2_voltage_table(adev, - &adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, - &pi->mvdd_voltage_table); - if (ret) - return ret; - } - - if (pi->mvdd_voltage_table.count > SMU7_MAX_LEVELS_MVDD) - ci_trim_voltage_table_to_fit_state_table(adev, SMU7_MAX_LEVELS_MVDD, - &pi->mvdd_voltage_table); - - return 0; -} - -static void ci_populate_smc_voltage_table(struct amdgpu_device *adev, - struct atom_voltage_table_entry *voltage_table, - SMU7_Discrete_VoltageLevel *smc_voltage_table) -{ - int ret; - - ret = ci_get_std_voltage_value_sidd(adev, voltage_table, - &smc_voltage_table->StdVoltageHiSidd, - &smc_voltage_table->StdVoltageLoSidd); - - if (ret) { - smc_voltage_table->StdVoltageHiSidd = voltage_table->value * VOLTAGE_SCALE; - smc_voltage_table->StdVoltageLoSidd = voltage_table->value * VOLTAGE_SCALE; - } - - smc_voltage_table->Voltage = cpu_to_be16(voltage_table->value * VOLTAGE_SCALE); - smc_voltage_table->StdVoltageHiSidd = - cpu_to_be16(smc_voltage_table->StdVoltageHiSidd); - smc_voltage_table->StdVoltageLoSidd = - cpu_to_be16(smc_voltage_table->StdVoltageLoSidd); -} - -static int ci_populate_smc_vddc_table(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - unsigned int count; - - table->VddcLevelCount = pi->vddc_voltage_table.count; - for (count = 0; count < table->VddcLevelCount; count++) { - ci_populate_smc_voltage_table(adev, - &pi->vddc_voltage_table.entries[count], - &table->VddcLevel[count]); - - if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) - table->VddcLevel[count].Smio |= - pi->vddc_voltage_table.entries[count].smio_low; - else - table->VddcLevel[count].Smio = 0; - } - table->VddcLevelCount = cpu_to_be32(table->VddcLevelCount); - - return 0; -} - -static int ci_populate_smc_vddci_table(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - unsigned int count; - struct ci_power_info *pi = ci_get_pi(adev); - - table->VddciLevelCount = pi->vddci_voltage_table.count; - for (count = 0; count < table->VddciLevelCount; count++) { - ci_populate_smc_voltage_table(adev, - &pi->vddci_voltage_table.entries[count], - &table->VddciLevel[count]); - - if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) - table->VddciLevel[count].Smio |= - pi->vddci_voltage_table.entries[count].smio_low; - else - table->VddciLevel[count].Smio = 0; - } - table->VddciLevelCount = cpu_to_be32(table->VddciLevelCount); - - return 0; -} - -static int ci_populate_smc_mvdd_table(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - unsigned int count; - - table->MvddLevelCount = pi->mvdd_voltage_table.count; - for (count = 0; count < table->MvddLevelCount; count++) { - ci_populate_smc_voltage_table(adev, - &pi->mvdd_voltage_table.entries[count], - &table->MvddLevel[count]); - - if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) - table->MvddLevel[count].Smio |= - pi->mvdd_voltage_table.entries[count].smio_low; - else - table->MvddLevel[count].Smio = 0; - } - table->MvddLevelCount = cpu_to_be32(table->MvddLevelCount); - - return 0; -} - -static int ci_populate_smc_voltage_tables(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - int ret; - - ret = ci_populate_smc_vddc_table(adev, table); - if (ret) - return ret; - - ret = ci_populate_smc_vddci_table(adev, table); - if (ret) - return ret; - - ret = ci_populate_smc_mvdd_table(adev, table); - if (ret) - return ret; - - return 0; -} - -static int ci_populate_mvdd_value(struct amdgpu_device *adev, u32 mclk, - SMU7_Discrete_VoltageLevel *voltage) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 i = 0; - - if (pi->mvdd_control != CISLANDS_VOLTAGE_CONTROL_NONE) { - for (i = 0; i < adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count; i++) { - if (mclk <= adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries[i].clk) { - voltage->Voltage = pi->mvdd_voltage_table.entries[i].value; - break; - } - } - - if (i >= adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count) - return -EINVAL; - } - - return -EINVAL; -} - -static int ci_get_std_voltage_value_sidd(struct amdgpu_device *adev, - struct atom_voltage_table_entry *voltage_table, - u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd) -{ - u16 v_index, idx; - bool voltage_found = false; - *std_voltage_hi_sidd = voltage_table->value * VOLTAGE_SCALE; - *std_voltage_lo_sidd = voltage_table->value * VOLTAGE_SCALE; - - if (adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries == NULL) - return -EINVAL; - - if (adev->pm.dpm.dyn_state.cac_leakage_table.entries) { - for (v_index = 0; (u32)v_index < adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { - if (voltage_table->value == - adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { - voltage_found = true; - if ((u32)v_index < adev->pm.dpm.dyn_state.cac_leakage_table.count) - idx = v_index; - else - idx = adev->pm.dpm.dyn_state.cac_leakage_table.count - 1; - *std_voltage_lo_sidd = - adev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; - *std_voltage_hi_sidd = - adev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; - break; - } - } - - if (!voltage_found) { - for (v_index = 0; (u32)v_index < adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) { - if (voltage_table->value <= - adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) { - voltage_found = true; - if ((u32)v_index < adev->pm.dpm.dyn_state.cac_leakage_table.count) - idx = v_index; - else - idx = adev->pm.dpm.dyn_state.cac_leakage_table.count - 1; - *std_voltage_lo_sidd = - adev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE; - *std_voltage_hi_sidd = - adev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE; - break; - } - } - } - } - - return 0; -} - -static void ci_populate_phase_value_based_on_sclk(struct amdgpu_device *adev, - const struct amdgpu_phase_shedding_limits_table *limits, - u32 sclk, - u32 *phase_shedding) -{ - unsigned int i; - - *phase_shedding = 1; - - for (i = 0; i < limits->count; i++) { - if (sclk < limits->entries[i].sclk) { - *phase_shedding = i; - break; - } - } -} - -static void ci_populate_phase_value_based_on_mclk(struct amdgpu_device *adev, - const struct amdgpu_phase_shedding_limits_table *limits, - u32 mclk, - u32 *phase_shedding) -{ - unsigned int i; - - *phase_shedding = 1; - - for (i = 0; i < limits->count; i++) { - if (mclk < limits->entries[i].mclk) { - *phase_shedding = i; - break; - } - } -} - -static int ci_init_arb_table_index(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - int ret; - - ret = amdgpu_ci_read_smc_sram_dword(adev, pi->arb_table_start, - &tmp, pi->sram_end); - if (ret) - return ret; - - tmp &= 0x00FFFFFF; - tmp |= MC_CG_ARB_FREQ_F1 << 24; - - return amdgpu_ci_write_smc_sram_dword(adev, pi->arb_table_start, - tmp, pi->sram_end); -} - -static int ci_get_dependency_volt_by_clk(struct amdgpu_device *adev, - struct amdgpu_clock_voltage_dependency_table *allowed_clock_voltage_table, - u32 clock, u32 *voltage) -{ - u32 i = 0; - - if (allowed_clock_voltage_table->count == 0) - return -EINVAL; - - for (i = 0; i < allowed_clock_voltage_table->count; i++) { - if (allowed_clock_voltage_table->entries[i].clk >= clock) { - *voltage = allowed_clock_voltage_table->entries[i].v; - return 0; - } - } - - *voltage = allowed_clock_voltage_table->entries[i-1].v; - - return 0; -} - -static u8 ci_get_sleep_divider_id_from_clock(u32 sclk, u32 min_sclk_in_sr) -{ - u32 i; - u32 tmp; - u32 min = max(min_sclk_in_sr, (u32)CISLAND_MINIMUM_ENGINE_CLOCK); - - if (sclk < min) - return 0; - - for (i = CISLAND_MAX_DEEPSLEEP_DIVIDER_ID; ; i--) { - tmp = sclk >> i; - if (tmp >= min || i == 0) - break; - } - - return (u8)i; -} - -static int ci_initial_switch_from_arb_f0_to_f1(struct amdgpu_device *adev) -{ - return ci_copy_and_switch_arb_sets(adev, MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1); -} - -static int ci_reset_to_default(struct amdgpu_device *adev) -{ - return (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_ResetToDefaults) == PPSMC_Result_OK) ? - 0 : -EINVAL; -} - -static int ci_force_switch_to_arb_f0(struct amdgpu_device *adev) -{ - u32 tmp; - - tmp = (RREG32_SMC(ixSMC_SCRATCH9) & 0x0000ff00) >> 8; - - if (tmp == MC_CG_ARB_FREQ_F0) - return 0; - - return ci_copy_and_switch_arb_sets(adev, tmp, MC_CG_ARB_FREQ_F0); -} - -static void ci_register_patching_mc_arb(struct amdgpu_device *adev, - const u32 engine_clock, - const u32 memory_clock, - u32 *dram_timimg2) -{ - bool patch; - u32 tmp, tmp2; - - tmp = RREG32(mmMC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? true : false; - - if (patch && - ((adev->pdev->device == 0x67B0) || - (adev->pdev->device == 0x67B1))) { - if ((memory_clock > 100000) && (memory_clock <= 125000)) { - tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; - *dram_timimg2 &= ~0x00ff0000; - *dram_timimg2 |= tmp2 << 16; - } else if ((memory_clock > 125000) && (memory_clock <= 137500)) { - tmp2 = (((0x36 * engine_clock) / 137500) - 1) & 0xff; - *dram_timimg2 &= ~0x00ff0000; - *dram_timimg2 |= tmp2 << 16; - } - } -} - -static int ci_populate_memory_timing_parameters(struct amdgpu_device *adev, - u32 sclk, - u32 mclk, - SMU7_Discrete_MCArbDramTimingTableEntry *arb_regs) -{ - u32 dram_timing; - u32 dram_timing2; - u32 burst_time; - - amdgpu_atombios_set_engine_dram_timings(adev, sclk, mclk); - - dram_timing = RREG32(mmMC_ARB_DRAM_TIMING); - dram_timing2 = RREG32(mmMC_ARB_DRAM_TIMING2); - burst_time = RREG32(mmMC_ARB_BURST_TIME) & MC_ARB_BURST_TIME__STATE0_MASK; - - ci_register_patching_mc_arb(adev, sclk, mclk, &dram_timing2); - - arb_regs->McArbDramTiming = cpu_to_be32(dram_timing); - arb_regs->McArbDramTiming2 = cpu_to_be32(dram_timing2); - arb_regs->McArbBurstTime = (u8)burst_time; - - return 0; -} - -static int ci_do_program_memory_timing_parameters(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - SMU7_Discrete_MCArbDramTimingTable arb_regs; - u32 i, j; - int ret = 0; - - memset(&arb_regs, 0, sizeof(SMU7_Discrete_MCArbDramTimingTable)); - - for (i = 0; i < pi->dpm_table.sclk_table.count; i++) { - for (j = 0; j < pi->dpm_table.mclk_table.count; j++) { - ret = ci_populate_memory_timing_parameters(adev, - pi->dpm_table.sclk_table.dpm_levels[i].value, - pi->dpm_table.mclk_table.dpm_levels[j].value, - &arb_regs.entries[i][j]); - if (ret) - break; - } - } - - if (ret == 0) - ret = amdgpu_ci_copy_bytes_to_smc(adev, - pi->arb_table_start, - (u8 *)&arb_regs, - sizeof(SMU7_Discrete_MCArbDramTimingTable), - pi->sram_end); - - return ret; -} - -static int ci_program_memory_timing_parameters(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (pi->need_update_smu7_dpm_table == 0) - return 0; - - return ci_do_program_memory_timing_parameters(adev); -} - -static void ci_populate_smc_initial_state(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_boot_state) -{ - struct ci_ps *boot_state = ci_get_ps(amdgpu_boot_state); - struct ci_power_info *pi = ci_get_pi(adev); - u32 level = 0; - - for (level = 0; level < adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; level++) { - if (adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[level].clk >= - boot_state->performance_levels[0].sclk) { - pi->smc_state_table.GraphicsBootLevel = level; - break; - } - } - - for (level = 0; level < adev->pm.dpm.dyn_state.vddc_dependency_on_mclk.count; level++) { - if (adev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries[level].clk >= - boot_state->performance_levels[0].mclk) { - pi->smc_state_table.MemoryBootLevel = level; - break; - } - } -} - -static u32 ci_get_dpm_level_enable_mask_value(struct ci_single_dpm_table *dpm_table) -{ - u32 i; - u32 mask_value = 0; - - for (i = dpm_table->count; i > 0; i--) { - mask_value = mask_value << 1; - if (dpm_table->dpm_levels[i-1].enabled) - mask_value |= 0x1; - else - mask_value &= 0xFFFFFFFE; - } - - return mask_value; -} - -static void ci_populate_smc_link_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &pi->dpm_table; - u32 i; - - for (i = 0; i < dpm_table->pcie_speed_table.count; i++) { - table->LinkLevel[i].PcieGenSpeed = - (u8)dpm_table->pcie_speed_table.dpm_levels[i].value; - table->LinkLevel[i].PcieLaneCount = - amdgpu_encode_pci_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); - table->LinkLevel[i].EnabledForActivity = 1; - table->LinkLevel[i].DownT = cpu_to_be32(5); - table->LinkLevel[i].UpT = cpu_to_be32(30); - } - - pi->smc_state_table.LinkLevelCount = (u8)dpm_table->pcie_speed_table.count; - pi->dpm_level_enable_mask.pcie_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); -} - -static int ci_populate_smc_uvd_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - u32 count; - struct atom_clock_dividers dividers; - int ret = -EINVAL; - - table->UvdLevelCount = - adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count; - - for (count = 0; count < table->UvdLevelCount; count++) { - table->UvdLevel[count].VclkFrequency = - adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].vclk; - table->UvdLevel[count].DclkFrequency = - adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].dclk; - table->UvdLevel[count].MinVddc = - adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; - table->UvdLevel[count].MinVddcPhases = 1; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, - table->UvdLevel[count].VclkFrequency, false, ÷rs); - if (ret) - return ret; - - table->UvdLevel[count].VclkDivider = (u8)dividers.post_divider; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, - table->UvdLevel[count].DclkFrequency, false, ÷rs); - if (ret) - return ret; - - table->UvdLevel[count].DclkDivider = (u8)dividers.post_divider; - - table->UvdLevel[count].VclkFrequency = cpu_to_be32(table->UvdLevel[count].VclkFrequency); - table->UvdLevel[count].DclkFrequency = cpu_to_be32(table->UvdLevel[count].DclkFrequency); - table->UvdLevel[count].MinVddc = cpu_to_be16(table->UvdLevel[count].MinVddc); - } - - return ret; -} - -static int ci_populate_smc_vce_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - u32 count; - struct atom_clock_dividers dividers; - int ret = -EINVAL; - - table->VceLevelCount = - adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count; - - for (count = 0; count < table->VceLevelCount; count++) { - table->VceLevel[count].Frequency = - adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].evclk; - table->VceLevel[count].MinVoltage = - (u16)adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; - table->VceLevel[count].MinPhases = 1; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, - table->VceLevel[count].Frequency, false, ÷rs); - if (ret) - return ret; - - table->VceLevel[count].Divider = (u8)dividers.post_divider; - - table->VceLevel[count].Frequency = cpu_to_be32(table->VceLevel[count].Frequency); - table->VceLevel[count].MinVoltage = cpu_to_be16(table->VceLevel[count].MinVoltage); - } - - return ret; - -} - -static int ci_populate_smc_acp_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - u32 count; - struct atom_clock_dividers dividers; - int ret = -EINVAL; - - table->AcpLevelCount = (u8) - (adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count); - - for (count = 0; count < table->AcpLevelCount; count++) { - table->AcpLevel[count].Frequency = - adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].clk; - table->AcpLevel[count].MinVoltage = - adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].v; - table->AcpLevel[count].MinPhases = 1; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, - table->AcpLevel[count].Frequency, false, ÷rs); - if (ret) - return ret; - - table->AcpLevel[count].Divider = (u8)dividers.post_divider; - - table->AcpLevel[count].Frequency = cpu_to_be32(table->AcpLevel[count].Frequency); - table->AcpLevel[count].MinVoltage = cpu_to_be16(table->AcpLevel[count].MinVoltage); - } - - return ret; -} - -static int ci_populate_smc_samu_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - u32 count; - struct atom_clock_dividers dividers; - int ret = -EINVAL; - - table->SamuLevelCount = - adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count; - - for (count = 0; count < table->SamuLevelCount; count++) { - table->SamuLevel[count].Frequency = - adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].clk; - table->SamuLevel[count].MinVoltage = - adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE; - table->SamuLevel[count].MinPhases = 1; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, - table->SamuLevel[count].Frequency, false, ÷rs); - if (ret) - return ret; - - table->SamuLevel[count].Divider = (u8)dividers.post_divider; - - table->SamuLevel[count].Frequency = cpu_to_be32(table->SamuLevel[count].Frequency); - table->SamuLevel[count].MinVoltage = cpu_to_be16(table->SamuLevel[count].MinVoltage); - } - - return ret; -} - -static int ci_calculate_mclk_params(struct amdgpu_device *adev, - u32 memory_clock, - SMU7_Discrete_MemoryLevel *mclk, - bool strobe_mode, - bool dll_state_on) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 dll_cntl = pi->clock_registers.dll_cntl; - u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; - u32 mpll_ad_func_cntl = pi->clock_registers.mpll_ad_func_cntl; - u32 mpll_dq_func_cntl = pi->clock_registers.mpll_dq_func_cntl; - u32 mpll_func_cntl = pi->clock_registers.mpll_func_cntl; - u32 mpll_func_cntl_1 = pi->clock_registers.mpll_func_cntl_1; - u32 mpll_func_cntl_2 = pi->clock_registers.mpll_func_cntl_2; - u32 mpll_ss1 = pi->clock_registers.mpll_ss1; - u32 mpll_ss2 = pi->clock_registers.mpll_ss2; - struct atom_mpll_param mpll_param; - int ret; - - ret = amdgpu_atombios_get_memory_pll_dividers(adev, memory_clock, strobe_mode, &mpll_param); - if (ret) - return ret; - - mpll_func_cntl &= ~MPLL_FUNC_CNTL__BWCTRL_MASK; - mpll_func_cntl |= (mpll_param.bwcntl << MPLL_FUNC_CNTL__BWCTRL__SHIFT); - - mpll_func_cntl_1 &= ~(MPLL_FUNC_CNTL_1__CLKF_MASK | MPLL_FUNC_CNTL_1__CLKFRAC_MASK | - MPLL_FUNC_CNTL_1__VCO_MODE_MASK); - mpll_func_cntl_1 |= (mpll_param.clkf) << MPLL_FUNC_CNTL_1__CLKF__SHIFT | - (mpll_param.clkfrac << MPLL_FUNC_CNTL_1__CLKFRAC__SHIFT) | - (mpll_param.vco_mode << MPLL_FUNC_CNTL_1__VCO_MODE__SHIFT); - - mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK; - mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT); - - if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { - mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK | - MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK); - mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) | - (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT); - } - - if (pi->caps_mclk_ss_support) { - struct amdgpu_atom_ss ss; - u32 freq_nom; - u32 tmp; - u32 reference_clock = adev->clock.mpll.reference_freq; - - if (mpll_param.qdr == 1) - freq_nom = memory_clock * 4 * (1 << mpll_param.post_div); - else - freq_nom = memory_clock * 2 * (1 << mpll_param.post_div); - - tmp = (freq_nom / reference_clock); - tmp = tmp * tmp; - if (amdgpu_atombios_get_asic_ss_info(adev, &ss, - ASIC_INTERNAL_MEMORY_SS, freq_nom)) { - u32 clks = reference_clock * 5 / ss.rate; - u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom); - - mpll_ss1 &= ~MPLL_SS1__CLKV_MASK; - mpll_ss1 |= (clkv << MPLL_SS1__CLKV__SHIFT); - - mpll_ss2 &= ~MPLL_SS2__CLKS_MASK; - mpll_ss2 |= (clks << MPLL_SS2__CLKS__SHIFT); - } - } - - mclk_pwrmgt_cntl &= ~MCLK_PWRMGT_CNTL__DLL_SPEED_MASK; - mclk_pwrmgt_cntl |= (mpll_param.dll_speed << MCLK_PWRMGT_CNTL__DLL_SPEED__SHIFT); - - if (dll_state_on) - mclk_pwrmgt_cntl |= MCLK_PWRMGT_CNTL__MRDCK0_PDNB_MASK | - MCLK_PWRMGT_CNTL__MRDCK1_PDNB_MASK; - else - mclk_pwrmgt_cntl &= ~(MCLK_PWRMGT_CNTL__MRDCK0_PDNB_MASK | - MCLK_PWRMGT_CNTL__MRDCK1_PDNB_MASK); - - mclk->MclkFrequency = memory_clock; - mclk->MpllFuncCntl = mpll_func_cntl; - mclk->MpllFuncCntl_1 = mpll_func_cntl_1; - mclk->MpllFuncCntl_2 = mpll_func_cntl_2; - mclk->MpllAdFuncCntl = mpll_ad_func_cntl; - mclk->MpllDqFuncCntl = mpll_dq_func_cntl; - mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; - mclk->DllCntl = dll_cntl; - mclk->MpllSs1 = mpll_ss1; - mclk->MpllSs2 = mpll_ss2; - - return 0; -} - -static int ci_populate_single_memory_level(struct amdgpu_device *adev, - u32 memory_clock, - SMU7_Discrete_MemoryLevel *memory_level) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - bool dll_state_on; - - if (adev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) { - ret = ci_get_dependency_volt_by_clk(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_mclk, - memory_clock, &memory_level->MinVddc); - if (ret) - return ret; - } - - if (adev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) { - ret = ci_get_dependency_volt_by_clk(adev, - &adev->pm.dpm.dyn_state.vddci_dependency_on_mclk, - memory_clock, &memory_level->MinVddci); - if (ret) - return ret; - } - - if (adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries) { - ret = ci_get_dependency_volt_by_clk(adev, - &adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, - memory_clock, &memory_level->MinMvdd); - if (ret) - return ret; - } - - memory_level->MinVddcPhases = 1; - - if (pi->vddc_phase_shed_control) - ci_populate_phase_value_based_on_mclk(adev, - &adev->pm.dpm.dyn_state.phase_shedding_limits_table, - memory_clock, - &memory_level->MinVddcPhases); - - memory_level->EnabledForActivity = 1; - memory_level->EnabledForThrottle = 1; - memory_level->UpH = 0; - memory_level->DownH = 100; - memory_level->VoltageDownH = 0; - memory_level->ActivityLevel = (u16)pi->mclk_activity_target; - - memory_level->StutterEnable = false; - memory_level->StrobeEnable = false; - memory_level->EdcReadEnable = false; - memory_level->EdcWriteEnable = false; - memory_level->RttEnable = false; - - memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - if (pi->mclk_stutter_mode_threshold && - (memory_clock <= pi->mclk_stutter_mode_threshold) && - (!pi->uvd_enabled) && - (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && - (adev->pm.dpm.new_active_crtc_count <= 2)) - memory_level->StutterEnable = true; - - if (pi->mclk_strobe_mode_threshold && - (memory_clock <= pi->mclk_strobe_mode_threshold)) - memory_level->StrobeEnable = 1; - - if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { - memory_level->StrobeRatio = - ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); - if (pi->mclk_edc_enable_threshold && - (memory_clock > pi->mclk_edc_enable_threshold)) - memory_level->EdcReadEnable = true; - - if (pi->mclk_edc_wr_enable_threshold && - (memory_clock > pi->mclk_edc_wr_enable_threshold)) - memory_level->EdcWriteEnable = true; - - if (memory_level->StrobeEnable) { - if (ci_get_mclk_frequency_ratio(memory_clock, true) >= - ((RREG32(mmMC_SEQ_MISC7) >> 16) & 0xf)) - dll_state_on = ((RREG32(mmMC_SEQ_MISC5) >> 1) & 0x1) ? true : false; - else - dll_state_on = ((RREG32(mmMC_SEQ_MISC6) >> 1) & 0x1) ? true : false; - } else { - dll_state_on = pi->dll_default_on; - } - } else { - memory_level->StrobeRatio = ci_get_ddr3_mclk_frequency_ratio(memory_clock); - dll_state_on = ((RREG32(mmMC_SEQ_MISC5) >> 1) & 0x1) ? true : false; - } - - ret = ci_calculate_mclk_params(adev, memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on); - if (ret) - return ret; - - memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE); - memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases); - memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE); - memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE); - - memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency); - memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel); - memory_level->MpllFuncCntl = cpu_to_be32(memory_level->MpllFuncCntl); - memory_level->MpllFuncCntl_1 = cpu_to_be32(memory_level->MpllFuncCntl_1); - memory_level->MpllFuncCntl_2 = cpu_to_be32(memory_level->MpllFuncCntl_2); - memory_level->MpllAdFuncCntl = cpu_to_be32(memory_level->MpllAdFuncCntl); - memory_level->MpllDqFuncCntl = cpu_to_be32(memory_level->MpllDqFuncCntl); - memory_level->MclkPwrmgtCntl = cpu_to_be32(memory_level->MclkPwrmgtCntl); - memory_level->DllCntl = cpu_to_be32(memory_level->DllCntl); - memory_level->MpllSs1 = cpu_to_be32(memory_level->MpllSs1); - memory_level->MpllSs2 = cpu_to_be32(memory_level->MpllSs2); - - return 0; -} - -static int ci_populate_smc_acpi_level(struct amdgpu_device *adev, - SMU7_Discrete_DpmTable *table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct atom_clock_dividers dividers; - SMU7_Discrete_VoltageLevel voltage_level; - u32 spll_func_cntl = pi->clock_registers.cg_spll_func_cntl; - u32 spll_func_cntl_2 = pi->clock_registers.cg_spll_func_cntl_2; - u32 dll_cntl = pi->clock_registers.dll_cntl; - u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl; - int ret; - - table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - - if (pi->acpi_vddc) - table->ACPILevel.MinVddc = cpu_to_be32(pi->acpi_vddc * VOLTAGE_SCALE); - else - table->ACPILevel.MinVddc = cpu_to_be32(pi->min_vddc_in_pp_table * VOLTAGE_SCALE); - - table->ACPILevel.MinVddcPhases = pi->vddc_phase_shed_control ? 0 : 1; - - table->ACPILevel.SclkFrequency = adev->clock.spll.reference_freq; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_SCLK, - table->ACPILevel.SclkFrequency, false, ÷rs); - if (ret) - return ret; - - table->ACPILevel.SclkDid = (u8)dividers.post_divider; - table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - table->ACPILevel.DeepSleepDivId = 0; - - spll_func_cntl &= ~CG_SPLL_FUNC_CNTL__SPLL_PWRON_MASK; - spll_func_cntl |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK; - - spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= (4 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT); - - table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; - table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; - table->ACPILevel.CgSpllFuncCntl3 = pi->clock_registers.cg_spll_func_cntl_3; - table->ACPILevel.CgSpllFuncCntl4 = pi->clock_registers.cg_spll_func_cntl_4; - table->ACPILevel.SpllSpreadSpectrum = pi->clock_registers.cg_spll_spread_spectrum; - table->ACPILevel.SpllSpreadSpectrum2 = pi->clock_registers.cg_spll_spread_spectrum_2; - table->ACPILevel.CcPwrDynRm = 0; - table->ACPILevel.CcPwrDynRm1 = 0; - - table->ACPILevel.Flags = cpu_to_be32(table->ACPILevel.Flags); - table->ACPILevel.MinVddcPhases = cpu_to_be32(table->ACPILevel.MinVddcPhases); - table->ACPILevel.SclkFrequency = cpu_to_be32(table->ACPILevel.SclkFrequency); - table->ACPILevel.CgSpllFuncCntl = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl); - table->ACPILevel.CgSpllFuncCntl2 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl2); - table->ACPILevel.CgSpllFuncCntl3 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl3); - table->ACPILevel.CgSpllFuncCntl4 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl4); - table->ACPILevel.SpllSpreadSpectrum = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum); - table->ACPILevel.SpllSpreadSpectrum2 = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum2); - table->ACPILevel.CcPwrDynRm = cpu_to_be32(table->ACPILevel.CcPwrDynRm); - table->ACPILevel.CcPwrDynRm1 = cpu_to_be32(table->ACPILevel.CcPwrDynRm1); - - table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; - table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; - - if (pi->vddci_control != CISLANDS_VOLTAGE_CONTROL_NONE) { - if (pi->acpi_vddci) - table->MemoryACPILevel.MinVddci = - cpu_to_be32(pi->acpi_vddci * VOLTAGE_SCALE); - else - table->MemoryACPILevel.MinVddci = - cpu_to_be32(pi->min_vddci_in_pp_table * VOLTAGE_SCALE); - } - - if (ci_populate_mvdd_value(adev, 0, &voltage_level)) - table->MemoryACPILevel.MinMvdd = 0; - else - table->MemoryACPILevel.MinMvdd = - cpu_to_be32(voltage_level.Voltage * VOLTAGE_SCALE); - - mclk_pwrmgt_cntl |= MCLK_PWRMGT_CNTL__MRDCK0_RESET_MASK | - MCLK_PWRMGT_CNTL__MRDCK1_RESET_MASK; - mclk_pwrmgt_cntl &= ~(MCLK_PWRMGT_CNTL__MRDCK0_PDNB_MASK | - MCLK_PWRMGT_CNTL__MRDCK1_PDNB_MASK); - - dll_cntl &= ~(DLL_CNTL__MRDCK0_BYPASS_MASK | DLL_CNTL__MRDCK1_BYPASS_MASK); - - table->MemoryACPILevel.DllCntl = cpu_to_be32(dll_cntl); - table->MemoryACPILevel.MclkPwrmgtCntl = cpu_to_be32(mclk_pwrmgt_cntl); - table->MemoryACPILevel.MpllAdFuncCntl = - cpu_to_be32(pi->clock_registers.mpll_ad_func_cntl); - table->MemoryACPILevel.MpllDqFuncCntl = - cpu_to_be32(pi->clock_registers.mpll_dq_func_cntl); - table->MemoryACPILevel.MpllFuncCntl = - cpu_to_be32(pi->clock_registers.mpll_func_cntl); - table->MemoryACPILevel.MpllFuncCntl_1 = - cpu_to_be32(pi->clock_registers.mpll_func_cntl_1); - table->MemoryACPILevel.MpllFuncCntl_2 = - cpu_to_be32(pi->clock_registers.mpll_func_cntl_2); - table->MemoryACPILevel.MpllSs1 = cpu_to_be32(pi->clock_registers.mpll_ss1); - table->MemoryACPILevel.MpllSs2 = cpu_to_be32(pi->clock_registers.mpll_ss2); - - table->MemoryACPILevel.EnabledForThrottle = 0; - table->MemoryACPILevel.EnabledForActivity = 0; - table->MemoryACPILevel.UpH = 0; - table->MemoryACPILevel.DownH = 100; - table->MemoryACPILevel.VoltageDownH = 0; - table->MemoryACPILevel.ActivityLevel = - cpu_to_be16((u16)pi->mclk_activity_target); - - table->MemoryACPILevel.StutterEnable = false; - table->MemoryACPILevel.StrobeEnable = false; - table->MemoryACPILevel.EdcReadEnable = false; - table->MemoryACPILevel.EdcWriteEnable = false; - table->MemoryACPILevel.RttEnable = false; - - return 0; -} - - -static int ci_enable_ulv(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ulv_parm *ulv = &pi->ulv; - - if (ulv->supported) { - if (enable) - return (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableULV) == PPSMC_Result_OK) ? - 0 : -EINVAL; - else - return (amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? - 0 : -EINVAL; - } - - return 0; -} - -static int ci_populate_ulv_level(struct amdgpu_device *adev, - SMU7_Discrete_Ulv *state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u16 ulv_voltage = adev->pm.dpm.backbias_response_time; - - state->CcPwrDynRm = 0; - state->CcPwrDynRm1 = 0; - - if (ulv_voltage == 0) { - pi->ulv.supported = false; - return 0; - } - - if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_BY_SVID2) { - if (ulv_voltage > adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) - state->VddcOffset = 0; - else - state->VddcOffset = - adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage; - } else { - if (ulv_voltage > adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v) - state->VddcOffsetVid = 0; - else - state->VddcOffsetVid = (u8) - ((adev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage) * - VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); - } - state->VddcPhase = pi->vddc_phase_shed_control ? 0 : 1; - - state->CcPwrDynRm = cpu_to_be32(state->CcPwrDynRm); - state->CcPwrDynRm1 = cpu_to_be32(state->CcPwrDynRm1); - state->VddcOffset = cpu_to_be16(state->VddcOffset); - - return 0; -} - -static int ci_calculate_sclk_params(struct amdgpu_device *adev, - u32 engine_clock, - SMU7_Discrete_GraphicsLevel *sclk) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct atom_clock_dividers dividers; - u32 spll_func_cntl_3 = pi->clock_registers.cg_spll_func_cntl_3; - u32 spll_func_cntl_4 = pi->clock_registers.cg_spll_func_cntl_4; - u32 cg_spll_spread_spectrum = pi->clock_registers.cg_spll_spread_spectrum; - u32 cg_spll_spread_spectrum_2 = pi->clock_registers.cg_spll_spread_spectrum_2; - u32 reference_clock = adev->clock.spll.reference_freq; - u32 reference_divider; - u32 fbdiv; - int ret; - - ret = amdgpu_atombios_get_clock_dividers(adev, - COMPUTE_GPUCLK_INPUT_FLAG_SCLK, - engine_clock, false, ÷rs); - if (ret) - return ret; - - reference_divider = 1 + dividers.ref_div; - fbdiv = dividers.fb_div & 0x3FFFFFF; - - spll_func_cntl_3 &= ~CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK; - spll_func_cntl_3 |= (fbdiv << CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT); - spll_func_cntl_3 |= CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK; - - if (pi->caps_sclk_ss_support) { - struct amdgpu_atom_ss ss; - u32 vco_freq = engine_clock * dividers.post_div; - - if (amdgpu_atombios_get_asic_ss_info(adev, &ss, - ASIC_INTERNAL_ENGINE_SS, vco_freq)) { - u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); - u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); - - cg_spll_spread_spectrum &= ~(CG_SPLL_SPREAD_SPECTRUM__CLKS_MASK | CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK); - cg_spll_spread_spectrum |= (clk_s << CG_SPLL_SPREAD_SPECTRUM__CLKS__SHIFT); - cg_spll_spread_spectrum |= (1 << CG_SPLL_SPREAD_SPECTRUM__SSEN__SHIFT); - - cg_spll_spread_spectrum_2 &= ~CG_SPLL_SPREAD_SPECTRUM_2__CLKV_MASK; - cg_spll_spread_spectrum_2 |= (clk_v << CG_SPLL_SPREAD_SPECTRUM_2__CLKV__SHIFT); - } - } - - sclk->SclkFrequency = engine_clock; - sclk->CgSpllFuncCntl3 = spll_func_cntl_3; - sclk->CgSpllFuncCntl4 = spll_func_cntl_4; - sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; - sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; - sclk->SclkDid = (u8)dividers.post_divider; - - return 0; -} - -static int ci_populate_single_graphic_level(struct amdgpu_device *adev, - u32 engine_clock, - u16 sclk_activity_level_t, - SMU7_Discrete_GraphicsLevel *graphic_level) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - - ret = ci_calculate_sclk_params(adev, engine_clock, graphic_level); - if (ret) - return ret; - - ret = ci_get_dependency_volt_by_clk(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_sclk, - engine_clock, &graphic_level->MinVddc); - if (ret) - return ret; - - graphic_level->SclkFrequency = engine_clock; - - graphic_level->Flags = 0; - graphic_level->MinVddcPhases = 1; - - if (pi->vddc_phase_shed_control) - ci_populate_phase_value_based_on_sclk(adev, - &adev->pm.dpm.dyn_state.phase_shedding_limits_table, - engine_clock, - &graphic_level->MinVddcPhases); - - graphic_level->ActivityLevel = sclk_activity_level_t; - - graphic_level->CcPwrDynRm = 0; - graphic_level->CcPwrDynRm1 = 0; - graphic_level->EnabledForThrottle = 1; - graphic_level->UpH = 0; - graphic_level->DownH = 0; - graphic_level->VoltageDownH = 0; - graphic_level->PowerThrottle = 0; - - if (pi->caps_sclk_ds) - graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(engine_clock, - CISLAND_MINIMUM_ENGINE_CLOCK); - - graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - - graphic_level->Flags = cpu_to_be32(graphic_level->Flags); - graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE); - graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases); - graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency); - graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel); - graphic_level->CgSpllFuncCntl3 = cpu_to_be32(graphic_level->CgSpllFuncCntl3); - graphic_level->CgSpllFuncCntl4 = cpu_to_be32(graphic_level->CgSpllFuncCntl4); - graphic_level->SpllSpreadSpectrum = cpu_to_be32(graphic_level->SpllSpreadSpectrum); - graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2); - graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm); - graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1); - - return 0; -} - -static int ci_populate_all_graphic_levels(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &pi->dpm_table; - u32 level_array_address = pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); - u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) * - SMU7_MAX_LEVELS_GRAPHICS; - SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel; - u32 i, ret; - - memset(levels, 0, level_array_size); - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - ret = ci_populate_single_graphic_level(adev, - dpm_table->sclk_table.dpm_levels[i].value, - (u16)pi->activity_target[i], - &pi->smc_state_table.GraphicsLevel[i]); - if (ret) - return ret; - if (i > 1) - pi->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; - if (i == (dpm_table->sclk_table.count - 1)) - pi->smc_state_table.GraphicsLevel[i].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - } - pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1; - - pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count; - pi->dpm_level_enable_mask.sclk_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); - - ret = amdgpu_ci_copy_bytes_to_smc(adev, level_array_address, - (u8 *)levels, level_array_size, - pi->sram_end); - if (ret) - return ret; - - return 0; -} - -static int ci_populate_ulv_state(struct amdgpu_device *adev, - SMU7_Discrete_Ulv *ulv_level) -{ - return ci_populate_ulv_level(adev, ulv_level); -} - -static int ci_populate_all_memory_levels(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &pi->dpm_table; - u32 level_array_address = pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, MemoryLevel); - u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) * - SMU7_MAX_LEVELS_MEMORY; - SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel; - u32 i, ret; - - memset(levels, 0, level_array_size); - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - if (dpm_table->mclk_table.dpm_levels[i].value == 0) - return -EINVAL; - ret = ci_populate_single_memory_level(adev, - dpm_table->mclk_table.dpm_levels[i].value, - &pi->smc_state_table.MemoryLevel[i]); - if (ret) - return ret; - } - - if ((dpm_table->mclk_table.count >= 2) && - ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) { - pi->smc_state_table.MemoryLevel[1].MinVddc = - pi->smc_state_table.MemoryLevel[0].MinVddc; - pi->smc_state_table.MemoryLevel[1].MinVddcPhases = - pi->smc_state_table.MemoryLevel[0].MinVddcPhases; - } - - pi->smc_state_table.MemoryLevel[0].ActivityLevel = cpu_to_be16(0x1F); - - pi->smc_state_table.MemoryDpmLevelCount = (u8)dpm_table->mclk_table.count; - pi->dpm_level_enable_mask.mclk_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); - - pi->smc_state_table.MemoryLevel[dpm_table->mclk_table.count - 1].DisplayWatermark = - PPSMC_DISPLAY_WATERMARK_HIGH; - - ret = amdgpu_ci_copy_bytes_to_smc(adev, level_array_address, - (u8 *)levels, level_array_size, - pi->sram_end); - if (ret) - return ret; - - return 0; -} - -static void ci_reset_single_dpm_table(struct amdgpu_device *adev, - struct ci_single_dpm_table* dpm_table, - u32 count) -{ - u32 i; - - dpm_table->count = count; - for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) - dpm_table->dpm_levels[i].enabled = false; -} - -static void ci_setup_pcie_table_entry(struct ci_single_dpm_table* dpm_table, - u32 index, u32 pcie_gen, u32 pcie_lanes) -{ - dpm_table->dpm_levels[index].value = pcie_gen; - dpm_table->dpm_levels[index].param1 = pcie_lanes; - dpm_table->dpm_levels[index].enabled = true; -} - -static int ci_setup_default_pcie_tables(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) - return -EINVAL; - - if (pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) { - pi->pcie_gen_powersaving = pi->pcie_gen_performance; - pi->pcie_lane_powersaving = pi->pcie_lane_performance; - } else if (!pi->use_pcie_performance_levels && pi->use_pcie_powersaving_levels) { - pi->pcie_gen_performance = pi->pcie_gen_powersaving; - pi->pcie_lane_performance = pi->pcie_lane_powersaving; - } - - ci_reset_single_dpm_table(adev, - &pi->dpm_table.pcie_speed_table, - SMU7_MAX_LEVELS_LINK); - - if (adev->asic_type == CHIP_BONAIRE) - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0, - pi->pcie_gen_powersaving.min, - pi->pcie_lane_powersaving.max); - else - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0, - pi->pcie_gen_powersaving.min, - pi->pcie_lane_powersaving.min); - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 1, - pi->pcie_gen_performance.min, - pi->pcie_lane_performance.min); - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 2, - pi->pcie_gen_powersaving.min, - pi->pcie_lane_powersaving.max); - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 3, - pi->pcie_gen_performance.min, - pi->pcie_lane_performance.max); - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 4, - pi->pcie_gen_powersaving.max, - pi->pcie_lane_powersaving.max); - ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 5, - pi->pcie_gen_performance.max, - pi->pcie_lane_performance.max); - - pi->dpm_table.pcie_speed_table.count = 6; - - return 0; -} - -static int ci_setup_default_dpm_tables(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_clock_voltage_dependency_table *allowed_sclk_vddc_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_sclk; - struct amdgpu_clock_voltage_dependency_table *allowed_mclk_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_mclk; - struct amdgpu_cac_leakage_table *std_voltage_table = - &adev->pm.dpm.dyn_state.cac_leakage_table; - u32 i; - - if (allowed_sclk_vddc_table == NULL) - return -EINVAL; - if (allowed_sclk_vddc_table->count < 1) - return -EINVAL; - if (allowed_mclk_table == NULL) - return -EINVAL; - if (allowed_mclk_table->count < 1) - return -EINVAL; - - memset(&pi->dpm_table, 0, sizeof(struct ci_dpm_table)); - - ci_reset_single_dpm_table(adev, - &pi->dpm_table.sclk_table, - SMU7_MAX_LEVELS_GRAPHICS); - ci_reset_single_dpm_table(adev, - &pi->dpm_table.mclk_table, - SMU7_MAX_LEVELS_MEMORY); - ci_reset_single_dpm_table(adev, - &pi->dpm_table.vddc_table, - SMU7_MAX_LEVELS_VDDC); - ci_reset_single_dpm_table(adev, - &pi->dpm_table.vddci_table, - SMU7_MAX_LEVELS_VDDCI); - ci_reset_single_dpm_table(adev, - &pi->dpm_table.mvdd_table, - SMU7_MAX_LEVELS_MVDD); - - pi->dpm_table.sclk_table.count = 0; - for (i = 0; i < allowed_sclk_vddc_table->count; i++) { - if ((i == 0) || - (pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count-1].value != - allowed_sclk_vddc_table->entries[i].clk)) { - pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value = - allowed_sclk_vddc_table->entries[i].clk; - pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = - (i == 0) ? true : false; - pi->dpm_table.sclk_table.count++; - } - } - - pi->dpm_table.mclk_table.count = 0; - for (i = 0; i < allowed_mclk_table->count; i++) { - if ((i == 0) || - (pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count-1].value != - allowed_mclk_table->entries[i].clk)) { - pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value = - allowed_mclk_table->entries[i].clk; - pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = - (i == 0) ? true : false; - pi->dpm_table.mclk_table.count++; - } - } - - for (i = 0; i < allowed_sclk_vddc_table->count; i++) { - pi->dpm_table.vddc_table.dpm_levels[i].value = - allowed_sclk_vddc_table->entries[i].v; - pi->dpm_table.vddc_table.dpm_levels[i].param1 = - std_voltage_table->entries[i].leakage; - pi->dpm_table.vddc_table.dpm_levels[i].enabled = true; - } - pi->dpm_table.vddc_table.count = allowed_sclk_vddc_table->count; - - allowed_mclk_table = &adev->pm.dpm.dyn_state.vddci_dependency_on_mclk; - if (allowed_mclk_table) { - for (i = 0; i < allowed_mclk_table->count; i++) { - pi->dpm_table.vddci_table.dpm_levels[i].value = - allowed_mclk_table->entries[i].v; - pi->dpm_table.vddci_table.dpm_levels[i].enabled = true; - } - pi->dpm_table.vddci_table.count = allowed_mclk_table->count; - } - - allowed_mclk_table = &adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk; - if (allowed_mclk_table) { - for (i = 0; i < allowed_mclk_table->count; i++) { - pi->dpm_table.mvdd_table.dpm_levels[i].value = - allowed_mclk_table->entries[i].v; - pi->dpm_table.mvdd_table.dpm_levels[i].enabled = true; - } - pi->dpm_table.mvdd_table.count = allowed_mclk_table->count; - } - - ci_setup_default_pcie_tables(adev); - - /* save a copy of the default DPM table */ - memcpy(&(pi->golden_dpm_table), &(pi->dpm_table), - sizeof(struct ci_dpm_table)); - - return 0; -} - -static int ci_find_boot_level(struct ci_single_dpm_table *table, - u32 value, u32 *boot_level) -{ - u32 i; - int ret = -EINVAL; - - for(i = 0; i < table->count; i++) { - if (value == table->dpm_levels[i].value) { - *boot_level = i; - ret = 0; - } - } - - return ret; -} - -static int ci_init_smc_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ulv_parm *ulv = &pi->ulv; - struct amdgpu_ps *amdgpu_boot_state = adev->pm.dpm.boot_ps; - SMU7_Discrete_DpmTable *table = &pi->smc_state_table; - int ret; - - ret = ci_setup_default_dpm_tables(adev); - if (ret) - return ret; - - if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) - ci_populate_smc_voltage_tables(adev, table); - - ci_init_fps_limits(adev); - - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_HARDWAREDC) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; - - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) - table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - - if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) - table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; - - if (ulv->supported) { - ret = ci_populate_ulv_state(adev, &pi->smc_state_table.Ulv); - if (ret) - return ret; - WREG32_SMC(ixCG_ULV_PARAMETER, ulv->cg_ulv_parameter); - } - - ret = ci_populate_all_graphic_levels(adev); - if (ret) - return ret; - - ret = ci_populate_all_memory_levels(adev); - if (ret) - return ret; - - ci_populate_smc_link_level(adev, table); - - ret = ci_populate_smc_acpi_level(adev, table); - if (ret) - return ret; - - ret = ci_populate_smc_vce_level(adev, table); - if (ret) - return ret; - - ret = ci_populate_smc_acp_level(adev, table); - if (ret) - return ret; - - ret = ci_populate_smc_samu_level(adev, table); - if (ret) - return ret; - - ret = ci_do_program_memory_timing_parameters(adev); - if (ret) - return ret; - - ret = ci_populate_smc_uvd_level(adev, table); - if (ret) - return ret; - - table->UvdBootLevel = 0; - table->VceBootLevel = 0; - table->AcpBootLevel = 0; - table->SamuBootLevel = 0; - table->GraphicsBootLevel = 0; - table->MemoryBootLevel = 0; - - ret = ci_find_boot_level(&pi->dpm_table.sclk_table, - pi->vbios_boot_state.sclk_bootup_value, - (u32 *)&pi->smc_state_table.GraphicsBootLevel); - - ret = ci_find_boot_level(&pi->dpm_table.mclk_table, - pi->vbios_boot_state.mclk_bootup_value, - (u32 *)&pi->smc_state_table.MemoryBootLevel); - - table->BootVddc = pi->vbios_boot_state.vddc_bootup_value; - table->BootVddci = pi->vbios_boot_state.vddci_bootup_value; - table->BootMVdd = pi->vbios_boot_state.mvdd_bootup_value; - - ci_populate_smc_initial_state(adev, amdgpu_boot_state); - - ret = ci_populate_bapm_parameters_in_dpm_table(adev); - if (ret) - return ret; - - table->UVDInterval = 1; - table->VCEInterval = 1; - table->ACPInterval = 1; - table->SAMUInterval = 1; - table->GraphicsVoltageChangeEnable = 1; - table->GraphicsThermThrottleEnable = 1; - table->GraphicsInterval = 1; - table->VoltageInterval = 1; - table->ThermalInterval = 1; - table->TemperatureLimitHigh = (u16)((pi->thermal_temp_setting.temperature_high * - CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); - table->TemperatureLimitLow = (u16)((pi->thermal_temp_setting.temperature_low * - CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000); - table->MemoryVoltageChangeEnable = 1; - table->MemoryInterval = 1; - table->VoltageResponseTime = 0; - table->VddcVddciDelta = 4000; - table->PhaseResponseTime = 0; - table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = pi->dpm_table.pcie_speed_table.count - 1; - table->PCIeGenInterval = 1; - if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) - table->SVI2Enable = 1; - else - table->SVI2Enable = 0; - - table->ThermGpio = 17; - table->SclkStepSize = 0x4000; - - table->SystemFlags = cpu_to_be32(table->SystemFlags); - table->SmioMaskVddcVid = cpu_to_be32(table->SmioMaskVddcVid); - table->SmioMaskVddcPhase = cpu_to_be32(table->SmioMaskVddcPhase); - table->SmioMaskVddciVid = cpu_to_be32(table->SmioMaskVddciVid); - table->SmioMaskMvddVid = cpu_to_be32(table->SmioMaskMvddVid); - table->SclkStepSize = cpu_to_be32(table->SclkStepSize); - table->TemperatureLimitHigh = cpu_to_be16(table->TemperatureLimitHigh); - table->TemperatureLimitLow = cpu_to_be16(table->TemperatureLimitLow); - table->VddcVddciDelta = cpu_to_be16(table->VddcVddciDelta); - table->VoltageResponseTime = cpu_to_be16(table->VoltageResponseTime); - table->PhaseResponseTime = cpu_to_be16(table->PhaseResponseTime); - table->BootVddc = cpu_to_be16(table->BootVddc * VOLTAGE_SCALE); - table->BootVddci = cpu_to_be16(table->BootVddci * VOLTAGE_SCALE); - table->BootMVdd = cpu_to_be16(table->BootMVdd * VOLTAGE_SCALE); - - ret = amdgpu_ci_copy_bytes_to_smc(adev, - pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, SystemFlags), - (u8 *)&table->SystemFlags, - sizeof(SMU7_Discrete_DpmTable) - 3 * sizeof(SMU7_PIDController), - pi->sram_end); - if (ret) - return ret; - - return 0; -} - -static void ci_trim_single_dpm_states(struct amdgpu_device *adev, - struct ci_single_dpm_table *dpm_table, - u32 low_limit, u32 high_limit) -{ - u32 i; - - for (i = 0; i < dpm_table->count; i++) { - if ((dpm_table->dpm_levels[i].value < low_limit) || - (dpm_table->dpm_levels[i].value > high_limit)) - dpm_table->dpm_levels[i].enabled = false; - else - dpm_table->dpm_levels[i].enabled = true; - } -} - -static void ci_trim_pcie_dpm_states(struct amdgpu_device *adev, - u32 speed_low, u32 lanes_low, - u32 speed_high, u32 lanes_high) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table; - u32 i, j; - - for (i = 0; i < pcie_table->count; i++) { - if ((pcie_table->dpm_levels[i].value < speed_low) || - (pcie_table->dpm_levels[i].param1 < lanes_low) || - (pcie_table->dpm_levels[i].value > speed_high) || - (pcie_table->dpm_levels[i].param1 > lanes_high)) - pcie_table->dpm_levels[i].enabled = false; - else - pcie_table->dpm_levels[i].enabled = true; - } - - for (i = 0; i < pcie_table->count; i++) { - if (pcie_table->dpm_levels[i].enabled) { - for (j = i + 1; j < pcie_table->count; j++) { - if (pcie_table->dpm_levels[j].enabled) { - if ((pcie_table->dpm_levels[i].value == pcie_table->dpm_levels[j].value) && - (pcie_table->dpm_levels[i].param1 == pcie_table->dpm_levels[j].param1)) - pcie_table->dpm_levels[j].enabled = false; - } - } - } - } -} - -static int ci_trim_dpm_states(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_state) -{ - struct ci_ps *state = ci_get_ps(amdgpu_state); - struct ci_power_info *pi = ci_get_pi(adev); - u32 high_limit_count; - - if (state->performance_level_count < 1) - return -EINVAL; - - if (state->performance_level_count == 1) - high_limit_count = 0; - else - high_limit_count = 1; - - ci_trim_single_dpm_states(adev, - &pi->dpm_table.sclk_table, - state->performance_levels[0].sclk, - state->performance_levels[high_limit_count].sclk); - - ci_trim_single_dpm_states(adev, - &pi->dpm_table.mclk_table, - state->performance_levels[0].mclk, - state->performance_levels[high_limit_count].mclk); - - ci_trim_pcie_dpm_states(adev, - state->performance_levels[0].pcie_gen, - state->performance_levels[0].pcie_lane, - state->performance_levels[high_limit_count].pcie_gen, - state->performance_levels[high_limit_count].pcie_lane); - - return 0; -} - -static int ci_apply_disp_minimum_voltage_request(struct amdgpu_device *adev) -{ - struct amdgpu_clock_voltage_dependency_table *disp_voltage_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk; - struct amdgpu_clock_voltage_dependency_table *vddc_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_sclk; - u32 requested_voltage = 0; - u32 i; - - if (disp_voltage_table == NULL) - return -EINVAL; - if (!disp_voltage_table->count) - return -EINVAL; - - for (i = 0; i < disp_voltage_table->count; i++) { - if (adev->clock.current_dispclk == disp_voltage_table->entries[i].clk) - requested_voltage = disp_voltage_table->entries[i].v; - } - - for (i = 0; i < vddc_table->count; i++) { - if (requested_voltage <= vddc_table->entries[i].v) { - requested_voltage = vddc_table->entries[i].v; - return (amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_VddC_Request, - requested_voltage * VOLTAGE_SCALE) == PPSMC_Result_OK) ? - 0 : -EINVAL; - } - } - - return -EINVAL; -} - -static int ci_upload_dpm_level_enable_mask(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - PPSMC_Result result; - - ci_apply_disp_minimum_voltage_request(adev); - - if (!pi->sclk_dpm_key_disabled) { - if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { - result = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_SCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.sclk_dpm_enable_mask); - if (result != PPSMC_Result_OK) - return -EINVAL; - } - } - - if (!pi->mclk_dpm_key_disabled) { - if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { - result = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.mclk_dpm_enable_mask); - if (result != PPSMC_Result_OK) - return -EINVAL; - } - } - -#if 0 - if (!pi->pcie_dpm_key_disabled) { - if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { - result = amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_PCIeDPM_SetEnabledMask, - pi->dpm_level_enable_mask.pcie_dpm_enable_mask); - if (result != PPSMC_Result_OK) - return -EINVAL; - } - } -#endif - - return 0; -} - -static void ci_find_dpm_states_clocks_in_dpm_table(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *state = ci_get_ps(amdgpu_state); - struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; - u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; - struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; - u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; - u32 i; - - pi->need_update_smu7_dpm_table = 0; - - for (i = 0; i < sclk_table->count; i++) { - if (sclk == sclk_table->dpm_levels[i].value) - break; - } - - if (i >= sclk_table->count) { - pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; - } else { - /* XXX check display min clock requirements */ - if (CISLAND_MINIMUM_ENGINE_CLOCK != CISLAND_MINIMUM_ENGINE_CLOCK) - pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK; - } - - for (i = 0; i < mclk_table->count; i++) { - if (mclk == mclk_table->dpm_levels[i].value) - break; - } - - if (i >= mclk_table->count) - pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - - if (adev->pm.dpm.current_active_crtc_count != - adev->pm.dpm.new_active_crtc_count) - pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; -} - -static int ci_populate_and_upload_sclk_mclk_dpm_levels(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *state = ci_get_ps(amdgpu_state); - u32 sclk = state->performance_levels[state->performance_level_count-1].sclk; - u32 mclk = state->performance_levels[state->performance_level_count-1].mclk; - struct ci_dpm_table *dpm_table = &pi->dpm_table; - int ret; - - if (!pi->need_update_smu7_dpm_table) - return 0; - - if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) - dpm_table->sclk_table.dpm_levels[dpm_table->sclk_table.count-1].value = sclk; - - if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) - dpm_table->mclk_table.dpm_levels[dpm_table->mclk_table.count-1].value = mclk; - - if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK)) { - ret = ci_populate_all_graphic_levels(adev); - if (ret) - return ret; - } - - if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_MCLK | DPMTABLE_UPDATE_MCLK)) { - ret = ci_populate_all_memory_levels(adev); - if (ret) - return ret; - } - - return 0; -} - -static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct amdgpu_clock_and_voltage_limits *max_limits; - int i; - - if (adev->pm.ac_power) - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - else - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - - if (enable) { - pi->dpm_level_enable_mask.uvd_dpm_enable_mask = 0; - - for (i = adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; i >= 0; i--) { - if (adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { - pi->dpm_level_enable_mask.uvd_dpm_enable_mask |= 1 << i; - - if (!pi->caps_uvd_dpm) - break; - } - } - - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_UVDDPM_SetEnabledMask, - pi->dpm_level_enable_mask.uvd_dpm_enable_mask); - - if (pi->last_mclk_dpm_enable_mask & 0x1) { - pi->uvd_enabled = true; - pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.mclk_dpm_enable_mask); - } - } else { - if (pi->uvd_enabled) { - pi->uvd_enabled = false; - pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1; - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.mclk_dpm_enable_mask); - } - } - - return (amdgpu_ci_send_msg_to_smc(adev, enable ? - PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable) == PPSMC_Result_OK) ? - 0 : -EINVAL; -} - -static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct amdgpu_clock_and_voltage_limits *max_limits; - int i; - - if (adev->pm.ac_power) - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - else - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - - if (enable) { - pi->dpm_level_enable_mask.vce_dpm_enable_mask = 0; - for (i = adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count - 1; i >= 0; i--) { - if (adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { - pi->dpm_level_enable_mask.vce_dpm_enable_mask |= 1 << i; - - if (!pi->caps_vce_dpm) - break; - } - } - - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_VCEDPM_SetEnabledMask, - pi->dpm_level_enable_mask.vce_dpm_enable_mask); - } - - return (amdgpu_ci_send_msg_to_smc(adev, enable ? - PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable) == PPSMC_Result_OK) ? - 0 : -EINVAL; -} - -#if 0 -static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct amdgpu_clock_and_voltage_limits *max_limits; - int i; - - if (adev->pm.ac_power) - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - else - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - - if (enable) { - pi->dpm_level_enable_mask.samu_dpm_enable_mask = 0; - for (i = adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count - 1; i >= 0; i--) { - if (adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { - pi->dpm_level_enable_mask.samu_dpm_enable_mask |= 1 << i; - - if (!pi->caps_samu_dpm) - break; - } - } - - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - pi->dpm_level_enable_mask.samu_dpm_enable_mask); - } - return (amdgpu_ci_send_msg_to_smc(adev, enable ? - PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable) == PPSMC_Result_OK) ? - 0 : -EINVAL; -} - -static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable) -{ - struct ci_power_info *pi = ci_get_pi(adev); - const struct amdgpu_clock_and_voltage_limits *max_limits; - int i; - - if (adev->pm.ac_power) - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - else - max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - - if (enable) { - pi->dpm_level_enable_mask.acp_dpm_enable_mask = 0; - for (i = adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count - 1; i >= 0; i--) { - if (adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) { - pi->dpm_level_enable_mask.acp_dpm_enable_mask |= 1 << i; - - if (!pi->caps_acp_dpm) - break; - } - } - - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_ACPDPM_SetEnabledMask, - pi->dpm_level_enable_mask.acp_dpm_enable_mask); - } - - return (amdgpu_ci_send_msg_to_smc(adev, enable ? - PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable) == PPSMC_Result_OK) ? - 0 : -EINVAL; -} -#endif - -static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - int ret = 0; - - if (!gate) { - /* turn the clocks on when decoding */ - if (pi->caps_uvd_dpm || - (adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count <= 0)) - pi->smc_state_table.UvdBootLevel = 0; - else - pi->smc_state_table.UvdBootLevel = - adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; - - tmp = RREG32_SMC(ixDPM_TABLE_475); - tmp &= ~DPM_TABLE_475__UvdBootLevel_MASK; - tmp |= (pi->smc_state_table.UvdBootLevel << DPM_TABLE_475__UvdBootLevel__SHIFT); - WREG32_SMC(ixDPM_TABLE_475, tmp); - ret = ci_enable_uvd_dpm(adev, true); - } else { - ret = ci_enable_uvd_dpm(adev, false); - if (ret) - return ret; - } - - return ret; -} - -static u8 ci_get_vce_boot_level(struct amdgpu_device *adev) -{ - u8 i; - u32 min_evclk = 30000; /* ??? */ - struct amdgpu_vce_clock_voltage_dependency_table *table = - &adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; - - for (i = 0; i < table->count; i++) { - if (table->entries[i].evclk >= min_evclk) - return i; - } - - return table->count - 1; -} - -static int ci_update_vce_dpm(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_new_state, - struct amdgpu_ps *amdgpu_current_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret = 0; - u32 tmp; - - if (amdgpu_current_state->evclk != amdgpu_new_state->evclk) { - if (amdgpu_new_state->evclk) { - pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(adev); - tmp = RREG32_SMC(ixDPM_TABLE_475); - tmp &= ~DPM_TABLE_475__VceBootLevel_MASK; - tmp |= (pi->smc_state_table.VceBootLevel << DPM_TABLE_475__VceBootLevel__SHIFT); - WREG32_SMC(ixDPM_TABLE_475, tmp); - - ret = ci_enable_vce_dpm(adev, true); - } else { - ret = ci_enable_vce_dpm(adev, false); - if (ret) - return ret; - } - } - return ret; -} - -#if 0 -static int ci_update_samu_dpm(struct amdgpu_device *adev, bool gate) -{ - return ci_enable_samu_dpm(adev, gate); -} - -static int ci_update_acp_dpm(struct amdgpu_device *adev, bool gate) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; - - if (!gate) { - pi->smc_state_table.AcpBootLevel = 0; - - tmp = RREG32_SMC(ixDPM_TABLE_475); - tmp &= ~AcpBootLevel_MASK; - tmp |= AcpBootLevel(pi->smc_state_table.AcpBootLevel); - WREG32_SMC(ixDPM_TABLE_475, tmp); - } - - return ci_enable_acp_dpm(adev, !gate); -} -#endif - -static int ci_generate_dpm_level_enable_mask(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - - ret = ci_trim_dpm_states(adev, amdgpu_state); - if (ret) - return ret; - - pi->dpm_level_enable_mask.sclk_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&pi->dpm_table.sclk_table); - pi->dpm_level_enable_mask.mclk_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&pi->dpm_table.mclk_table); - pi->last_mclk_dpm_enable_mask = - pi->dpm_level_enable_mask.mclk_dpm_enable_mask; - if (pi->uvd_enabled) { - if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask & 1) - pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; - } - pi->dpm_level_enable_mask.pcie_dpm_enable_mask = - ci_get_dpm_level_enable_mask_value(&pi->dpm_table.pcie_speed_table); - - return 0; -} - -static u32 ci_get_lowest_enabled_level(struct amdgpu_device *adev, - u32 level_mask) -{ - u32 level = 0; - - while ((level_mask & (1 << level)) == 0) - level++; - - return level; -} - - -static int ci_dpm_force_performance_level(void *handle, - enum amd_dpm_forced_level level) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp, levels, i; - int ret; - - if (level == AMD_DPM_FORCED_LEVEL_HIGH) { - if ((!pi->pcie_dpm_key_disabled) && - pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { - levels = 0; - tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask; - while (tmp >>= 1) - levels++; - if (levels) { - ret = ci_dpm_force_state_pcie(adev, level); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) & - TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - } - if ((!pi->sclk_dpm_key_disabled) && - pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { - levels = 0; - tmp = pi->dpm_level_enable_mask.sclk_dpm_enable_mask; - while (tmp >>= 1) - levels++; - if (levels) { - ret = ci_dpm_force_state_sclk(adev, levels); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - } - if ((!pi->mclk_dpm_key_disabled) && - pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { - levels = 0; - tmp = pi->dpm_level_enable_mask.mclk_dpm_enable_mask; - while (tmp >>= 1) - levels++; - if (levels) { - ret = ci_dpm_force_state_mclk(adev, levels); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_MCLK_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_MCLK_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - } - } else if (level == AMD_DPM_FORCED_LEVEL_LOW) { - if ((!pi->sclk_dpm_key_disabled) && - pi->dpm_level_enable_mask.sclk_dpm_enable_mask) { - levels = ci_get_lowest_enabled_level(adev, - pi->dpm_level_enable_mask.sclk_dpm_enable_mask); - ret = ci_dpm_force_state_sclk(adev, levels); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - if ((!pi->mclk_dpm_key_disabled) && - pi->dpm_level_enable_mask.mclk_dpm_enable_mask) { - levels = ci_get_lowest_enabled_level(adev, - pi->dpm_level_enable_mask.mclk_dpm_enable_mask); - ret = ci_dpm_force_state_mclk(adev, levels); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_MCLK_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX__CURR_MCLK_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - if ((!pi->pcie_dpm_key_disabled) && - pi->dpm_level_enable_mask.pcie_dpm_enable_mask) { - levels = ci_get_lowest_enabled_level(adev, - pi->dpm_level_enable_mask.pcie_dpm_enable_mask); - ret = ci_dpm_force_state_pcie(adev, levels); - if (ret) - return ret; - for (i = 0; i < adev->usec_timeout; i++) { - tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) & - TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >> - TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT; - if (tmp == levels) - break; - udelay(1); - } - } - } else if (level == AMD_DPM_FORCED_LEVEL_AUTO) { - if (!pi->pcie_dpm_key_disabled) { - PPSMC_Result smc_result; - - smc_result = amdgpu_ci_send_msg_to_smc(adev, - PPSMC_MSG_PCIeDPM_UnForceLevel); - if (smc_result != PPSMC_Result_OK) - return -EINVAL; - } - ret = ci_upload_dpm_level_enable_mask(adev); - if (ret) - return ret; - } - - adev->pm.dpm.forced_level = level; - - return 0; -} - -static int ci_set_mc_special_registers(struct amdgpu_device *adev, - struct ci_mc_reg_table *table) -{ - u8 i, j, k; - u32 temp_reg; - - for (i = 0, j = table->last; i < table->last; i++) { - if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - switch(table->mc_reg_address[i].s1) { - case mmMC_SEQ_MISC1: - temp_reg = RREG32(mmMC_PMG_CMD_EMRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - ((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); - } - j++; - - if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - temp_reg = RREG32(mmMC_PMG_CMD_MRS); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) - table->mc_reg_table_entry[k].mc_data[j] |= 0x100; - } - j++; - - if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { - if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; - table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; - } - j++; - } - break; - case mmMC_SEQ_RESERVE_M: - temp_reg = RREG32(mmMC_PMG_CMD_MRS1); - table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; - table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; - for (k = 0; k < table->num_entries; k++) { - table->mc_reg_table_entry[k].mc_data[j] = - (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - } - j++; - break; - default: - break; - } - - } - - table->last = j; - - return 0; -} - -static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg) -{ - bool result = true; - - switch(in_reg) { - case mmMC_SEQ_RAS_TIMING: - *out_reg = mmMC_SEQ_RAS_TIMING_LP; - break; - case mmMC_SEQ_DLL_STBY: - *out_reg = mmMC_SEQ_DLL_STBY_LP; - break; - case mmMC_SEQ_G5PDX_CMD0: - *out_reg = mmMC_SEQ_G5PDX_CMD0_LP; - break; - case mmMC_SEQ_G5PDX_CMD1: - *out_reg = mmMC_SEQ_G5PDX_CMD1_LP; - break; - case mmMC_SEQ_G5PDX_CTRL: - *out_reg = mmMC_SEQ_G5PDX_CTRL_LP; - break; - case mmMC_SEQ_CAS_TIMING: - *out_reg = mmMC_SEQ_CAS_TIMING_LP; - break; - case mmMC_SEQ_MISC_TIMING: - *out_reg = mmMC_SEQ_MISC_TIMING_LP; - break; - case mmMC_SEQ_MISC_TIMING2: - *out_reg = mmMC_SEQ_MISC_TIMING2_LP; - break; - case mmMC_SEQ_PMG_DVS_CMD: - *out_reg = mmMC_SEQ_PMG_DVS_CMD_LP; - break; - case mmMC_SEQ_PMG_DVS_CTL: - *out_reg = mmMC_SEQ_PMG_DVS_CTL_LP; - break; - case mmMC_SEQ_RD_CTL_D0: - *out_reg = mmMC_SEQ_RD_CTL_D0_LP; - break; - case mmMC_SEQ_RD_CTL_D1: - *out_reg = mmMC_SEQ_RD_CTL_D1_LP; - break; - case mmMC_SEQ_WR_CTL_D0: - *out_reg = mmMC_SEQ_WR_CTL_D0_LP; - break; - case mmMC_SEQ_WR_CTL_D1: - *out_reg = mmMC_SEQ_WR_CTL_D1_LP; - break; - case mmMC_PMG_CMD_EMRS: - *out_reg = mmMC_SEQ_PMG_CMD_EMRS_LP; - break; - case mmMC_PMG_CMD_MRS: - *out_reg = mmMC_SEQ_PMG_CMD_MRS_LP; - break; - case mmMC_PMG_CMD_MRS1: - *out_reg = mmMC_SEQ_PMG_CMD_MRS1_LP; - break; - case mmMC_SEQ_PMG_TIMING: - *out_reg = mmMC_SEQ_PMG_TIMING_LP; - break; - case mmMC_PMG_CMD_MRS2: - *out_reg = mmMC_SEQ_PMG_CMD_MRS2_LP; - break; - case mmMC_SEQ_WR_CTL_2: - *out_reg = mmMC_SEQ_WR_CTL_2_LP; - break; - default: - result = false; - break; - } - - return result; -} - -static void ci_set_valid_flag(struct ci_mc_reg_table *table) -{ - u8 i, j; - - for (i = 0; i < table->last; i++) { - for (j = 1; j < table->num_entries; j++) { - if (table->mc_reg_table_entry[j-1].mc_data[i] != - table->mc_reg_table_entry[j].mc_data[i]) { - table->valid_flag |= 1 << i; - break; - } - } - } -} - -static void ci_set_s0_mc_reg_index(struct ci_mc_reg_table *table) -{ - u32 i; - u16 address; - - for (i = 0; i < table->last; i++) { - table->mc_reg_address[i].s0 = - ci_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) ? - address : table->mc_reg_address[i].s1; - } -} - -static int ci_copy_vbios_mc_reg_table(const struct atom_mc_reg_table *table, - struct ci_mc_reg_table *ci_table) -{ - u8 i, j; - - if (table->last > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - if (table->num_entries > MAX_AC_TIMING_ENTRIES) - return -EINVAL; - - for (i = 0; i < table->last; i++) - ci_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; - - ci_table->last = table->last; - - for (i = 0; i < table->num_entries; i++) { - ci_table->mc_reg_table_entry[i].mclk_max = - table->mc_reg_table_entry[i].mclk_max; - for (j = 0; j < table->last; j++) - ci_table->mc_reg_table_entry[i].mc_data[j] = - table->mc_reg_table_entry[i].mc_data[j]; - } - ci_table->num_entries = table->num_entries; - - return 0; -} - -static int ci_register_patching_mc_seq(struct amdgpu_device *adev, - struct ci_mc_reg_table *table) -{ - u8 i, k; - u32 tmp; - bool patch; - - tmp = RREG32(mmMC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? true : false; - - if (patch && - ((adev->pdev->device == 0x67B0) || - (adev->pdev->device == 0x67B1))) { - for (i = 0; i < table->last; i++) { - if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - switch (table->mc_reg_address[i].s1) { - case mmMC_SEQ_MISC1: - for (k = 0; k < table->num_entries; k++) { - if ((table->mc_reg_table_entry[k].mclk_max == 125000) || - (table->mc_reg_table_entry[k].mclk_max == 137500)) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFF8) | - 0x00000007; - } - break; - case mmMC_SEQ_WR_CTL_D0: - for (k = 0; k < table->num_entries; k++) { - if ((table->mc_reg_table_entry[k].mclk_max == 125000) || - (table->mc_reg_table_entry[k].mclk_max == 137500)) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFF0F00) | - 0x0000D0DD; - } - break; - case mmMC_SEQ_WR_CTL_D1: - for (k = 0; k < table->num_entries; k++) { - if ((table->mc_reg_table_entry[k].mclk_max == 125000) || - (table->mc_reg_table_entry[k].mclk_max == 137500)) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFF0F00) | - 0x0000D0DD; - } - break; - case mmMC_SEQ_WR_CTL_2: - for (k = 0; k < table->num_entries; k++) { - if ((table->mc_reg_table_entry[k].mclk_max == 125000) || - (table->mc_reg_table_entry[k].mclk_max == 137500)) - table->mc_reg_table_entry[k].mc_data[i] = 0; - } - break; - case mmMC_SEQ_CAS_TIMING: - for (k = 0; k < table->num_entries; k++) { - if (table->mc_reg_table_entry[k].mclk_max == 125000) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFE0FE0F) | - 0x000C0140; - else if (table->mc_reg_table_entry[k].mclk_max == 137500) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFE0FE0F) | - 0x000C0150; - } - break; - case mmMC_SEQ_MISC_TIMING: - for (k = 0; k < table->num_entries; k++) { - if (table->mc_reg_table_entry[k].mclk_max == 125000) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFE0) | - 0x00000030; - else if (table->mc_reg_table_entry[k].mclk_max == 137500) - table->mc_reg_table_entry[k].mc_data[i] = - (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFE0) | - 0x00000035; - } - break; - default: - break; - } - } - - WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 3); - tmp = RREG32(mmMC_SEQ_IO_DEBUG_DATA); - tmp = (tmp & 0xFFF8FFFF) | (1 << 16); - WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 3); - WREG32(mmMC_SEQ_IO_DEBUG_DATA, tmp); - } - - return 0; -} - -static int ci_initialize_mc_reg_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct atom_mc_reg_table *table; - struct ci_mc_reg_table *ci_table = &pi->mc_reg_table; - u8 module_index = ci_get_memory_module_index(adev); - int ret; - - table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL); - if (!table) - return -ENOMEM; - - WREG32(mmMC_SEQ_RAS_TIMING_LP, RREG32(mmMC_SEQ_RAS_TIMING)); - WREG32(mmMC_SEQ_CAS_TIMING_LP, RREG32(mmMC_SEQ_CAS_TIMING)); - WREG32(mmMC_SEQ_DLL_STBY_LP, RREG32(mmMC_SEQ_DLL_STBY)); - WREG32(mmMC_SEQ_G5PDX_CMD0_LP, RREG32(mmMC_SEQ_G5PDX_CMD0)); - WREG32(mmMC_SEQ_G5PDX_CMD1_LP, RREG32(mmMC_SEQ_G5PDX_CMD1)); - WREG32(mmMC_SEQ_G5PDX_CTRL_LP, RREG32(mmMC_SEQ_G5PDX_CTRL)); - WREG32(mmMC_SEQ_PMG_DVS_CMD_LP, RREG32(mmMC_SEQ_PMG_DVS_CMD)); - WREG32(mmMC_SEQ_PMG_DVS_CTL_LP, RREG32(mmMC_SEQ_PMG_DVS_CTL)); - WREG32(mmMC_SEQ_MISC_TIMING_LP, RREG32(mmMC_SEQ_MISC_TIMING)); - WREG32(mmMC_SEQ_MISC_TIMING2_LP, RREG32(mmMC_SEQ_MISC_TIMING2)); - WREG32(mmMC_SEQ_PMG_CMD_EMRS_LP, RREG32(mmMC_PMG_CMD_EMRS)); - WREG32(mmMC_SEQ_PMG_CMD_MRS_LP, RREG32(mmMC_PMG_CMD_MRS)); - WREG32(mmMC_SEQ_PMG_CMD_MRS1_LP, RREG32(mmMC_PMG_CMD_MRS1)); - WREG32(mmMC_SEQ_WR_CTL_D0_LP, RREG32(mmMC_SEQ_WR_CTL_D0)); - WREG32(mmMC_SEQ_WR_CTL_D1_LP, RREG32(mmMC_SEQ_WR_CTL_D1)); - WREG32(mmMC_SEQ_RD_CTL_D0_LP, RREG32(mmMC_SEQ_RD_CTL_D0)); - WREG32(mmMC_SEQ_RD_CTL_D1_LP, RREG32(mmMC_SEQ_RD_CTL_D1)); - WREG32(mmMC_SEQ_PMG_TIMING_LP, RREG32(mmMC_SEQ_PMG_TIMING)); - WREG32(mmMC_SEQ_PMG_CMD_MRS2_LP, RREG32(mmMC_PMG_CMD_MRS2)); - WREG32(mmMC_SEQ_WR_CTL_2_LP, RREG32(mmMC_SEQ_WR_CTL_2)); - - ret = amdgpu_atombios_init_mc_reg_table(adev, module_index, table); - if (ret) - goto init_mc_done; - - ret = ci_copy_vbios_mc_reg_table(table, ci_table); - if (ret) - goto init_mc_done; - - ci_set_s0_mc_reg_index(ci_table); - - ret = ci_register_patching_mc_seq(adev, ci_table); - if (ret) - goto init_mc_done; - - ret = ci_set_mc_special_registers(adev, ci_table); - if (ret) - goto init_mc_done; - - ci_set_valid_flag(ci_table); - -init_mc_done: - kfree(table); - - return ret; -} - -static int ci_populate_mc_reg_addresses(struct amdgpu_device *adev, - SMU7_Discrete_MCRegisters *mc_reg_table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 i, j; - - for (i = 0, j = 0; j < pi->mc_reg_table.last; j++) { - if (pi->mc_reg_table.valid_flag & (1 << j)) { - if (i >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; - mc_reg_table->address[i].s0 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s0); - mc_reg_table->address[i].s1 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s1); - i++; - } - } - - mc_reg_table->last = (u8)i; - - return 0; -} - -static void ci_convert_mc_registers(const struct ci_mc_reg_entry *entry, - SMU7_Discrete_MCRegisterSet *data, - u32 num_entries, u32 valid_flag) -{ - u32 i, j; - - for (i = 0, j = 0; j < num_entries; j++) { - if (valid_flag & (1 << j)) { - data->value[i] = cpu_to_be32(entry->mc_data[j]); - i++; - } - } -} - -static void ci_convert_mc_reg_table_entry_to_smc(struct amdgpu_device *adev, - const u32 memory_clock, - SMU7_Discrete_MCRegisterSet *mc_reg_table_data) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 i = 0; - - for(i = 0; i < pi->mc_reg_table.num_entries; i++) { - if (memory_clock <= pi->mc_reg_table.mc_reg_table_entry[i].mclk_max) - break; - } - - if ((i == pi->mc_reg_table.num_entries) && (i > 0)) - --i; - - ci_convert_mc_registers(&pi->mc_reg_table.mc_reg_table_entry[i], - mc_reg_table_data, pi->mc_reg_table.last, - pi->mc_reg_table.valid_flag); -} - -static void ci_convert_mc_reg_table_to_smc(struct amdgpu_device *adev, - SMU7_Discrete_MCRegisters *mc_reg_table) -{ - struct ci_power_info *pi = ci_get_pi(adev); - u32 i; - - for (i = 0; i < pi->dpm_table.mclk_table.count; i++) - ci_convert_mc_reg_table_entry_to_smc(adev, - pi->dpm_table.mclk_table.dpm_levels[i].value, - &mc_reg_table->data[i]); -} - -static int ci_populate_initial_mc_reg_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int ret; - - memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); - - ret = ci_populate_mc_reg_addresses(adev, &pi->smc_mc_reg_table); - if (ret) - return ret; - ci_convert_mc_reg_table_to_smc(adev, &pi->smc_mc_reg_table); - - return amdgpu_ci_copy_bytes_to_smc(adev, - pi->mc_reg_table_start, - (u8 *)&pi->smc_mc_reg_table, - sizeof(SMU7_Discrete_MCRegisters), - pi->sram_end); -} - -static int ci_update_and_upload_mc_reg_table(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - - if (!(pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) - return 0; - - memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters)); - - ci_convert_mc_reg_table_to_smc(adev, &pi->smc_mc_reg_table); - - return amdgpu_ci_copy_bytes_to_smc(adev, - pi->mc_reg_table_start + - offsetof(SMU7_Discrete_MCRegisters, data[0]), - (u8 *)&pi->smc_mc_reg_table.data[0], - sizeof(SMU7_Discrete_MCRegisterSet) * - pi->dpm_table.mclk_table.count, - pi->sram_end); -} - -static void ci_enable_voltage_control(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixGENERAL_PWRMGT); - - tmp |= GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK; - WREG32_SMC(ixGENERAL_PWRMGT, tmp); -} - -static enum amdgpu_pcie_gen ci_get_maximum_link_speed(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_state) -{ - struct ci_ps *state = ci_get_ps(amdgpu_state); - int i; - u16 pcie_speed, max_speed = 0; - - for (i = 0; i < state->performance_level_count; i++) { - pcie_speed = state->performance_levels[i].pcie_gen; - if (max_speed < pcie_speed) - max_speed = pcie_speed; - } - - return max_speed; -} - -static u16 ci_get_current_pcie_speed(struct amdgpu_device *adev) -{ - u32 speed_cntl = 0; - - speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL) & - PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK; - speed_cntl >>= PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; - - return (u16)speed_cntl; -} - -static int ci_get_current_pcie_lane_number(struct amdgpu_device *adev) -{ - u32 link_width = 0; - - link_width = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL) & - PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK; - link_width >>= PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; - - switch (link_width) { - case 1: - return 1; - case 2: - return 2; - case 3: - return 4; - case 4: - return 8; - case 0: - case 6: - default: - return 16; - } -} - -static void ci_request_link_speed_change_before_state_change(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_new_state, - struct amdgpu_ps *amdgpu_current_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - enum amdgpu_pcie_gen target_link_speed = - ci_get_maximum_link_speed(adev, amdgpu_new_state); - enum amdgpu_pcie_gen current_link_speed; - - if (pi->force_pcie_gen == AMDGPU_PCIE_GEN_INVALID) - current_link_speed = ci_get_maximum_link_speed(adev, amdgpu_current_state); - else - current_link_speed = pi->force_pcie_gen; - - pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; - pi->pspp_notify_required = false; - if (target_link_speed > current_link_speed) { - switch (target_link_speed) { -#ifdef CONFIG_ACPI - case AMDGPU_PCIE_GEN3: - if (amdgpu_acpi_pcie_performance_request(adev, PCIE_PERF_REQ_PECI_GEN3, false) == 0) - break; - pi->force_pcie_gen = AMDGPU_PCIE_GEN2; - if (current_link_speed == AMDGPU_PCIE_GEN2) - break; - case AMDGPU_PCIE_GEN2: - if (amdgpu_acpi_pcie_performance_request(adev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) - break; -#endif - default: - pi->force_pcie_gen = ci_get_current_pcie_speed(adev); - break; - } - } else { - if (target_link_speed < current_link_speed) - pi->pspp_notify_required = true; - } -} - -static void ci_notify_link_speed_change_after_state_change(struct amdgpu_device *adev, - struct amdgpu_ps *amdgpu_new_state, - struct amdgpu_ps *amdgpu_current_state) -{ - struct ci_power_info *pi = ci_get_pi(adev); - enum amdgpu_pcie_gen target_link_speed = - ci_get_maximum_link_speed(adev, amdgpu_new_state); - u8 request; - - if (pi->pspp_notify_required) { - if (target_link_speed == AMDGPU_PCIE_GEN3) - request = PCIE_PERF_REQ_PECI_GEN3; - else if (target_link_speed == AMDGPU_PCIE_GEN2) - request = PCIE_PERF_REQ_PECI_GEN2; - else - request = PCIE_PERF_REQ_PECI_GEN1; - - if ((request == PCIE_PERF_REQ_PECI_GEN1) && - (ci_get_current_pcie_speed(adev) > 0)) - return; - -#ifdef CONFIG_ACPI - amdgpu_acpi_pcie_performance_request(adev, request, false); -#endif - } -} - -static int ci_set_private_data_variables_based_on_pptable(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_clock_voltage_dependency_table *allowed_sclk_vddc_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_sclk; - struct amdgpu_clock_voltage_dependency_table *allowed_mclk_vddc_table = - &adev->pm.dpm.dyn_state.vddc_dependency_on_mclk; - struct amdgpu_clock_voltage_dependency_table *allowed_mclk_vddci_table = - &adev->pm.dpm.dyn_state.vddci_dependency_on_mclk; - - if (allowed_sclk_vddc_table == NULL) - return -EINVAL; - if (allowed_sclk_vddc_table->count < 1) - return -EINVAL; - if (allowed_mclk_vddc_table == NULL) - return -EINVAL; - if (allowed_mclk_vddc_table->count < 1) - return -EINVAL; - if (allowed_mclk_vddci_table == NULL) - return -EINVAL; - if (allowed_mclk_vddci_table->count < 1) - return -EINVAL; - - pi->min_vddc_in_pp_table = allowed_sclk_vddc_table->entries[0].v; - pi->max_vddc_in_pp_table = - allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; - - pi->min_vddci_in_pp_table = allowed_mclk_vddci_table->entries[0].v; - pi->max_vddci_in_pp_table = - allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; - - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk = - allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk = - allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc = - allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci = - allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; - - return 0; -} - -static void ci_patch_with_vddc_leakage(struct amdgpu_device *adev, u16 *vddc) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_leakage_voltage *leakage_table = &pi->vddc_leakage; - u32 leakage_index; - - for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { - if (leakage_table->leakage_id[leakage_index] == *vddc) { - *vddc = leakage_table->actual_voltage[leakage_index]; - break; - } - } -} - -static void ci_patch_with_vddci_leakage(struct amdgpu_device *adev, u16 *vddci) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_leakage_voltage *leakage_table = &pi->vddci_leakage; - u32 leakage_index; - - for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { - if (leakage_table->leakage_id[leakage_index] == *vddci) { - *vddci = leakage_table->actual_voltage[leakage_index]; - break; - } - } -} - -static void ci_patch_clock_voltage_dependency_table_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_clock_voltage_dependency_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddc_leakage(adev, &table->entries[i].v); - } -} - -static void ci_patch_clock_voltage_dependency_table_with_vddci_leakage(struct amdgpu_device *adev, - struct amdgpu_clock_voltage_dependency_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddci_leakage(adev, &table->entries[i].v); - } -} - -static void ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_vce_clock_voltage_dependency_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddc_leakage(adev, &table->entries[i].v); - } -} - -static void ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_uvd_clock_voltage_dependency_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddc_leakage(adev, &table->entries[i].v); - } -} - -static void ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_phase_shedding_limits_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddc_leakage(adev, &table->entries[i].voltage); - } -} - -static void ci_patch_clock_voltage_limits_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_clock_and_voltage_limits *table) -{ - if (table) { - ci_patch_with_vddc_leakage(adev, (u16 *)&table->vddc); - ci_patch_with_vddci_leakage(adev, (u16 *)&table->vddci); - } -} - -static void ci_patch_cac_leakage_table_with_vddc_leakage(struct amdgpu_device *adev, - struct amdgpu_cac_leakage_table *table) -{ - u32 i; - - if (table) { - for (i = 0; i < table->count; i++) - ci_patch_with_vddc_leakage(adev, &table->entries[i].vddc); - } -} - -static void ci_patch_dependency_tables_with_leakage(struct amdgpu_device *adev) -{ - - ci_patch_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_sclk); - ci_patch_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_mclk); - ci_patch_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk); - ci_patch_clock_voltage_dependency_table_with_vddci_leakage(adev, - &adev->pm.dpm.dyn_state.vddci_dependency_on_mclk); - ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table); - ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table); - ci_patch_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table); - ci_patch_clock_voltage_dependency_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table); - ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.phase_shedding_limits_table); - ci_patch_clock_voltage_limits_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac); - ci_patch_clock_voltage_limits_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc); - ci_patch_cac_leakage_table_with_vddc_leakage(adev, - &adev->pm.dpm.dyn_state.cac_leakage_table); - -} - -static void ci_update_current_ps(struct amdgpu_device *adev, - struct amdgpu_ps *rps) -{ - struct ci_ps *new_ps = ci_get_ps(rps); - struct ci_power_info *pi = ci_get_pi(adev); - - pi->current_rps = *rps; - pi->current_ps = *new_ps; - pi->current_rps.ps_priv = &pi->current_ps; - adev->pm.dpm.current_ps = &pi->current_rps; -} - -static void ci_update_requested_ps(struct amdgpu_device *adev, - struct amdgpu_ps *rps) -{ - struct ci_ps *new_ps = ci_get_ps(rps); - struct ci_power_info *pi = ci_get_pi(adev); - - pi->requested_rps = *rps; - pi->requested_ps = *new_ps; - pi->requested_rps.ps_priv = &pi->requested_ps; - adev->pm.dpm.requested_ps = &pi->requested_rps; -} - -static int ci_dpm_pre_set_power_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; - struct amdgpu_ps *new_ps = &requested_ps; - - ci_update_requested_ps(adev, new_ps); - - ci_apply_state_adjust_rules(adev, &pi->requested_rps); - - return 0; -} - -static void ci_dpm_post_set_power_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps *new_ps = &pi->requested_rps; - - ci_update_current_ps(adev, new_ps); -} - - -static void ci_dpm_setup_asic(struct amdgpu_device *adev) -{ - ci_read_clock_registers(adev); - ci_enable_acpi_power_management(adev); - ci_init_sclk_t(adev); -} - -static int ci_dpm_enable(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps *boot_ps = adev->pm.dpm.boot_ps; - int ret; - - if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) { - ci_enable_voltage_control(adev); - ret = ci_construct_voltage_tables(adev); - if (ret) { - DRM_ERROR("ci_construct_voltage_tables failed\n"); - return ret; - } - } - if (pi->caps_dynamic_ac_timing) { - ret = ci_initialize_mc_reg_table(adev); - if (ret) - pi->caps_dynamic_ac_timing = false; - } - if (pi->dynamic_ss) - ci_enable_spread_spectrum(adev, true); - if (pi->thermal_protection) - ci_enable_thermal_protection(adev, true); - ci_program_sstp(adev); - ci_enable_display_gap(adev); - ci_program_vc(adev); - ret = ci_upload_firmware(adev); - if (ret) { - DRM_ERROR("ci_upload_firmware failed\n"); - return ret; - } - ret = ci_process_firmware_header(adev); - if (ret) { - DRM_ERROR("ci_process_firmware_header failed\n"); - return ret; - } - ret = ci_initial_switch_from_arb_f0_to_f1(adev); - if (ret) { - DRM_ERROR("ci_initial_switch_from_arb_f0_to_f1 failed\n"); - return ret; - } - ret = ci_init_smc_table(adev); - if (ret) { - DRM_ERROR("ci_init_smc_table failed\n"); - return ret; - } - ret = ci_init_arb_table_index(adev); - if (ret) { - DRM_ERROR("ci_init_arb_table_index failed\n"); - return ret; - } - if (pi->caps_dynamic_ac_timing) { - ret = ci_populate_initial_mc_reg_table(adev); - if (ret) { - DRM_ERROR("ci_populate_initial_mc_reg_table failed\n"); - return ret; - } - } - ret = ci_populate_pm_base(adev); - if (ret) { - DRM_ERROR("ci_populate_pm_base failed\n"); - return ret; - } - ci_dpm_start_smc(adev); - ci_enable_vr_hot_gpio_interrupt(adev); - ret = ci_notify_smc_display_change(adev, false); - if (ret) { - DRM_ERROR("ci_notify_smc_display_change failed\n"); - return ret; - } - ci_enable_sclk_control(adev, true); - ret = ci_enable_ulv(adev, true); - if (ret) { - DRM_ERROR("ci_enable_ulv failed\n"); - return ret; - } - ret = ci_enable_ds_master_switch(adev, true); - if (ret) { - DRM_ERROR("ci_enable_ds_master_switch failed\n"); - return ret; - } - ret = ci_start_dpm(adev); - if (ret) { - DRM_ERROR("ci_start_dpm failed\n"); - return ret; - } - ret = ci_enable_didt(adev, true); - if (ret) { - DRM_ERROR("ci_enable_didt failed\n"); - return ret; - } - ret = ci_enable_smc_cac(adev, true); - if (ret) { - DRM_ERROR("ci_enable_smc_cac failed\n"); - return ret; - } - ret = ci_enable_power_containment(adev, true); - if (ret) { - DRM_ERROR("ci_enable_power_containment failed\n"); - return ret; - } - - ret = ci_power_control_set_level(adev); - if (ret) { - DRM_ERROR("ci_power_control_set_level failed\n"); - return ret; - } - - ci_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true); - - ret = ci_enable_thermal_based_sclk_dpm(adev, true); - if (ret) { - DRM_ERROR("ci_enable_thermal_based_sclk_dpm failed\n"); - return ret; - } - - ci_thermal_start_thermal_controller(adev); - - ci_update_current_ps(adev, boot_ps); - - return 0; -} - -static void ci_dpm_disable(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps *boot_ps = adev->pm.dpm.boot_ps; - - amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, - AMDGPU_THERMAL_IRQ_LOW_TO_HIGH); - amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, - AMDGPU_THERMAL_IRQ_HIGH_TO_LOW); - - ci_dpm_powergate_uvd(adev, true); - - if (!amdgpu_ci_is_smc_running(adev)) - return; - - ci_thermal_stop_thermal_controller(adev); - - if (pi->thermal_protection) - ci_enable_thermal_protection(adev, false); - ci_enable_power_containment(adev, false); - ci_enable_smc_cac(adev, false); - ci_enable_didt(adev, false); - ci_enable_spread_spectrum(adev, false); - ci_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, false); - ci_stop_dpm(adev); - ci_enable_ds_master_switch(adev, false); - ci_enable_ulv(adev, false); - ci_clear_vc(adev); - ci_reset_to_default(adev); - ci_dpm_stop_smc(adev); - ci_force_switch_to_arb_f0(adev); - ci_enable_thermal_based_sclk_dpm(adev, false); - - ci_update_current_ps(adev, boot_ps); -} - -static int ci_dpm_set_power_state(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps *new_ps = &pi->requested_rps; - struct amdgpu_ps *old_ps = &pi->current_rps; - int ret; - - ci_find_dpm_states_clocks_in_dpm_table(adev, new_ps); - if (pi->pcie_performance_request) - ci_request_link_speed_change_before_state_change(adev, new_ps, old_ps); - ret = ci_freeze_sclk_mclk_dpm(adev); - if (ret) { - DRM_ERROR("ci_freeze_sclk_mclk_dpm failed\n"); - return ret; - } - ret = ci_populate_and_upload_sclk_mclk_dpm_levels(adev, new_ps); - if (ret) { - DRM_ERROR("ci_populate_and_upload_sclk_mclk_dpm_levels failed\n"); - return ret; - } - ret = ci_generate_dpm_level_enable_mask(adev, new_ps); - if (ret) { - DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); - return ret; - } - - ret = ci_update_vce_dpm(adev, new_ps, old_ps); - if (ret) { - DRM_ERROR("ci_update_vce_dpm failed\n"); - return ret; - } - - ret = ci_update_sclk_t(adev); - if (ret) { - DRM_ERROR("ci_update_sclk_t failed\n"); - return ret; - } - if (pi->caps_dynamic_ac_timing) { - ret = ci_update_and_upload_mc_reg_table(adev); - if (ret) { - DRM_ERROR("ci_update_and_upload_mc_reg_table failed\n"); - return ret; - } - } - ret = ci_program_memory_timing_parameters(adev); - if (ret) { - DRM_ERROR("ci_program_memory_timing_parameters failed\n"); - return ret; - } - ret = ci_unfreeze_sclk_mclk_dpm(adev); - if (ret) { - DRM_ERROR("ci_unfreeze_sclk_mclk_dpm failed\n"); - return ret; - } - ret = ci_upload_dpm_level_enable_mask(adev); - if (ret) { - DRM_ERROR("ci_upload_dpm_level_enable_mask failed\n"); - return ret; - } - if (pi->pcie_performance_request) - ci_notify_link_speed_change_after_state_change(adev, new_ps, old_ps); - - return 0; -} - -#if 0 -static void ci_dpm_reset_asic(struct amdgpu_device *adev) -{ - ci_set_boot_state(adev); -} -#endif - -static void ci_dpm_display_configuration_changed(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - ci_program_display_gap(adev); -} - -union power_info { - struct _ATOM_POWERPLAY_INFO info; - struct _ATOM_POWERPLAY_INFO_V2 info_2; - struct _ATOM_POWERPLAY_INFO_V3 info_3; - struct _ATOM_PPLIB_POWERPLAYTABLE pplib; - struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2; - struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3; -}; - -union pplib_clock_info { - struct _ATOM_PPLIB_R600_CLOCK_INFO r600; - struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780; - struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen; - struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo; - struct _ATOM_PPLIB_SI_CLOCK_INFO si; - struct _ATOM_PPLIB_CI_CLOCK_INFO ci; -}; - -union pplib_power_state { - struct _ATOM_PPLIB_STATE v1; - struct _ATOM_PPLIB_STATE_V2 v2; -}; - -static void ci_parse_pplib_non_clock_info(struct amdgpu_device *adev, - struct amdgpu_ps *rps, - struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info, - u8 table_rev) -{ - rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings); - rps->class = le16_to_cpu(non_clock_info->usClassification); - rps->class2 = le16_to_cpu(non_clock_info->usClassification2); - - if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { - rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); - rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); - } else { - rps->vclk = 0; - rps->dclk = 0; - } - - if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) - adev->pm.dpm.boot_ps = rps; - if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) - adev->pm.dpm.uvd_ps = rps; -} - -static void ci_parse_pplib_clock_info(struct amdgpu_device *adev, - struct amdgpu_ps *rps, int index, - union pplib_clock_info *clock_info) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *ps = ci_get_ps(rps); - struct ci_pl *pl = &ps->performance_levels[index]; - - ps->performance_level_count = index + 1; - - pl->sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); - pl->sclk |= clock_info->ci.ucEngineClockHigh << 16; - pl->mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); - pl->mclk |= clock_info->ci.ucMemoryClockHigh << 16; - - pl->pcie_gen = amdgpu_get_pcie_gen_support(adev, - pi->sys_pcie_mask, - pi->vbios_boot_state.pcie_gen_bootup_value, - clock_info->ci.ucPCIEGen); - pl->pcie_lane = amdgpu_get_pcie_lane_support(adev, - pi->vbios_boot_state.pcie_lane_bootup_value, - le16_to_cpu(clock_info->ci.usPCIELane)); - - if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) { - pi->acpi_pcie_gen = pl->pcie_gen; - } - - if (rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) { - pi->ulv.supported = true; - pi->ulv.pl = *pl; - pi->ulv.cg_ulv_parameter = CISLANDS_CGULVPARAMETER_DFLT; - } - - /* patch up boot state */ - if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) { - pl->mclk = pi->vbios_boot_state.mclk_bootup_value; - pl->sclk = pi->vbios_boot_state.sclk_bootup_value; - pl->pcie_gen = pi->vbios_boot_state.pcie_gen_bootup_value; - pl->pcie_lane = pi->vbios_boot_state.pcie_lane_bootup_value; - } - - switch (rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) { - case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY: - pi->use_pcie_powersaving_levels = true; - if (pi->pcie_gen_powersaving.max < pl->pcie_gen) - pi->pcie_gen_powersaving.max = pl->pcie_gen; - if (pi->pcie_gen_powersaving.min > pl->pcie_gen) - pi->pcie_gen_powersaving.min = pl->pcie_gen; - if (pi->pcie_lane_powersaving.max < pl->pcie_lane) - pi->pcie_lane_powersaving.max = pl->pcie_lane; - if (pi->pcie_lane_powersaving.min > pl->pcie_lane) - pi->pcie_lane_powersaving.min = pl->pcie_lane; - break; - case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE: - pi->use_pcie_performance_levels = true; - if (pi->pcie_gen_performance.max < pl->pcie_gen) - pi->pcie_gen_performance.max = pl->pcie_gen; - if (pi->pcie_gen_performance.min > pl->pcie_gen) - pi->pcie_gen_performance.min = pl->pcie_gen; - if (pi->pcie_lane_performance.max < pl->pcie_lane) - pi->pcie_lane_performance.max = pl->pcie_lane; - if (pi->pcie_lane_performance.min > pl->pcie_lane) - pi->pcie_lane_performance.min = pl->pcie_lane; - break; - default: - break; - } -} - -static int ci_parse_power_table(struct amdgpu_device *adev) -{ - struct amdgpu_mode_info *mode_info = &adev->mode_info; - struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info; - union pplib_power_state *power_state; - int i, j, k, non_clock_array_index, clock_array_index; - union pplib_clock_info *clock_info; - struct _StateArray *state_array; - struct _ClockInfoArray *clock_info_array; - struct _NonClockInfoArray *non_clock_info_array; - union power_info *power_info; - int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); - u16 data_offset; - u8 frev, crev; - u8 *power_state_offset; - struct ci_ps *ps; - - if (!amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, - &frev, &crev, &data_offset)) - return -EINVAL; - power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); - - amdgpu_add_thermal_controller(adev); - - state_array = (struct _StateArray *) - (mode_info->atom_context->bios + data_offset + - le16_to_cpu(power_info->pplib.usStateArrayOffset)); - clock_info_array = (struct _ClockInfoArray *) - (mode_info->atom_context->bios + data_offset + - le16_to_cpu(power_info->pplib.usClockInfoArrayOffset)); - non_clock_info_array = (struct _NonClockInfoArray *) - (mode_info->atom_context->bios + data_offset + - le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset)); - - adev->pm.dpm.ps = kcalloc(state_array->ucNumEntries, - sizeof(struct amdgpu_ps), - GFP_KERNEL); - if (!adev->pm.dpm.ps) - return -ENOMEM; - power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { - u8 *idx; - power_state = (union pplib_power_state *)power_state_offset; - non_clock_array_index = power_state->v2.nonClockInfoIndex; - non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) - &non_clock_info_array->nonClockInfo[non_clock_array_index]; - ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); - return -ENOMEM; - } - adev->pm.dpm.ps[i].ps_priv = ps; - ci_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], - non_clock_info, - non_clock_info_array->ucEntrySize); - k = 0; - idx = (u8 *)&power_state->v2.clockInfoIndex[0]; - for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) { - clock_array_index = idx[j]; - if (clock_array_index >= clock_info_array->ucNumEntries) - continue; - if (k >= CISLANDS_MAX_HARDWARE_POWERLEVELS) - break; - clock_info = (union pplib_clock_info *) - ((u8 *)&clock_info_array->clockInfo[0] + - (clock_array_index * clock_info_array->ucEntrySize)); - ci_parse_pplib_clock_info(adev, - &adev->pm.dpm.ps[i], k, - clock_info); - k++; - } - power_state_offset += 2 + power_state->v2.ucNumDPMLevels; - } - adev->pm.dpm.num_ps = state_array->ucNumEntries; - - /* fill in the vce power states */ - for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { - u32 sclk, mclk; - clock_array_index = adev->pm.dpm.vce_states[i].clk_idx; - clock_info = (union pplib_clock_info *) - &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; - sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); - sclk |= clock_info->ci.ucEngineClockHigh << 16; - mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); - mclk |= clock_info->ci.ucMemoryClockHigh << 16; - adev->pm.dpm.vce_states[i].sclk = sclk; - adev->pm.dpm.vce_states[i].mclk = mclk; - } - - return 0; -} - -static int ci_get_vbios_boot_values(struct amdgpu_device *adev, - struct ci_vbios_boot_state *boot_state) -{ - struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index = GetIndexIntoMasterTable(DATA, FirmwareInfo); - ATOM_FIRMWARE_INFO_V2_2 *firmware_info; - u8 frev, crev; - u16 data_offset; - - if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, - &frev, &crev, &data_offset)) { - firmware_info = - (ATOM_FIRMWARE_INFO_V2_2 *)(mode_info->atom_context->bios + - data_offset); - boot_state->mvdd_bootup_value = le16_to_cpu(firmware_info->usBootUpMVDDCVoltage); - boot_state->vddc_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCVoltage); - boot_state->vddci_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCIVoltage); - boot_state->pcie_gen_bootup_value = ci_get_current_pcie_speed(adev); - boot_state->pcie_lane_bootup_value = ci_get_current_pcie_lane_number(adev); - boot_state->sclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultEngineClock); - boot_state->mclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultMemoryClock); - - return 0; - } - return -EINVAL; -} - -static void ci_dpm_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->pm.dpm.num_ps; i++) { - kfree(adev->pm.dpm.ps[i].ps_priv); - } - kfree(adev->pm.dpm.ps); - kfree(adev->pm.dpm.priv); - kfree(adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries); - amdgpu_free_extended_power_table(adev); -} - -/** - * ci_dpm_init_microcode - load ucode images from disk - * - * @adev: amdgpu_device pointer - * - * Use the firmware interface to load the ucode images into - * the driver (not loaded into hw). - * Returns 0 on success, error on failure. - */ -static int ci_dpm_init_microcode(struct amdgpu_device *adev) -{ - const char *chip_name; - char fw_name[30]; - int err; - - DRM_DEBUG("\n"); - - switch (adev->asic_type) { - case CHIP_BONAIRE: - if ((adev->pdev->revision == 0x80) || - (adev->pdev->revision == 0x81) || - (adev->pdev->device == 0x665f)) - chip_name = "bonaire_k"; - else - chip_name = "bonaire"; - break; - case CHIP_HAWAII: - if (adev->pdev->revision == 0x80) - chip_name = "hawaii_k"; - else - chip_name = "hawaii"; - break; - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - default: BUG(); - } - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name); - err = request_firmware(&adev->pm.fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->pm.fw); - -out: - if (err) { - pr_err("cik_smc: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - } - return err; -} - -static int ci_dpm_init(struct amdgpu_device *adev) -{ - int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); - SMU7_Discrete_DpmTable *dpm_table; - struct amdgpu_gpio_rec gpio; - u16 data_offset, size; - u8 frev, crev; - struct ci_power_info *pi; - int ret; - - pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL); - if (pi == NULL) - return -ENOMEM; - adev->pm.dpm.priv = pi; - - pi->sys_pcie_mask = - adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK; - - pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; - - pi->pcie_gen_performance.max = AMDGPU_PCIE_GEN1; - pi->pcie_gen_performance.min = AMDGPU_PCIE_GEN3; - pi->pcie_gen_powersaving.max = AMDGPU_PCIE_GEN1; - pi->pcie_gen_powersaving.min = AMDGPU_PCIE_GEN3; - - pi->pcie_lane_performance.max = 0; - pi->pcie_lane_performance.min = 16; - pi->pcie_lane_powersaving.max = 0; - pi->pcie_lane_powersaving.min = 16; - - ret = ci_get_vbios_boot_values(adev, &pi->vbios_boot_state); - if (ret) { - ci_dpm_fini(adev); - return ret; - } - - ret = amdgpu_get_platform_caps(adev); - if (ret) { - ci_dpm_fini(adev); - return ret; - } - - ret = amdgpu_parse_extended_power_table(adev); - if (ret) { - ci_dpm_fini(adev); - return ret; - } - - ret = ci_parse_power_table(adev); - if (ret) { - ci_dpm_fini(adev); - return ret; - } - - pi->dll_default_on = false; - pi->sram_end = SMC_RAM_END; - - pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[2] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[3] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[4] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[5] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[6] = CISLAND_TARGETACTIVITY_DFLT; - pi->activity_target[7] = CISLAND_TARGETACTIVITY_DFLT; - - pi->mclk_activity_target = CISLAND_MCLK_TARGETACTIVITY_DFLT; - - pi->sclk_dpm_key_disabled = 0; - pi->mclk_dpm_key_disabled = 0; - pi->pcie_dpm_key_disabled = 0; - pi->thermal_sclk_dpm_enabled = 0; - - if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) - pi->caps_sclk_ds = true; - else - pi->caps_sclk_ds = false; - - pi->mclk_strobe_mode_threshold = 40000; - pi->mclk_stutter_mode_threshold = 40000; - pi->mclk_edc_enable_threshold = 40000; - pi->mclk_edc_wr_enable_threshold = 40000; - - ci_initialize_powertune_defaults(adev); - - pi->caps_fps = false; - - pi->caps_sclk_throttle_low_notification = false; - - pi->caps_uvd_dpm = true; - pi->caps_vce_dpm = true; - - ci_get_leakage_voltages(adev); - ci_patch_dependency_tables_with_leakage(adev); - ci_set_private_data_variables_based_on_pptable(adev); - - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries = - kcalloc(4, - sizeof(struct amdgpu_clock_voltage_dependency_entry), - GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { - ci_dpm_fini(adev); - return -ENOMEM; - } - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].clk = 36000; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].v = 720; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].clk = 54000; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].v = 810; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].clk = 72000; - adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].v = 900; - - adev->pm.dpm.dyn_state.mclk_sclk_ratio = 4; - adev->pm.dpm.dyn_state.sclk_mclk_delta = 15000; - adev->pm.dpm.dyn_state.vddc_vddci_delta = 200; - - adev->pm.dpm.dyn_state.valid_sclk_values.count = 0; - adev->pm.dpm.dyn_state.valid_sclk_values.values = NULL; - adev->pm.dpm.dyn_state.valid_mclk_values.count = 0; - adev->pm.dpm.dyn_state.valid_mclk_values.values = NULL; - - if (adev->asic_type == CHIP_HAWAII) { - pi->thermal_temp_setting.temperature_low = 94500; - pi->thermal_temp_setting.temperature_high = 95000; - pi->thermal_temp_setting.temperature_shutdown = 104000; - } else { - pi->thermal_temp_setting.temperature_low = 99500; - pi->thermal_temp_setting.temperature_high = 100000; - pi->thermal_temp_setting.temperature_shutdown = 104000; - } - - pi->uvd_enabled = false; - - dpm_table = &pi->smc_state_table; - - gpio = amdgpu_atombios_lookup_gpio(adev, VDDC_VRHOT_GPIO_PINID); - if (gpio.valid) { - dpm_table->VRHotGpio = gpio.shift; - adev->pm.dpm.platform_caps |= ATOM_PP_PLATFORM_CAP_REGULATOR_HOT; - } else { - dpm_table->VRHotGpio = CISLANDS_UNUSED_GPIO_PIN; - adev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_REGULATOR_HOT; - } - - gpio = amdgpu_atombios_lookup_gpio(adev, PP_AC_DC_SWITCH_GPIO_PINID); - if (gpio.valid) { - dpm_table->AcDcGpio = gpio.shift; - adev->pm.dpm.platform_caps |= ATOM_PP_PLATFORM_CAP_HARDWAREDC; - } else { - dpm_table->AcDcGpio = CISLANDS_UNUSED_GPIO_PIN; - adev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_HARDWAREDC; - } - - gpio = amdgpu_atombios_lookup_gpio(adev, VDDC_PCC_GPIO_PINID); - if (gpio.valid) { - u32 tmp = RREG32_SMC(ixCNB_PWRMGT_CNTL); - - switch (gpio.shift) { - case 0: - tmp &= ~CNB_PWRMGT_CNTL__GNB_SLOW_MODE_MASK; - tmp |= 1 << CNB_PWRMGT_CNTL__GNB_SLOW_MODE__SHIFT; - break; - case 1: - tmp &= ~CNB_PWRMGT_CNTL__GNB_SLOW_MODE_MASK; - tmp |= 2 << CNB_PWRMGT_CNTL__GNB_SLOW_MODE__SHIFT; - break; - case 2: - tmp |= CNB_PWRMGT_CNTL__GNB_SLOW_MASK; - break; - case 3: - tmp |= CNB_PWRMGT_CNTL__FORCE_NB_PS1_MASK; - break; - case 4: - tmp |= CNB_PWRMGT_CNTL__DPM_ENABLED_MASK; - break; - default: - DRM_INFO("Invalid PCC GPIO: %u!\n", gpio.shift); - break; - } - WREG32_SMC(ixCNB_PWRMGT_CNTL, tmp); - } - - pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_NONE; - pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_NONE; - pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_NONE; - if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_GPIO_LUT)) - pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; - else if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) - pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; - - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL) { - if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_GPIO_LUT)) - pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; - else if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_SVID2)) - pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; - else - adev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL; - } - - if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) { - if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT)) - pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO; - else if (amdgpu_atombios_is_voltage_gpio(adev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_SVID2)) - pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2; - else - adev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_MVDDCONTROL; - } - - pi->vddc_phase_shed_control = true; - -#if defined(CONFIG_ACPI) - pi->pcie_performance_request = - amdgpu_acpi_is_pcie_performance_request_supported(adev); -#else - pi->pcie_performance_request = false; -#endif - - if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size, - &frev, &crev, &data_offset)) { - pi->caps_sclk_ss_support = true; - pi->caps_mclk_ss_support = true; - pi->dynamic_ss = true; - } else { - pi->caps_sclk_ss_support = false; - pi->caps_mclk_ss_support = false; - pi->dynamic_ss = true; - } - - if (adev->pm.int_thermal_type != THERMAL_TYPE_NONE) - pi->thermal_protection = true; - else - pi->thermal_protection = false; - - pi->caps_dynamic_ac_timing = true; - - pi->uvd_power_gated = true; - - /* make sure dc limits are valid */ - if ((adev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) || - (adev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0)) - adev->pm.dpm.dyn_state.max_clock_voltage_on_dc = - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; - - pi->fan_ctrl_is_in_default_mode = true; - - return 0; -} - -static void -ci_dpm_debugfs_print_current_performance_level(void *handle, - struct seq_file *m) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amdgpu_ps *rps = &pi->current_rps; - u32 sclk = ci_get_average_sclk_freq(adev); - u32 mclk = ci_get_average_mclk_freq(adev); - u32 activity_percent = 50; - int ret; - - ret = ci_read_smc_soft_register(adev, offsetof(SMU7_SoftRegisters, AverageGraphicsA), - &activity_percent); - - if (ret == 0) { - activity_percent += 0x80; - activity_percent >>= 8; - activity_percent = activity_percent > 100 ? 100 : activity_percent; - } - - seq_printf(m, "uvd %sabled\n", pi->uvd_power_gated ? "dis" : "en"); - seq_printf(m, "vce %sabled\n", rps->vce_active ? "en" : "dis"); - seq_printf(m, "power level avg sclk: %u mclk: %u\n", - sclk, mclk); - seq_printf(m, "GPU load: %u %%\n", activity_percent); -} - -static void ci_dpm_print_power_state(void *handle, void *current_ps) -{ - struct amdgpu_ps *rps = (struct amdgpu_ps *)current_ps; - struct ci_ps *ps = ci_get_ps(rps); - struct ci_pl *pl; - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_dpm_print_class_info(rps->class, rps->class2); - amdgpu_dpm_print_cap_info(rps->caps); - printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); - for (i = 0; i < ps->performance_level_count; i++) { - pl = &ps->performance_levels[i]; - printk("\t\tpower level %d sclk: %u mclk: %u pcie gen: %u pcie lanes: %u\n", - i, pl->sclk, pl->mclk, pl->pcie_gen + 1, pl->pcie_lane); - } - amdgpu_dpm_print_ps_status(adev, rps); -} - -static inline bool ci_are_power_levels_equal(const struct ci_pl *ci_cpl1, - const struct ci_pl *ci_cpl2) -{ - return ((ci_cpl1->mclk == ci_cpl2->mclk) && - (ci_cpl1->sclk == ci_cpl2->sclk) && - (ci_cpl1->pcie_gen == ci_cpl2->pcie_gen) && - (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane)); -} - -static int ci_check_state_equal(void *handle, - void *current_ps, - void *request_ps, - bool *equal) -{ - struct ci_ps *ci_cps; - struct ci_ps *ci_rps; - int i; - struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps; - struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) - return -EINVAL; - - ci_cps = ci_get_ps((struct amdgpu_ps *)cps); - ci_rps = ci_get_ps((struct amdgpu_ps *)rps); - - if (ci_cps == NULL) { - *equal = false; - return 0; - } - - if (ci_cps->performance_level_count != ci_rps->performance_level_count) { - - *equal = false; - return 0; - } - - for (i = 0; i < ci_cps->performance_level_count; i++) { - if (!ci_are_power_levels_equal(&(ci_cps->performance_levels[i]), - &(ci_rps->performance_levels[i]))) { - *equal = false; - return 0; - } - } - - /* If all performance levels are the same try to use the UVD clocks to break the tie.*/ - *equal = ((cps->vclk == rps->vclk) && (cps->dclk == rps->dclk)); - *equal &= ((cps->evclk == rps->evclk) && (cps->ecclk == rps->ecclk)); - - return 0; -} - -static u32 ci_dpm_get_sclk(void *handle, bool low) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); - - if (low) - return requested_state->performance_levels[0].sclk; - else - return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; -} - -static u32 ci_dpm_get_mclk(void *handle, bool low) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); - - if (low) - return requested_state->performance_levels[0].mclk; - else - return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; -} - -/* get temperature in millidegrees */ -static int ci_dpm_get_temp(void *handle) -{ - u32 temp; - int actual_temp = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - temp = (RREG32_SMC(ixCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> - CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; - - if (temp & 0x200) - actual_temp = 255; - else - actual_temp = temp & 0x1ff; - - actual_temp = actual_temp * 1000; - - return actual_temp; -} - -static int ci_set_temperature_range(struct amdgpu_device *adev) -{ - int ret; - - ret = ci_thermal_enable_alert(adev, false); - if (ret) - return ret; - ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN, - CISLANDS_TEMP_RANGE_MAX); - if (ret) - return ret; - ret = ci_thermal_enable_alert(adev, true); - if (ret) - return ret; - return ret; -} - -static int ci_dpm_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->powerplay.pp_funcs = &ci_dpm_funcs; - adev->powerplay.pp_handle = adev; - ci_dpm_set_irq_funcs(adev); - - return 0; -} - -static int ci_dpm_late_init(void *handle) -{ - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!adev->pm.dpm_enabled) - return 0; - - /* init the sysfs and debugfs files late */ - ret = amdgpu_pm_sysfs_init(adev); - if (ret) - return ret; - - ret = ci_set_temperature_range(adev); - if (ret) - return ret; - - return 0; -} - -static int ci_dpm_sw_init(void *handle) -{ - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, - &adev->pm.dpm.thermal.irq); - if (ret) - return ret; - - ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, - &adev->pm.dpm.thermal.irq); - if (ret) - return ret; - - /* default to balanced state */ - adev->pm.dpm.state = POWER_STATE_TYPE_BALANCED; - adev->pm.dpm.user_state = POWER_STATE_TYPE_BALANCED; - adev->pm.dpm.forced_level = AMD_DPM_FORCED_LEVEL_AUTO; - adev->pm.default_sclk = adev->clock.default_sclk; - adev->pm.default_mclk = adev->clock.default_mclk; - adev->pm.current_sclk = adev->clock.default_sclk; - adev->pm.current_mclk = adev->clock.default_mclk; - adev->pm.int_thermal_type = THERMAL_TYPE_NONE; - - ret = ci_dpm_init_microcode(adev); - if (ret) - return ret; - - if (amdgpu_dpm == 0) - return 0; - - INIT_WORK(&adev->pm.dpm.thermal.work, amdgpu_dpm_thermal_work_handler); - mutex_lock(&adev->pm.mutex); - ret = ci_dpm_init(adev); - if (ret) - goto dpm_failed; - adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; - if (amdgpu_dpm == 1) - amdgpu_pm_print_power_states(adev); - mutex_unlock(&adev->pm.mutex); - DRM_INFO("amdgpu: dpm initialized\n"); - - return 0; - -dpm_failed: - ci_dpm_fini(adev); - mutex_unlock(&adev->pm.mutex); - DRM_ERROR("amdgpu: dpm initialization failed\n"); - return ret; -} - -static int ci_dpm_sw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - flush_work(&adev->pm.dpm.thermal.work); - - mutex_lock(&adev->pm.mutex); - ci_dpm_fini(adev); - mutex_unlock(&adev->pm.mutex); - - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - - return 0; -} - -static int ci_dpm_hw_init(void *handle) -{ - int ret; - - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!amdgpu_dpm) { - ret = ci_upload_firmware(adev); - if (ret) { - DRM_ERROR("ci_upload_firmware failed\n"); - return ret; - } - ci_dpm_start_smc(adev); - return 0; - } - - mutex_lock(&adev->pm.mutex); - ci_dpm_setup_asic(adev); - ret = ci_dpm_enable(adev); - if (ret) - adev->pm.dpm_enabled = false; - else - adev->pm.dpm_enabled = true; - mutex_unlock(&adev->pm.mutex); - - return ret; -} - -static int ci_dpm_hw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->pm.dpm_enabled) { - mutex_lock(&adev->pm.mutex); - ci_dpm_disable(adev); - mutex_unlock(&adev->pm.mutex); - } else { - ci_dpm_stop_smc(adev); - } - - return 0; -} - -static int ci_dpm_suspend(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->pm.dpm_enabled) { - mutex_lock(&adev->pm.mutex); - amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, - AMDGPU_THERMAL_IRQ_LOW_TO_HIGH); - amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, - AMDGPU_THERMAL_IRQ_HIGH_TO_LOW); - adev->pm.dpm.last_user_state = adev->pm.dpm.user_state; - adev->pm.dpm.last_state = adev->pm.dpm.state; - adev->pm.dpm.user_state = POWER_STATE_TYPE_INTERNAL_BOOT; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_BOOT; - mutex_unlock(&adev->pm.mutex); - amdgpu_pm_compute_clocks(adev); - - } - - return 0; -} - -static int ci_dpm_resume(void *handle) -{ - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->pm.dpm_enabled) { - /* asic init will reset to the boot state */ - mutex_lock(&adev->pm.mutex); - ci_dpm_setup_asic(adev); - ret = ci_dpm_enable(adev); - if (ret) - adev->pm.dpm_enabled = false; - else - adev->pm.dpm_enabled = true; - adev->pm.dpm.user_state = adev->pm.dpm.last_user_state; - adev->pm.dpm.state = adev->pm.dpm.last_state; - mutex_unlock(&adev->pm.mutex); - if (adev->pm.dpm_enabled) - amdgpu_pm_compute_clocks(adev); - } - return 0; -} - -static bool ci_dpm_is_idle(void *handle) -{ - /* XXX */ - return true; -} - -static int ci_dpm_wait_for_idle(void *handle) -{ - /* XXX */ - return 0; -} - -static int ci_dpm_soft_reset(void *handle) -{ - return 0; -} - -static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - u32 cg_thermal_int; - - switch (type) { - case AMDGPU_THERMAL_IRQ_LOW_TO_HIGH: - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; - WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); - break; - case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; - WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); - break; - default: - break; - } - break; - - case AMDGPU_THERMAL_IRQ_HIGH_TO_LOW: - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; - WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); - break; - case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; - WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); - break; - default: - break; - } - break; - - default: - break; - } - return 0; -} - -static int ci_dpm_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - bool queue_thermal = false; - - if (entry == NULL) - return -EINVAL; - - switch (entry->src_id) { - case 230: /* thermal low to high */ - DRM_DEBUG("IH: thermal low to high\n"); - adev->pm.dpm.thermal.high_to_low = false; - queue_thermal = true; - break; - case 231: /* thermal high to low */ - DRM_DEBUG("IH: thermal high to low\n"); - adev->pm.dpm.thermal.high_to_low = true; - queue_thermal = true; - break; - default: - break; - } - - if (queue_thermal) - schedule_work(&adev->pm.dpm.thermal.work); - - return 0; -} - -static int ci_dpm_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - return 0; -} - -static int ci_dpm_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -static int ci_dpm_print_clock_levels(void *handle, - enum pp_clock_type type, char *buf) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; - struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; - struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table; - - int i, now, size = 0; - uint32_t clock, pcie_speed; - - switch (type) { - case PP_SCLK: - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency); - clock = RREG32(mmSMC_MSG_ARG_0); - - for (i = 0; i < sclk_table->count; i++) { - if (clock > sclk_table->dpm_levels[i].value) - continue; - break; - } - now = i; - - for (i = 0; i < sclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", - i, sclk_table->dpm_levels[i].value / 100, - (i == now) ? "*" : ""); - break; - case PP_MCLK: - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency); - clock = RREG32(mmSMC_MSG_ARG_0); - - for (i = 0; i < mclk_table->count; i++) { - if (clock > mclk_table->dpm_levels[i].value) - continue; - break; - } - now = i; - - for (i = 0; i < mclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", - i, mclk_table->dpm_levels[i].value / 100, - (i == now) ? "*" : ""); - break; - case PP_PCIE: - pcie_speed = ci_get_current_pcie_speed(adev); - for (i = 0; i < pcie_table->count; i++) { - if (pcie_speed != pcie_table->dpm_levels[i].value) - continue; - break; - } - now = i; - - for (i = 0; i < pcie_table->count; i++) - size += sprintf(buf + size, "%d: %s %s\n", i, - (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x1" : - (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" : - (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "", - (i == now) ? "*" : ""); - break; - default: - break; - } - - return size; -} - -static int ci_dpm_force_clock_level(void *handle, - enum pp_clock_type type, uint32_t mask) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL) - return -EINVAL; - - if (mask == 0) - return -EINVAL; - - switch (type) { - case PP_SCLK: - if (!pi->sclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_SCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask); - break; - - case PP_MCLK: - if (!pi->mclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask); - break; - - case PP_PCIE: - { - uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask; - - if (!pi->pcie_dpm_key_disabled) { - if (fls(tmp) != ffs(tmp)) - amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel); - else - amdgpu_ci_send_msg_to_smc_with_parameter(adev, - PPSMC_MSG_PCIeDPM_ForceLevel, - fls(tmp) - 1); - } - break; - } - default: - break; - } - - return 0; -} - -static int ci_dpm_get_sclk_od(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table); - struct ci_single_dpm_table *golden_sclk_table = - &(pi->golden_dpm_table.sclk_table); - int value; - - value = (sclk_table->dpm_levels[sclk_table->count - 1].value - - golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) * - 100 / - golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value; - - return value; -} - -static int ci_dpm_set_sclk_od(void *handle, uint32_t value) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); - struct ci_single_dpm_table *golden_sclk_table = - &(pi->golden_dpm_table.sclk_table); - - if (value > 20) - value = 20; - - ps->performance_levels[ps->performance_level_count - 1].sclk = - golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value * - value / 100 + - golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value; - - return 0; -} - -static int ci_dpm_get_mclk_od(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table); - struct ci_single_dpm_table *golden_mclk_table = - &(pi->golden_dpm_table.mclk_table); - int value; - - value = (mclk_table->dpm_levels[mclk_table->count - 1].value - - golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) * - 100 / - golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value; - - return value; -} - -static int ci_dpm_set_mclk_od(void *handle, uint32_t value) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); - struct ci_single_dpm_table *golden_mclk_table = - &(pi->golden_dpm_table.mclk_table); - - if (value > 20) - value = 20; - - ps->performance_levels[ps->performance_level_count - 1].mclk = - golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value * - value / 100 + - golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value; - - return 0; -} - -static int ci_dpm_read_sensor(void *handle, int idx, - void *value, int *size) -{ - u32 activity_percent = 50; - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - /* size must be at least 4 bytes for all sensors */ - if (*size < 4) - return -EINVAL; - - switch (idx) { - case AMDGPU_PP_SENSOR_GFX_SCLK: - *((uint32_t *)value) = ci_get_average_sclk_freq(adev); - *size = 4; - return 0; - case AMDGPU_PP_SENSOR_GFX_MCLK: - *((uint32_t *)value) = ci_get_average_mclk_freq(adev); - *size = 4; - return 0; - case AMDGPU_PP_SENSOR_GPU_TEMP: - *((uint32_t *)value) = ci_dpm_get_temp(adev); - *size = 4; - return 0; - case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = ci_read_smc_soft_register(adev, - offsetof(SMU7_SoftRegisters, - AverageGraphicsA), - &activity_percent); - if (ret == 0) { - activity_percent += 0x80; - activity_percent >>= 8; - activity_percent = - activity_percent > 100 ? 100 : activity_percent; - } - *((uint32_t *)value) = activity_percent; - *size = 4; - return 0; - default: - return -EINVAL; - } -} - -static int ci_set_powergating_by_smu(void *handle, - uint32_t block_type, bool gate) -{ - switch (block_type) { - case AMD_IP_BLOCK_TYPE_UVD: - ci_dpm_powergate_uvd(handle, gate); - break; - default: - break; - } - return 0; -} - -static const struct amd_ip_funcs ci_dpm_ip_funcs = { - .name = "ci_dpm", - .early_init = ci_dpm_early_init, - .late_init = ci_dpm_late_init, - .sw_init = ci_dpm_sw_init, - .sw_fini = ci_dpm_sw_fini, - .hw_init = ci_dpm_hw_init, - .hw_fini = ci_dpm_hw_fini, - .suspend = ci_dpm_suspend, - .resume = ci_dpm_resume, - .is_idle = ci_dpm_is_idle, - .wait_for_idle = ci_dpm_wait_for_idle, - .soft_reset = ci_dpm_soft_reset, - .set_clockgating_state = ci_dpm_set_clockgating_state, - .set_powergating_state = ci_dpm_set_powergating_state, -}; - -const struct amdgpu_ip_block_version ci_smu_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_SMC, - .major = 7, - .minor = 0, - .rev = 0, - .funcs = &ci_dpm_ip_funcs, -}; - -static const struct amd_pm_funcs ci_dpm_funcs = { - .pre_set_power_state = &ci_dpm_pre_set_power_state, - .set_power_state = &ci_dpm_set_power_state, - .post_set_power_state = &ci_dpm_post_set_power_state, - .display_configuration_changed = &ci_dpm_display_configuration_changed, - .get_sclk = &ci_dpm_get_sclk, - .get_mclk = &ci_dpm_get_mclk, - .print_power_state = &ci_dpm_print_power_state, - .debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level, - .force_performance_level = &ci_dpm_force_performance_level, - .vblank_too_short = &ci_dpm_vblank_too_short, - .set_powergating_by_smu = &ci_set_powergating_by_smu, - .set_fan_control_mode = &ci_dpm_set_fan_control_mode, - .get_fan_control_mode = &ci_dpm_get_fan_control_mode, - .set_fan_speed_percent = &ci_dpm_set_fan_speed_percent, - .get_fan_speed_percent = &ci_dpm_get_fan_speed_percent, - .print_clock_levels = ci_dpm_print_clock_levels, - .force_clock_level = ci_dpm_force_clock_level, - .get_sclk_od = ci_dpm_get_sclk_od, - .set_sclk_od = ci_dpm_set_sclk_od, - .get_mclk_od = ci_dpm_get_mclk_od, - .set_mclk_od = ci_dpm_set_mclk_od, - .check_state_equal = ci_check_state_equal, - .get_vce_clock_state = amdgpu_get_vce_clock_state, - .read_sensor = ci_dpm_read_sensor, -}; - -static const struct amdgpu_irq_src_funcs ci_dpm_irq_funcs = { - .set = ci_dpm_set_interrupt_state, - .process = ci_dpm_process_interrupt, -}; - -static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev) -{ - adev->pm.dpm.thermal.irq.num_types = AMDGPU_THERMAL_IRQ_LAST; - adev->pm.dpm.thermal.irq.funcs = &ci_dpm_irq_funcs; -} diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h deleted file mode 100644 index 91be2996ae7c..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#ifndef __CI_DPM_H__ -#define __CI_DPM_H__ - -#include "amdgpu_atombios.h" -#include "ppsmc.h" - -#define SMU__NUM_SCLK_DPM_STATE 8 -#define SMU__NUM_MCLK_DPM_LEVELS 6 -#define SMU__NUM_LCLK_DPM_LEVELS 8 -#define SMU__NUM_PCIE_DPM_LEVELS 8 -#include "smu7_discrete.h" - -#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2 - -#define CISLANDS_UNUSED_GPIO_PIN 0x7F - -struct ci_pl { - u32 mclk; - u32 sclk; - enum amdgpu_pcie_gen pcie_gen; - u16 pcie_lane; -}; - -struct ci_ps { - u16 performance_level_count; - bool dc_compatible; - u32 sclk_t; - struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS]; -}; - -struct ci_dpm_level { - bool enabled; - u32 value; - u32 param1; -}; - -#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5 -#define MAX_REGULAR_DPM_NUMBER 8 -#define CISLAND_MINIMUM_ENGINE_CLOCK 800 - -struct ci_single_dpm_table { - u32 count; - struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER]; -}; - -struct ci_dpm_table { - struct ci_single_dpm_table sclk_table; - struct ci_single_dpm_table mclk_table; - struct ci_single_dpm_table pcie_speed_table; - struct ci_single_dpm_table vddc_table; - struct ci_single_dpm_table vddci_table; - struct ci_single_dpm_table mvdd_table; -}; - -struct ci_mc_reg_entry { - u32 mclk_max; - u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; -}; - -struct ci_mc_reg_table { - u8 last; - u8 num_entries; - u16 valid_flag; - struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES]; - SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE]; -}; - -struct ci_ulv_parm -{ - bool supported; - u32 cg_ulv_parameter; - u32 volt_change_delay; - struct ci_pl pl; -}; - -#define CISLANDS_MAX_LEAKAGE_COUNT 8 - -struct ci_leakage_voltage { - u16 count; - u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT]; - u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT]; -}; - -struct ci_dpm_level_enable_mask { - u32 uvd_dpm_enable_mask; - u32 vce_dpm_enable_mask; - u32 acp_dpm_enable_mask; - u32 samu_dpm_enable_mask; - u32 sclk_dpm_enable_mask; - u32 mclk_dpm_enable_mask; - u32 pcie_dpm_enable_mask; -}; - -struct ci_vbios_boot_state -{ - u16 mvdd_bootup_value; - u16 vddc_bootup_value; - u16 vddci_bootup_value; - u32 sclk_bootup_value; - u32 mclk_bootup_value; - u16 pcie_gen_bootup_value; - u16 pcie_lane_bootup_value; -}; - -struct ci_clock_registers { - u32 cg_spll_func_cntl; - u32 cg_spll_func_cntl_2; - u32 cg_spll_func_cntl_3; - u32 cg_spll_func_cntl_4; - u32 cg_spll_spread_spectrum; - u32 cg_spll_spread_spectrum_2; - u32 dll_cntl; - u32 mclk_pwrmgt_cntl; - u32 mpll_ad_func_cntl; - u32 mpll_dq_func_cntl; - u32 mpll_func_cntl; - u32 mpll_func_cntl_1; - u32 mpll_func_cntl_2; - u32 mpll_ss1; - u32 mpll_ss2; -}; - -struct ci_thermal_temperature_setting { - s32 temperature_low; - s32 temperature_high; - s32 temperature_shutdown; -}; - -struct ci_pcie_perf_range { - u16 max; - u16 min; -}; - -enum ci_pt_config_reg_type { - CISLANDS_CONFIGREG_MMR = 0, - CISLANDS_CONFIGREG_SMC_IND, - CISLANDS_CONFIGREG_DIDT_IND, - CISLANDS_CONFIGREG_CACHE, - CISLANDS_CONFIGREG_MAX -}; - -#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001 -#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002 -#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004 - -struct ci_pt_config_reg { - u32 offset; - u32 mask; - u32 shift; - u32 value; - enum ci_pt_config_reg_type type; -}; - -struct ci_pt_defaults { - u8 svi_load_line_en; - u8 svi_load_line_vddc; - u8 tdc_vddc_throttle_release_limit_perc; - u8 tdc_mawt; - u8 tdc_waterfall_ctl; - u8 dte_ambient_temp_base; - u32 display_cac; - u32 bapm_temp_gradient; - u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; - u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS]; -}; - -#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 -#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 -#define DPMTABLE_UPDATE_SCLK 0x00000004 -#define DPMTABLE_UPDATE_MCLK 0x00000008 - -struct ci_power_info { - struct ci_dpm_table dpm_table; - struct ci_dpm_table golden_dpm_table; - u32 voltage_control; - u32 mvdd_control; - u32 vddci_control; - u32 active_auto_throttle_sources; - struct ci_clock_registers clock_registers; - u16 acpi_vddc; - u16 acpi_vddci; - enum amdgpu_pcie_gen force_pcie_gen; - enum amdgpu_pcie_gen acpi_pcie_gen; - struct ci_leakage_voltage vddc_leakage; - struct ci_leakage_voltage vddci_leakage; - u16 max_vddc_in_pp_table; - u16 min_vddc_in_pp_table; - u16 max_vddci_in_pp_table; - u16 min_vddci_in_pp_table; - u32 mclk_strobe_mode_threshold; - u32 mclk_stutter_mode_threshold; - u32 mclk_edc_enable_threshold; - u32 mclk_edc_wr_enable_threshold; - struct ci_vbios_boot_state vbios_boot_state; - /* smc offsets */ - u32 sram_end; - u32 dpm_table_start; - u32 soft_regs_start; - u32 mc_reg_table_start; - u32 fan_table_start; - u32 arb_table_start; - /* smc tables */ - SMU7_Discrete_DpmTable smc_state_table; - SMU7_Discrete_MCRegisters smc_mc_reg_table; - SMU7_Discrete_PmFuses smc_powertune_table; - /* other stuff */ - struct ci_mc_reg_table mc_reg_table; - struct atom_voltage_table vddc_voltage_table; - struct atom_voltage_table vddci_voltage_table; - struct atom_voltage_table mvdd_voltage_table; - struct ci_ulv_parm ulv; - u32 power_containment_features; - const struct ci_pt_defaults *powertune_defaults; - u32 dte_tj_offset; - bool vddc_phase_shed_control; - struct ci_thermal_temperature_setting thermal_temp_setting; - struct ci_dpm_level_enable_mask dpm_level_enable_mask; - u32 need_update_smu7_dpm_table; - u32 sclk_dpm_key_disabled; - u32 mclk_dpm_key_disabled; - u32 pcie_dpm_key_disabled; - u32 thermal_sclk_dpm_enabled; - struct ci_pcie_perf_range pcie_gen_performance; - struct ci_pcie_perf_range pcie_lane_performance; - struct ci_pcie_perf_range pcie_gen_powersaving; - struct ci_pcie_perf_range pcie_lane_powersaving; - u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS]; - u32 mclk_activity_target; - u32 low_sclk_interrupt_t; - u32 last_mclk_dpm_enable_mask; - u32 sys_pcie_mask; - /* caps */ - bool caps_power_containment; - bool caps_cac; - bool caps_sq_ramping; - bool caps_db_ramping; - bool caps_td_ramping; - bool caps_tcp_ramping; - bool caps_fps; - bool caps_sclk_ds; - bool caps_sclk_ss_support; - bool caps_mclk_ss_support; - bool caps_uvd_dpm; - bool caps_vce_dpm; - bool caps_samu_dpm; - bool caps_acp_dpm; - bool caps_automatic_dc_transition; - bool caps_sclk_throttle_low_notification; - bool caps_dynamic_ac_timing; - bool caps_od_fuzzy_fan_control_support; - /* flags */ - bool thermal_protection; - bool pcie_performance_request; - bool dynamic_ss; - bool dll_default_on; - bool cac_enabled; - bool uvd_enabled; - bool battery_state; - bool pspp_notify_required; - bool enable_bapm_feature; - bool enable_tdc_limit_feature; - bool enable_pkg_pwr_tracking_feature; - bool use_pcie_performance_levels; - bool use_pcie_powersaving_levels; - bool uvd_power_gated; - /* driver states */ - struct amdgpu_ps current_rps; - struct ci_ps current_ps; - struct amdgpu_ps requested_rps; - struct ci_ps requested_ps; - /* fan control */ - bool fan_ctrl_is_in_default_mode; - bool fan_is_controlled_by_smc; - u32 t_min; - u32 fan_ctrl_default_mode; -}; - -#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 -#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO 0x1 -#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2 0x2 - -#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT 256 - -#define CISLANDS_VRC_DFLT0 0x3FFFC000 -#define CISLANDS_VRC_DFLT1 0x000400 -#define CISLANDS_VRC_DFLT2 0xC00080 -#define CISLANDS_VRC_DFLT3 0xC00200 -#define CISLANDS_VRC_DFLT4 0xC01680 -#define CISLANDS_VRC_DFLT5 0xC00033 -#define CISLANDS_VRC_DFLT6 0xC00033 -#define CISLANDS_VRC_DFLT7 0x3FFFC000 - -#define CISLANDS_CGULVPARAMETER_DFLT 0x00040035 -#define CISLAND_TARGETACTIVITY_DFLT 30 -#define CISLAND_MCLK_TARGETACTIVITY_DFLT 10 - -#define PCIE_PERF_REQ_REMOVE_REGISTRY 0 -#define PCIE_PERF_REQ_FORCE_LOWPOWER 1 -#define PCIE_PERF_REQ_PECI_GEN1 2 -#define PCIE_PERF_REQ_PECI_GEN2 3 -#define PCIE_PERF_REQ_PECI_GEN3 4 - -#define CISLANDS_SSTU_DFLT 0 -#define CISLANDS_SST_DFLT 0x00C8 - -/* XXX are these ok? */ -#define CISLANDS_TEMP_RANGE_MIN (90 * 1000) -#define CISLANDS_TEMP_RANGE_MAX (120 * 1000) - -int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev, - u32 smc_start_address, - const u8 *src, u32 byte_count, u32 limit); -void amdgpu_ci_start_smc(struct amdgpu_device *adev); -void amdgpu_ci_reset_smc(struct amdgpu_device *adev); -int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev); -void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev); -void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev); -bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev); -PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg); -PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev); -int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit); -int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev, - u32 smc_address, u32 *value, u32 limit); -int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev, - u32 smc_address, u32 value, u32 limit); - -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c deleted file mode 100644 index b8ba51e045b5..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Alex Deucher - */ - -#include -#include -#include "amdgpu.h" -#include "cikd.h" -#include "ppsmc.h" -#include "amdgpu_ucode.h" -#include "ci_dpm.h" - -#include "smu/smu_7_0_1_d.h" -#include "smu/smu_7_0_1_sh_mask.h" - -static int ci_set_smc_sram_address(struct amdgpu_device *adev, - u32 smc_address, u32 limit) -{ - if (smc_address & 3) - return -EINVAL; - if ((smc_address + 3) > limit) - return -EINVAL; - - WREG32(mmSMC_IND_INDEX_0, smc_address); - WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); - - return 0; -} - -int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev, - u32 smc_start_address, - const u8 *src, u32 byte_count, u32 limit) -{ - unsigned long flags; - u32 data, original_data; - u32 addr; - u32 extra_shift; - int ret = 0; - - if (smc_start_address & 3) - return -EINVAL; - if ((smc_start_address + byte_count) > limit) - return -EINVAL; - - addr = smc_start_address; - - spin_lock_irqsave(&adev->smc_idx_lock, flags); - while (byte_count >= 4) { - /* SMC address space is BE */ - data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; - - ret = ci_set_smc_sram_address(adev, addr, limit); - if (ret) - goto done; - - WREG32(mmSMC_IND_DATA_0, data); - - src += 4; - byte_count -= 4; - addr += 4; - } - - /* RMW for the final bytes */ - if (byte_count > 0) { - data = 0; - - ret = ci_set_smc_sram_address(adev, addr, limit); - if (ret) - goto done; - - original_data = RREG32(mmSMC_IND_DATA_0); - - extra_shift = 8 * (4 - byte_count); - - while (byte_count > 0) { - data = (data << 8) + *src++; - byte_count--; - } - - data <<= extra_shift; - - data |= (original_data & ~((~0UL) << extra_shift)); - - ret = ci_set_smc_sram_address(adev, addr, limit); - if (ret) - goto done; - - WREG32(mmSMC_IND_DATA_0, data); - } - -done: - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); - - return ret; -} - -void amdgpu_ci_start_smc(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL); - - tmp &= ~SMC_SYSCON_RESET_CNTL__rst_reg_MASK; - WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp); -} - -void amdgpu_ci_reset_smc(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL); - - tmp |= SMC_SYSCON_RESET_CNTL__rst_reg_MASK; - WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp); -} - -int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev) -{ - static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 }; - - return amdgpu_ci_copy_bytes_to_smc(adev, 0x0, data, 4, sizeof(data)+1); -} - -void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); - - tmp |= SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK; - - WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp); -} - -void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev) -{ - u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); - - tmp &= ~SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK; - - WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp); -} - -bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev) -{ - u32 clk = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); - u32 pc_c = RREG32_SMC(ixSMC_PC_C); - - if (!(clk & SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK) && (0x20100 <= pc_c)) - return true; - - return false; -} - -PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg) -{ - u32 tmp; - int i; - - if (!amdgpu_ci_is_smc_running(adev)) - return PPSMC_Result_Failed; - - WREG32(mmSMC_MESSAGE_0, msg); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSMC_RESP_0); - if (tmp != 0) - break; - udelay(1); - } - tmp = RREG32(mmSMC_RESP_0); - - return (PPSMC_Result)tmp; -} - -PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev) -{ - u32 tmp; - int i; - - if (!amdgpu_ci_is_smc_running(adev)) - return PPSMC_Result_OK; - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); - if ((tmp & SMC_SYSCON_CLOCK_CNTL_0__cken_MASK) == 0) - break; - udelay(1); - } - - return PPSMC_Result_OK; -} - -int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit) -{ - const struct smc_firmware_header_v1_0 *hdr; - unsigned long flags; - u32 ucode_start_address; - u32 ucode_size; - const u8 *src; - u32 data; - - if (!adev->pm.fw) - return -EINVAL; - - hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; - amdgpu_ucode_print_smc_hdr(&hdr->header); - - adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version); - ucode_start_address = le32_to_cpu(hdr->ucode_start_addr); - ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes); - src = (const u8 *) - (adev->pm.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - - if (ucode_size & 3) - return -EINVAL; - - spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(mmSMC_IND_INDEX_0, ucode_start_address); - WREG32_P(mmSMC_IND_ACCESS_CNTL, SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK, - ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); - while (ucode_size >= 4) { - /* SMC address space is BE */ - data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; - - WREG32(mmSMC_IND_DATA_0, data); - - src += 4; - ucode_size -= 4; - } - WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); - - return 0; -} - -int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev, - u32 smc_address, u32 *value, u32 limit) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&adev->smc_idx_lock, flags); - ret = ci_set_smc_sram_address(adev, smc_address, limit); - if (ret == 0) - *value = RREG32(mmSMC_IND_DATA_0); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); - - return ret; -} - -int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev, - u32 smc_address, u32 value, u32 limit) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&adev->smc_idx_lock, flags); - ret = ci_set_smc_sram_address(adev, smc_address, limit); - if (ret == 0) - WREG32(mmSMC_IND_DATA_0, value); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); - - return ret; -} diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index b1b7abed7dab..07c1f239e9c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2070,10 +2070,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); - if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - else - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2091,10 +2088,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block); amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); - if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - else - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h index 2a086610f74d..2fcc4b60153c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h @@ -24,7 +24,6 @@ #ifndef __CIK_DPM_H__ #define __CIK_DPM_H__ -extern const struct amdgpu_ip_block_version ci_smu_ip_block; extern const struct amdgpu_ip_block_version kv_smu_ip_block; #endif From 049d69df2db6da6203880e7da5f30900cd5825a4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 22 Jan 2019 17:54:22 +0800 Subject: [PATCH 314/539] drm/amd/powerplay: support Vega10 fan table V3 MGPU fan boost related parameter is added. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_pptable.h | 24 +++++++++ .../powerplay/hwmgr/vega10_processpptables.c | 50 ++++++++++++++++++- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h index b3e63003a789..c934e9612c1b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h @@ -282,6 +282,30 @@ typedef struct _ATOM_Vega10_Fan_Table_V2 { UCHAR ucFanMaxRPM; } ATOM_Vega10_Fan_Table_V2; +typedef struct _ATOM_Vega10_Fan_Table_V3 { + UCHAR ucRevId; + USHORT usFanOutputSensitivity; + USHORT usFanAcousticLimitRpm; + USHORT usThrottlingRPM; + USHORT usTargetTemperature; + USHORT usMinimumPWMLimit; + USHORT usTargetGfxClk; + USHORT usFanGainEdge; + USHORT usFanGainHotspot; + USHORT usFanGainLiquid; + USHORT usFanGainVrVddc; + USHORT usFanGainVrMvdd; + USHORT usFanGainPlx; + USHORT usFanGainHbm; + UCHAR ucEnableZeroRPM; + USHORT usFanStopTemperature; + USHORT usFanStartTemperature; + UCHAR ucFanParameters; + UCHAR ucFanMinRPM; + UCHAR ucFanMaxRPM; + USHORT usMGpuThrottlingRPM; +} ATOM_Vega10_Fan_Table_V3; + typedef struct _ATOM_Vega10_Thermal_Controller { UCHAR ucRevId; UCHAR ucType; /* one of ATOM_VEGA10_PP_THERMALCONTROLLER_*/ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index 99d596dc0e89..b6767d74dc85 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -123,6 +123,7 @@ static int init_thermal_controller( const Vega10_PPTable_Generic_SubTable_Header *header; const ATOM_Vega10_Fan_Table *fan_table_v1; const ATOM_Vega10_Fan_Table_V2 *fan_table_v2; + const ATOM_Vega10_Fan_Table_V3 *fan_table_v3; thermal_controller = (ATOM_Vega10_Thermal_Controller *) (((unsigned long)powerplay_table) + @@ -207,7 +208,7 @@ static int init_thermal_controller( le16_to_cpu(fan_table_v1->usFanStopTemperature); hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature = le16_to_cpu(fan_table_v1->usFanStartTemperature); - } else if (header->ucRevId > 10) { + } else if (header->ucRevId == 0xb) { fan_table_v2 = (ATOM_Vega10_Fan_Table_V2 *)header; hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution = @@ -251,7 +252,54 @@ static int init_thermal_controller( le16_to_cpu(fan_table_v2->usFanStopTemperature); hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature = le16_to_cpu(fan_table_v2->usFanStartTemperature); + } else if (header->ucRevId > 0xb) { + fan_table_v3 = (ATOM_Vega10_Fan_Table_V3 *)header; + + hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution = + fan_table_v3->ucFanParameters & ATOM_VEGA10_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK; + hwmgr->thermal_controller.fanInfo.ulMinRPM = fan_table_v3->ucFanMinRPM * 100UL; + hwmgr->thermal_controller.fanInfo.ulMaxRPM = fan_table_v3->ucFanMaxRPM * 100UL; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = + le16_to_cpu(fan_table_v3->usFanOutputSensitivity); + hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM = + fan_table_v3->ucFanMaxRPM * 100UL; + hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMMaxLimit = + le16_to_cpu(fan_table_v3->usThrottlingRPM); + hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit = + le16_to_cpu(fan_table_v3->usFanAcousticLimitRpm); + hwmgr->thermal_controller.advanceFanControlParameters.usTMax = + le16_to_cpu(fan_table_v3->usTargetTemperature); + hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin = + le16_to_cpu(fan_table_v3->usMinimumPWMLimit); + hwmgr->thermal_controller.advanceFanControlParameters.ulTargetGfxClk = + le16_to_cpu(fan_table_v3->usTargetGfxClk); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainEdge = + le16_to_cpu(fan_table_v3->usFanGainEdge); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHotspot = + le16_to_cpu(fan_table_v3->usFanGainHotspot); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainLiquid = + le16_to_cpu(fan_table_v3->usFanGainLiquid); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrVddc = + le16_to_cpu(fan_table_v3->usFanGainVrVddc); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrMvdd = + le16_to_cpu(fan_table_v3->usFanGainVrMvdd); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainPlx = + le16_to_cpu(fan_table_v3->usFanGainPlx); + hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHbm = + le16_to_cpu(fan_table_v3->usFanGainHbm); + + hwmgr->thermal_controller.advanceFanControlParameters.ucEnableZeroRPM = + fan_table_v3->ucEnableZeroRPM; + hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStopTemperature = + le16_to_cpu(fan_table_v3->usFanStopTemperature); + hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature = + le16_to_cpu(fan_table_v3->usFanStartTemperature); + hwmgr->thermal_controller.advanceFanControlParameters.usMGpuThrottlingRPMLimit = + le16_to_cpu(fan_table_v3->usMGpuThrottlingRPM); } + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index b1cd70dcd6e7..bac3d85e3b82 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -687,6 +687,7 @@ struct pp_advance_fan_control_parameters { uint32_t ulTargetGfxClk; uint16_t usZeroRPMStartTemperature; uint16_t usZeroRPMStopTemperature; + uint16_t usMGpuThrottlingRPMLimit; }; struct pp_thermal_controller_info { From 713b64a58e88acec4ab97782086061fd012de7aa Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 22 Jan 2019 18:05:54 +0800 Subject: [PATCH 315/539] drm/amd/powerplay: enable MGPU fan boost feature on Vega10 For those SKUs which support this feature only. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 1 + .../drm/amd/powerplay/hwmgr/vega10_thermal.c | 37 +++++++++++++++++++ .../drm/amd/powerplay/hwmgr/vega10_thermal.h | 1 + 3 files changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index d1e262844619..21696e8b0c23 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4984,6 +4984,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_asic_baco_capability = vega10_baco_get_capability, .get_asic_baco_state = vega10_baco_get_state, .set_asic_baco_state = vega10_baco_set_state, + .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index 3f807d6c95ce..ba8763daa380 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -556,6 +556,43 @@ int vega10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) return ret; } +int vega10_enable_mgpu_fan_boost(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + PPTable_t *table = &(data->smc_state_table.pp_table); + int ret; + + if (!data->smu_features[GNLD_FAN_CONTROL].supported) + return 0; + + if (!hwmgr->thermal_controller.advanceFanControlParameters. + usMGpuThrottlingRPMLimit) + return 0; + + table->FanThrottlingRpm = hwmgr->thermal_controller. + advanceFanControlParameters.usMGpuThrottlingRPMLimit; + + ret = smum_smc_table_manager(hwmgr, + (uint8_t *)(&(data->smc_state_table.pp_table)), + PPTABLE, false); + if (ret) { + pr_info("Failed to update fan control table in pptable!"); + return ret; + } + + ret = vega10_disable_fan_control_feature(hwmgr); + if (ret) { + pr_info("Attempt to disable SMC fan control feature failed!"); + return ret; + } + + ret = vega10_enable_fan_control_feature(hwmgr); + if (ret) + pr_info("Attempt to enable SMC fan control feature failed!"); + + return ret; +} + /** * Start the fan control on the SMC. * @param hwmgr the address of the powerplay hardware manager. diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h index 21e7c4dfa2ca..4a0ede7c1f07 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.h @@ -73,6 +73,7 @@ extern int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr); extern int vega10_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr); extern int vega10_start_thermal_controller(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range); +extern int vega10_enable_mgpu_fan_boost(struct pp_hwmgr *hwmgr); #endif From 222b5f044159877504dbac9bc1910f89a74136e2 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 4 Dec 2018 16:56:14 -0500 Subject: [PATCH 316/539] drm/sched: Refactor ring mirror list handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decauple sched threads stop and start and ring mirror list handling from the policy of what to do about the guilty jobs. When stoppping the sched thread and detaching sched fences from non signaled HW fenes wait for all signaled HW fences to complete before rerunning the jobs. v2: Fix resubmission of guilty job into HW after refactoring. v4: Full restart for all the jobs, not only from guilty ring. Extract karma increase into standalone function. v5: Rework waiting for signaled jobs without relying on the job struct itself as those might already be freed for non 'guilty' job's schedulers. Expose karma increase to drivers. v6: Use list_for_each_entry_safe_continue and drm_sched_process_job in case fence already signaled. Call drm_sched_increase_karma only once for amdgpu and add documentation. v7: Wait only for the latest job's fence. Suggested-by: Christian Koenig Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +-- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 11 +- drivers/gpu/drm/scheduler/sched_main.c | 186 +++++++++++++-------- drivers/gpu/drm/v3d/v3d_sched.c | 13 +- include/drm/gpu_scheduler.h | 7 +- 5 files changed, 139 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e20dce438d37..d7dddb936f84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3313,17 +3313,15 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; - kthread_park(ring->sched.thread); - - if (job && job->base.sched != &ring->sched) - continue; - - drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); + drm_sched_stop(&ring->sched); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } + if(job) + drm_sched_increase_karma(&job->base); + if (!amdgpu_sriov_vf(adev)) { @@ -3469,14 +3467,10 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; - /* only need recovery sched of the given job's ring - * or all rings (in the case @job is NULL) - * after above amdgpu_reset accomplished - */ - if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res) - drm_sched_job_recovery(&ring->sched); + if (!adev->asic_reset_res) + drm_sched_resubmit_jobs(&ring->sched); - kthread_unpark(ring->sched.thread); + drm_sched_start(&ring->sched, !adev->asic_reset_res); } if (!amdgpu_device_has_dc_support(adev)) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 49a6763693f1..67ae26602024 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -109,16 +109,19 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) } /* block scheduler */ - kthread_park(gpu->sched.thread); - drm_sched_hw_job_reset(&gpu->sched, sched_job); + drm_sched_stop(&gpu->sched); + + if(sched_job) + drm_sched_increase_karma(sched_job); /* get the GPU back into the init state */ etnaviv_core_dump(gpu); etnaviv_gpu_recover_hang(gpu); + drm_sched_resubmit_jobs(&gpu->sched); + /* restart scheduler after GPU is usable again */ - drm_sched_job_recovery(&gpu->sched); - kthread_unpark(gpu->sched.thread); + drm_sched_start(&gpu->sched, true); } static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index dbb69063b3d5..4bb18511ac75 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -60,8 +60,6 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job); - /** * drm_sched_rq_init - initialize a given run queue struct * @@ -335,45 +333,38 @@ static void drm_sched_job_timedout(struct work_struct *work) spin_unlock_irqrestore(&sched->job_list_lock, flags); } -/** - * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job - * - * @sched: scheduler instance - * @bad: bad scheduler job - * - */ -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) + /** + * drm_sched_increase_karma - Update sched_entity guilty flag + * + * @bad: The job guilty of time out + * + * Increment on every hang caused by the 'bad' job. If this exceeds the hang + * limit of the scheduler then the respective sched entity is marked guilty and + * jobs from it will not be scheduled further + */ +void drm_sched_increase_karma(struct drm_sched_job *bad) { - struct drm_sched_job *s_job; - struct drm_sched_entity *entity, *tmp; - unsigned long flags; int i; + struct drm_sched_entity *tmp; + struct drm_sched_entity *entity; + struct drm_gpu_scheduler *sched = bad->sched; - spin_lock_irqsave(&sched->job_list_lock, flags); - list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { - if (s_job->s_fence->parent && - dma_fence_remove_callback(s_job->s_fence->parent, - &s_job->s_fence->cb)) { - dma_fence_put(s_job->s_fence->parent); - s_job->s_fence->parent = NULL; - atomic_dec(&sched->hw_rq_count); - } - } - spin_unlock_irqrestore(&sched->job_list_lock, flags); - - if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { + /* don't increase @bad's karma if it's from KERNEL RQ, + * because sometimes GPU hang would cause kernel jobs (like VM updating jobs) + * corrupt but keep in mind that kernel jobs always considered good. + */ + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); - /* don't increase @bad's karma if it's from KERNEL RQ, - * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs) - * corrupt but keep in mind that kernel jobs always considered good. - */ - for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++ ) { + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; + i++) { struct drm_sched_rq *rq = &sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry_safe(entity, tmp, &rq->entities, list) { - if (bad->s_fence->scheduled.context == entity->fence_context) { - if (atomic_read(&bad->karma) > bad->sched->hang_limit) + if (bad->s_fence->scheduled.context == + entity->fence_context) { + if (atomic_read(&bad->karma) > + bad->sched->hang_limit) if (entity->guilty) atomic_set(entity->guilty, 1); break; @@ -385,7 +376,51 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo } } } -EXPORT_SYMBOL(drm_sched_hw_job_reset); +EXPORT_SYMBOL(drm_sched_increase_karma); + +/** + * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job + * + * @sched: scheduler instance + * @bad: bad scheduler job + * + */ +void drm_sched_stop(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + unsigned long flags; + struct dma_fence *last_fence = NULL; + + kthread_park(sched->thread); + + /* + * Verify all the signaled jobs in mirror list are removed from the ring + * by waiting for the latest job to enter the list. This should insure that + * also all the previous jobs that were in flight also already singaled + * and removed from the list. + */ + spin_lock_irqsave(&sched->job_list_lock, flags); + list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { + if (s_job->s_fence->parent && + dma_fence_remove_callback(s_job->s_fence->parent, + &s_job->s_fence->cb)) { + dma_fence_put(s_job->s_fence->parent); + s_job->s_fence->parent = NULL; + atomic_dec(&sched->hw_rq_count); + } else { + last_fence = dma_fence_get(&s_job->s_fence->finished); + break; + } + } + spin_unlock_irqrestore(&sched->job_list_lock, flags); + + if (last_fence) { + dma_fence_wait(last_fence, false); + dma_fence_put(last_fence); + } +} + +EXPORT_SYMBOL(drm_sched_stop); /** * drm_sched_job_recovery - recover jobs after a reset @@ -393,18 +428,55 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset); * @sched: scheduler instance * */ -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) +void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) { struct drm_sched_job *s_job, *tmp; - bool found_guilty = false; unsigned long flags; int r; + if (!full_recovery) + goto unpark; + spin_lock_irqsave(&sched->job_list_lock, flags); list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; - struct dma_fence *fence; - uint64_t guilty_context; + struct dma_fence *fence = s_job->s_fence->parent; + + if (fence) { + r = dma_fence_add_callback(fence, &s_fence->cb, + drm_sched_process_job); + if (r == -ENOENT) + drm_sched_process_job(fence, &s_fence->cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", + r); + } else + drm_sched_process_job(NULL, &s_fence->cb); + } + + drm_sched_start_timeout(sched); + spin_unlock_irqrestore(&sched->job_list_lock, flags); + +unpark: + kthread_unpark(sched->thread); +} +EXPORT_SYMBOL(drm_sched_start); + +/** + * drm_sched_resubmit_jobs - helper to relunch job from mirror ring list + * + * @sched: scheduler instance + * + */ +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job, *tmp; + uint64_t guilty_context; + bool found_guilty = false; + + /*TODO DO we need spinlock here ? */ + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { + struct drm_sched_fence *s_fence = s_job->s_fence; if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) { found_guilty = true; @@ -414,31 +486,11 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) dma_fence_set_error(&s_fence->finished, -ECANCELED); - spin_unlock_irqrestore(&sched->job_list_lock, flags); - fence = sched->ops->run_job(s_job); + s_job->s_fence->parent = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); - - if (fence) { - s_fence->parent = dma_fence_get(fence); - r = dma_fence_add_callback(fence, &s_fence->cb, - drm_sched_process_job); - if (r == -ENOENT) - drm_sched_process_job(fence, &s_fence->cb); - else if (r) - DRM_ERROR("fence add callback failed (%d)\n", - r); - dma_fence_put(fence); - } else { - if (s_fence->finished.error < 0) - drm_sched_expel_job_unlocked(s_job); - drm_sched_process_job(NULL, &s_fence->cb); - } - spin_lock_irqsave(&sched->job_list_lock, flags); } - drm_sched_start_timeout(sched); - spin_unlock_irqrestore(&sched->job_list_lock, flags); } -EXPORT_SYMBOL(drm_sched_job_recovery); +EXPORT_SYMBOL(drm_sched_resubmit_jobs); /** * drm_sched_job_init - init a scheduler job @@ -634,26 +686,14 @@ static int drm_sched_main(void *param) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); - } else { - if (s_fence->finished.error < 0) - drm_sched_expel_job_unlocked(sched_job); + } else drm_sched_process_job(NULL, &s_fence->cb); - } wake_up(&sched->job_scheduled); } return 0; } -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job) -{ - struct drm_gpu_scheduler *sched = s_job->sched; - - spin_lock(&sched->job_list_lock); - list_del_init(&s_job->node); - spin_unlock(&sched->job_list_lock); -} - /** * drm_sched_init - Init a gpu scheduler instance * diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index f7508e907536..4704b2df3688 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -234,18 +234,21 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) for (q = 0; q < V3D_MAX_QUEUES; q++) { struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; - kthread_park(sched->thread); - drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? - sched_job : NULL)); + drm_sched_stop(sched); + + if(sched_job) + drm_sched_increase_karma(sched_job); } /* get the GPU back into the init state */ v3d_reset(v3d); + for (q = 0; q < V3D_MAX_QUEUES; q++) + drm_sched_resubmit_jobs(sched_job->sched); + /* Unblock schedulers and restart their jobs. */ for (q = 0; q < V3D_MAX_QUEUES; q++) { - drm_sched_job_recovery(&v3d->queue[q].sched); - kthread_unpark(v3d->queue[q].sched.thread); + drm_sched_start(&v3d->queue[q].sched, true); } mutex_unlock(&v3d->reset_lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 47e19796c450..c567bba91ba0 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -298,9 +298,10 @@ int drm_sched_job_init(struct drm_sched_job *job, void *owner); void drm_sched_job_cleanup(struct drm_sched_job *job); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, - struct drm_sched_job *job); -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); +void drm_sched_stop(struct drm_gpu_scheduler *sched); +void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); +void drm_sched_increase_karma(struct drm_sched_job *bad); bool drm_sched_dependency_optimized(struct dma_fence* fence, struct drm_sched_entity *entity); void drm_sched_fault(struct drm_gpu_scheduler *sched); From 3741540e04137256df82105bcd720a5e27423c34 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 5 Dec 2018 14:21:28 -0500 Subject: [PATCH 317/539] drm/sched: Rework HW fence processing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expedite job deletion from ring mirror list to the HW fence signal callback instead from finish_work, together with waiting for all such fences to signal in drm_sched_stop we garantee that already signaled job will not be processed twice. Remove the sched finish fence callback and just submit finish_work directly from the HW fence callback. v2: Fix comments. v3: Attach hw fence cb to sched_job v5: Rebase Suggested-by: Christian Koenig Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_main.c | 55 +++++++++++++------------- include/drm/gpu_scheduler.h | 6 +-- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4bb18511ac75..19fc601c9eeb 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct *work) cancel_delayed_work_sync(&sched->work_tdr); spin_lock_irqsave(&sched->job_list_lock, flags); - /* remove job from ring_mirror_list */ - list_del_init(&s_job->node); /* queue TDR for next job */ drm_sched_start_timeout(sched); spin_unlock_irqrestore(&sched->job_list_lock, flags); @@ -293,22 +291,11 @@ static void drm_sched_job_finish(struct work_struct *work) sched->ops->free_job(s_job); } -static void drm_sched_job_finish_cb(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_job *job = container_of(cb, struct drm_sched_job, - finish_cb); - schedule_work(&job->finish_work); -} - static void drm_sched_job_begin(struct drm_sched_job *s_job) { struct drm_gpu_scheduler *sched = s_job->sched; unsigned long flags; - dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb, - drm_sched_job_finish_cb); - spin_lock_irqsave(&sched->job_list_lock, flags); list_add_tail(&s_job->node, &sched->ring_mirror_list); drm_sched_start_timeout(sched); @@ -403,7 +390,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched) list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { if (s_job->s_fence->parent && dma_fence_remove_callback(s_job->s_fence->parent, - &s_job->s_fence->cb)) { + &s_job->cb)) { dma_fence_put(s_job->s_fence->parent); s_job->s_fence->parent = NULL; atomic_dec(&sched->hw_rq_count); @@ -431,31 +418,34 @@ EXPORT_SYMBOL(drm_sched_stop); void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) { struct drm_sched_job *s_job, *tmp; - unsigned long flags; int r; if (!full_recovery) goto unpark; - spin_lock_irqsave(&sched->job_list_lock, flags); + /* + * Locking the list is not required here as the sched thread is parked + * so no new jobs are being pushed in to HW and in drm_sched_stop we + * flushed all the jobs who were still in mirror list but who already + * signaled and removed them self from the list. Also concurrent + * GPU recovers can't run in parallel. + */ list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { - struct drm_sched_fence *s_fence = s_job->s_fence; struct dma_fence *fence = s_job->s_fence->parent; if (fence) { - r = dma_fence_add_callback(fence, &s_fence->cb, + r = dma_fence_add_callback(fence, &s_job->cb, drm_sched_process_job); if (r == -ENOENT) - drm_sched_process_job(fence, &s_fence->cb); + drm_sched_process_job(fence, &s_job->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); } else - drm_sched_process_job(NULL, &s_fence->cb); + drm_sched_process_job(NULL, &s_job->cb); } drm_sched_start_timeout(sched); - spin_unlock_irqrestore(&sched->job_list_lock, flags); unpark: kthread_unpark(sched->thread); @@ -604,18 +594,27 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) */ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) { - struct drm_sched_fence *s_fence = - container_of(cb, struct drm_sched_fence, cb); + struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb); + struct drm_sched_fence *s_fence = s_job->s_fence; struct drm_gpu_scheduler *sched = s_fence->sched; + unsigned long flags; + + cancel_delayed_work(&sched->work_tdr); - dma_fence_get(&s_fence->finished); atomic_dec(&sched->hw_rq_count); atomic_dec(&sched->num_jobs); + + spin_lock_irqsave(&sched->job_list_lock, flags); + /* remove job from ring_mirror_list */ + list_del_init(&s_job->node); + spin_unlock_irqrestore(&sched->job_list_lock, flags); + drm_sched_fence_finished(s_fence); trace_drm_sched_process_job(s_fence); - dma_fence_put(&s_fence->finished); wake_up_interruptible(&sched->wake_up_worker); + + schedule_work(&s_job->finish_work); } /** @@ -678,16 +677,16 @@ static int drm_sched_main(void *param) if (fence) { s_fence->parent = dma_fence_get(fence); - r = dma_fence_add_callback(fence, &s_fence->cb, + r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_process_job); if (r == -ENOENT) - drm_sched_process_job(fence, &s_fence->cb); + drm_sched_process_job(fence, &sched_job->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); } else - drm_sched_process_job(NULL, &s_fence->cb); + drm_sched_process_job(NULL, &sched_job->cb); wake_up(&sched->job_scheduled); } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index c567bba91ba0..0daca4d8dad9 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -137,10 +137,6 @@ struct drm_sched_fence { */ struct dma_fence finished; - /** - * @cb: the callback for the parent fence below. - */ - struct dma_fence_cb cb; /** * @parent: the fence returned by &drm_sched_backend_ops.run_job * when scheduling the job on hardware. We signal the @@ -181,6 +177,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); * be scheduled further. * @s_priority: the priority of the job. * @entity: the entity to which this job belongs. + * @cb: the callback for the parent fence in s_fence. * * A job is created by the driver using drm_sched_job_init(), and * should call drm_sched_entity_push_job() once it wants the scheduler @@ -197,6 +194,7 @@ struct drm_sched_job { atomic_t karma; enum drm_sched_priority s_priority; struct drm_sched_entity *entity; + struct dma_fence_cb cb; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, From c69dffab819f5abd98f9791fdb8aede1ec2f172f Mon Sep 17 00:00:00 2001 From: hersen wu Date: Fri, 11 Jan 2019 10:39:30 -0500 Subject: [PATCH 318/539] drm/amd/display: fix eDP fast bootup for pre-raven asic [Why] For fastboot, Bios will light up eDP before SW driver is loaded. SW driver will check if eDP is lit by bios through reading the BIOS_SCRATCH_3 register. If lit, SW driver will not power down eDP power and phy to save time. Definition of BIOS_SCRATCH_3 are missing for pre-raven asic. This causes eDP fast boot to not work property. For some eDP panels, even if dp tx sends NoVideoStream_flag =1 and dpcd 0x600=2, eDP rx may not handle properly. This may cause a short flash on screen. [How] Add definition of BIOS_SCRATCH_3 for all asic Signed-off-by: hersen wu Reviewed-by: Alex Deucher Reviewed-by: Charlene Liu Acked-by: Leo Li Acked-by: Yongqiang Sun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c | 3 +-- drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c | 2 ++ drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 2 ++ 6 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index fdda8aa8e303..d8275ceb20c1 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -83,8 +83,7 @@ uint32_t bios_get_vga_enabled_displays( { uint32_t active_disp = 1; - if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/ - active_disp = REG_READ(BIOS_SCRATCH_3) & 0XFFFF; + active_disp = REG_READ(BIOS_SCRATCH_3) & 0XFFFF; return active_disp; } diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index c3f616a739d7..23044e6723e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -76,6 +76,7 @@ #ifndef mmBIOS_SCRATCH_2 #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF #endif @@ -365,6 +366,7 @@ static const struct dce_abm_mask abm_mask = { #define DCFE_MEM_PWR_CTRL_REG_BASE 0x1b03 static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 7d46eb7a2ace..7549adaa1542 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -84,6 +84,7 @@ #ifndef mmBIOS_SCRATCH_2 #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF #endif @@ -369,6 +370,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index d930e09ccfb4..ea3065d63372 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -76,6 +76,7 @@ #ifndef mmBIOS_SCRATCH_2 #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF #endif @@ -376,6 +377,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 23d7d4d85207..312a0aebf91f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -442,6 +442,7 @@ struct dce_i2c_hw *dce120_i2c_hw_create( return dce_i2c_hw; } static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3 + NBIO_BASE(mmBIOS_SCRATCH_3_BASE_IDX), .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 + NBIO_BASE(mmBIOS_SCRATCH_6_BASE_IDX) }; diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index e0bba0bc996b..2eca81b5cf2f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -77,6 +77,7 @@ #ifndef mmBIOS_SCRATCH_2 #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC #define mmBIOS_SCRATCH_6 0x05CF #endif @@ -358,6 +359,7 @@ static const struct dce110_clk_src_mask cs_mask = { }; static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 }; From 89c4f84b602544e580684fff9a7f869e9b1e4ae5 Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Fri, 4 Jan 2019 10:29:00 -0500 Subject: [PATCH 319/539] drm/amd/display: Restructure DCN10 hubbub [Why] Change DCN10 hubbub to use hubbub as a base and allow all future DCN hubbubs to do the same instead of using DCN10_hubbub. This increases readability and doesn't require future hubbubs to inherit anything other than the base hubbub struct. [How] Create separate DCN10_hubbub struct which uses the hubbub struct as a base. Signed-off-by: Eryk Brol Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_hubbub.c | 131 ++++++++++-------- .../drm/amd/display/dc/dcn10/dcn10_hubbub.h | 9 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 8 +- .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 4 + 4 files changed, 88 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index eb31a5ed6dff..5a4614c371bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -29,19 +29,20 @@ #include "reg_helper.h" #define CTX \ - hubbub->ctx + hubbub1->base.ctx #define DC_LOGGER \ - hubbub->ctx->logger + hubbub1->base.ctx->logger #define REG(reg)\ - hubbub->regs->reg + hubbub1->regs->reg #undef FN #define FN(reg_name, field_name) \ - hubbub->shifts->field_name, hubbub->masks->field_name + hubbub1->shifts->field_name, hubbub1->masks->field_name void hubbub1_wm_read_state(struct hubbub *hubbub, struct dcn_hubbub_wm *wm) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); struct dcn_hubbub_wm_set *s; memset(wm, 0, sizeof(struct dcn_hubbub_wm)); @@ -89,12 +90,14 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, void hubbub1_disable_allow_self_refresh(struct hubbub *hubbub) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); REG_UPDATE(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, 0); } bool hububu1_is_allow_self_refresh_enabled(struct hubbub *hubbub) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); uint32_t enable = 0; REG_GET(DCHUBBUB_ARB_DRAM_STATE_CNTL, @@ -107,6 +110,8 @@ bool hububu1_is_allow_self_refresh_enabled(struct hubbub *hubbub) bool hubbub1_verify_allow_pstate_change_high( struct hubbub *hubbub) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + /* pstate latency is ~20us so if we wait over 40us and pstate allow * still not asserted, we are probably stuck and going to hang * @@ -193,7 +198,7 @@ bool hubbub1_verify_allow_pstate_change_high( * 31: SOC pstate change request */ - REG_WRITE(DCHUBBUB_TEST_DEBUG_INDEX, hubbub->debug_test_index_pstate); + REG_WRITE(DCHUBBUB_TEST_DEBUG_INDEX, hubbub1->debug_test_index_pstate); for (i = 0; i < pstate_wait_timeout_us; i++) { debug_data = REG_READ(DCHUBBUB_TEST_DEBUG_DATA); @@ -244,6 +249,8 @@ static uint32_t convert_and_clamp( void hubbub1_wm_change_req_wa(struct hubbub *hubbub) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + REG_UPDATE_SEQ(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 0, 1); } @@ -254,7 +261,9 @@ void hubbub1_program_watermarks( unsigned int refclk_mhz, bool safe_to_lower) { - uint32_t force_en = hubbub->ctx->dc->debug.disable_stutter ? 1 : 0; + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + + uint32_t force_en = hubbub1->base.ctx->dc->debug.disable_stutter ? 1 : 0; /* * Need to clamp to max of the register values (i.e. no wrap) * for dcn1, all wm registers are 21-bit wide @@ -264,8 +273,8 @@ void hubbub1_program_watermarks( /* Repeat for water mark set A, B, C and D. */ /* clock state A */ - if (safe_to_lower || watermarks->a.urgent_ns > hubbub->watermarks.a.urgent_ns) { - hubbub->watermarks.a.urgent_ns = watermarks->a.urgent_ns; + if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) { + hubbub1->watermarks.a.urgent_ns = watermarks->a.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value); @@ -276,8 +285,8 @@ void hubbub1_program_watermarks( } if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { - if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub->watermarks.a.pte_meta_urgent_ns) { - hubbub->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; + if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { + hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); @@ -289,8 +298,8 @@ void hubbub1_program_watermarks( if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) { if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = + > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, @@ -302,8 +311,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns - > hubbub->watermarks.a.cstate_pstate.cstate_exit_ns) { - hubbub->watermarks.a.cstate_pstate.cstate_exit_ns = + > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = watermarks->a.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_exit_ns, @@ -316,8 +325,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns - > hubbub->watermarks.a.cstate_pstate.pstate_change_ns) { - hubbub->watermarks.a.cstate_pstate.pstate_change_ns = + > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = watermarks->a.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.pstate_change_ns, @@ -329,8 +338,8 @@ void hubbub1_program_watermarks( } /* clock state B */ - if (safe_to_lower || watermarks->b.urgent_ns > hubbub->watermarks.b.urgent_ns) { - hubbub->watermarks.b.urgent_ns = watermarks->b.urgent_ns; + if (safe_to_lower || watermarks->b.urgent_ns > hubbub1->watermarks.b.urgent_ns) { + hubbub1->watermarks.b.urgent_ns = watermarks->b.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value); @@ -341,8 +350,8 @@ void hubbub1_program_watermarks( } if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) { - if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub->watermarks.b.pte_meta_urgent_ns) { - hubbub->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; + if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { + hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); @@ -354,8 +363,8 @@ void hubbub1_program_watermarks( if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) { if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = + > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, @@ -367,8 +376,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns - > hubbub->watermarks.b.cstate_pstate.cstate_exit_ns) { - hubbub->watermarks.b.cstate_pstate.cstate_exit_ns = + > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = watermarks->b.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_exit_ns, @@ -381,8 +390,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns - > hubbub->watermarks.b.cstate_pstate.pstate_change_ns) { - hubbub->watermarks.b.cstate_pstate.pstate_change_ns = + > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = watermarks->b.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.pstate_change_ns, @@ -394,8 +403,8 @@ void hubbub1_program_watermarks( } /* clock state C */ - if (safe_to_lower || watermarks->c.urgent_ns > hubbub->watermarks.c.urgent_ns) { - hubbub->watermarks.c.urgent_ns = watermarks->c.urgent_ns; + if (safe_to_lower || watermarks->c.urgent_ns > hubbub1->watermarks.c.urgent_ns) { + hubbub1->watermarks.c.urgent_ns = watermarks->c.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value); @@ -406,8 +415,8 @@ void hubbub1_program_watermarks( } if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { - if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub->watermarks.c.pte_meta_urgent_ns) { - hubbub->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; + if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { + hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); @@ -419,8 +428,8 @@ void hubbub1_program_watermarks( if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) { if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = + > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, @@ -432,8 +441,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns - > hubbub->watermarks.c.cstate_pstate.cstate_exit_ns) { - hubbub->watermarks.c.cstate_pstate.cstate_exit_ns = + > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = watermarks->c.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_exit_ns, @@ -446,8 +455,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns - > hubbub->watermarks.c.cstate_pstate.pstate_change_ns) { - hubbub->watermarks.c.cstate_pstate.pstate_change_ns = + > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = watermarks->c.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.pstate_change_ns, @@ -459,8 +468,8 @@ void hubbub1_program_watermarks( } /* clock state D */ - if (safe_to_lower || watermarks->d.urgent_ns > hubbub->watermarks.d.urgent_ns) { - hubbub->watermarks.d.urgent_ns = watermarks->d.urgent_ns; + if (safe_to_lower || watermarks->d.urgent_ns > hubbub1->watermarks.d.urgent_ns) { + hubbub1->watermarks.d.urgent_ns = watermarks->d.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value); @@ -471,8 +480,8 @@ void hubbub1_program_watermarks( } if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { - if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub->watermarks.d.pte_meta_urgent_ns) { - hubbub->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; + if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { + hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); @@ -484,8 +493,8 @@ void hubbub1_program_watermarks( if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = + > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, @@ -497,8 +506,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns - > hubbub->watermarks.d.cstate_pstate.cstate_exit_ns) { - hubbub->watermarks.d.cstate_pstate.cstate_exit_ns = + > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = watermarks->d.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_exit_ns, @@ -511,8 +520,8 @@ void hubbub1_program_watermarks( } if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns - > hubbub->watermarks.d.cstate_pstate.pstate_change_ns) { - hubbub->watermarks.d.cstate_pstate.pstate_change_ns = + > hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.d.cstate_pstate.pstate_change_ns = watermarks->d.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.pstate_change_ns, @@ -543,6 +552,8 @@ void hubbub1_update_dchub( struct hubbub *hubbub, struct dchub_init_data *dh_data) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + if (REG(DCHUBBUB_SDPIF_FB_TOP) == 0) { ASSERT(false); /*should not come here*/ @@ -602,6 +613,8 @@ void hubbub1_update_dchub( void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t watermark_change_req; REG_GET(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, @@ -618,6 +631,8 @@ void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub) void hubbub1_soft_reset(struct hubbub *hubbub, bool reset) { + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t reset_en = reset ? 1 : 0; REG_UPDATE(DCHUBBUB_SOFT_RESET, @@ -760,7 +775,9 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, const struct dc_dcc_surface_param *input, struct dc_surface_dcc_cap *output) { - struct dc *dc = hubbub->ctx->dc; + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + struct dc *dc = hubbub1->base.ctx->dc; + /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ enum dcc_control dcc_control; unsigned int bpe; @@ -772,10 +789,10 @@ static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, if (dc->debug.disable_dcc == DCC_DISABLE) return false; - if (!hubbub->funcs->dcc_support_pixel_format(input->format, &bpe)) + if (!hubbub1->base.funcs->dcc_support_pixel_format(input->format, &bpe)) return false; - if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe, + if (!hubbub1->base.funcs->dcc_support_swizzle(input->swizzle_mode, bpe, &segment_order_horz, &segment_order_vert)) return false; @@ -854,18 +871,20 @@ void hubbub1_construct(struct hubbub *hubbub, const struct dcn_hubbub_shift *hubbub_shift, const struct dcn_hubbub_mask *hubbub_mask) { - hubbub->ctx = ctx; + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - hubbub->funcs = &hubbub1_funcs; + hubbub1->base.ctx = ctx; - hubbub->regs = hubbub_regs; - hubbub->shifts = hubbub_shift; - hubbub->masks = hubbub_mask; + hubbub1->base.funcs = &hubbub1_funcs; - hubbub->debug_test_index_pstate = 0x7; + hubbub1->regs = hubbub_regs; + hubbub1->shifts = hubbub_shift; + hubbub1->masks = hubbub_mask; + + hubbub1->debug_test_index_pstate = 0x7; #if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ctx->dce_version == DCN_VERSION_1_01) - hubbub->debug_test_index_pstate = 0xB; + hubbub1->debug_test_index_pstate = 0xB; #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index aca67633ee58..14e23e26a2cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -29,6 +29,9 @@ #include "core_types.h" #include "dchubbub.h" +#define TO_DCN10_HUBBUB(hubbub)\ + container_of(hubbub, struct dcn10_hubbub, base) + #define HUBHUB_REG_LIST_DCN()\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\ SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\ @@ -165,10 +168,8 @@ struct dcn_hubbub_mask { struct dc; - -struct hubbub { - const struct hubbub_funcs *funcs; - struct dc_context *ctx; +struct dcn10_hubbub { + struct hubbub base; const struct dcn_hubbub_registers *regs; const struct dcn_hubbub_shift *shifts; const struct dcn_hubbub_mask *masks; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index ea0628bebe0d..4f8cbe33ca29 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -678,18 +678,18 @@ static struct mpc *dcn10_mpc_create(struct dc_context *ctx) static struct hubbub *dcn10_hubbub_create(struct dc_context *ctx) { - struct hubbub *hubbub = kzalloc(sizeof(struct hubbub), + struct dcn10_hubbub *dcn10_hubbub = kzalloc(sizeof(struct dcn10_hubbub), GFP_KERNEL); - if (!hubbub) + if (!dcn10_hubbub) return NULL; - hubbub1_construct(hubbub, ctx, + hubbub1_construct(&dcn10_hubbub->base, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask); - return hubbub; + return &dcn10_hubbub->base; } static struct timing_generator *dcn10_timing_generator_create( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dc0bb5bd8cac..9d2d8e51306c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -75,5 +75,9 @@ struct hubbub_funcs { struct dcn_hubbub_wm *wm); }; +struct hubbub { + const struct hubbub_funcs *funcs; + struct dc_context *ctx; +}; #endif From b9708648766d8b15003a543ca36863e731b7b1ba Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Fri, 7 Dec 2018 10:25:13 -0500 Subject: [PATCH 320/539] drm/amd/display: Introduce DC VM interface Virtual memory allows display to support flipping to surfaces which are not allocated contiguously in memory with physical addresses, instead a 1-4 level page table is used. This is beneficial because it allows the scattering of large surfaces to improve memory efficiency and security. Signed-off-by: Eryk Brol Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 3 +- .../drm/amd/display/dc/core/dc_vm_helper.c | 127 ++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 2 + .../drm/amd/display/dc/dcn10/dcn10_hubbub.h | 14 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 3 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h | 5 - .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 +- drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 3 +- drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h | 49 +++++++ .../gpu/drm/amd/display/dc/inc/vm_helper.h | 55 ++++++++ 10 files changed, 254 insertions(+), 10 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c create mode 100644 drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h create mode 100644 drivers/gpu/drm/amd/display/dc/inc/vm_helper.h diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index fa24e4c38794..b8ddb4acccdb 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -41,7 +41,8 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI include $(AMD_DC) DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o +dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \ +dc_vm_helper.o AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE)) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c new file mode 100644 index 000000000000..e54b8ac339b2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c @@ -0,0 +1,127 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "vm_helper.h" + +static void mark_vmid_used(struct vm_helper *vm_helper, unsigned int pos, uint8_t hubp_idx) +{ + struct vmid_usage vmids = vm_helper->hubp_vmid_usage[hubp_idx]; + + vmids.vmid_usage[0] = vmids.vmid_usage[1]; + vmids.vmid_usage[1] = 1 << pos; +} + +static void add_ptb_to_table(struct vm_helper *vm_helper, unsigned int vmid, uint64_t ptb) +{ + vm_helper->ptb_assigned_to_vmid[vmid] = ptb; + vm_helper->num_vmids_available--; +} + +static void clear_entry_from_vmid_table(struct vm_helper *vm_helper, unsigned int vmid) +{ + vm_helper->ptb_assigned_to_vmid[vmid] = 0; + vm_helper->num_vmids_available++; +} + +static void evict_vmids(struct vm_helper *vm_helper) +{ + int i; + uint16_t ord = 0; + + for (i = 0; i < vm_helper->num_vmid; i++) + ord |= vm_helper->hubp_vmid_usage[i].vmid_usage[0] | vm_helper->hubp_vmid_usage[i].vmid_usage[1]; + + // At this point any positions with value 0 are unused vmids, evict them + for (i = 1; i < vm_helper->num_vmid; i++) { + if (ord & (1u << i)) + clear_entry_from_vmid_table(vm_helper, i); + } +} + +// Return value of -1 indicates vmid table unitialized or ptb dne in the table +static int get_existing_vmid_for_ptb(struct vm_helper *vm_helper, uint64_t ptb) +{ + int i; + + for (i = 0; i < vm_helper->num_vmid; i++) { + if (vm_helper->ptb_assigned_to_vmid[i] == ptb) + return i; + } + + return -1; +} + +// Expected to be called only when there's an available vmid +static int get_next_available_vmid(struct vm_helper *vm_helper) +{ + int i; + + for (i = 1; i < vm_helper->num_vmid; i++) { + if (vm_helper->ptb_assigned_to_vmid[i] == 0) + return i; + } + + return -1; +} + +uint8_t get_vmid_for_ptb(struct vm_helper *vm_helper, int64_t ptb, uint8_t hubp_idx) +{ + unsigned int vmid = 0; + int vmid_exists = -1; + + // Physical address gets vmid 0 + if (ptb == 0) + return 0; + + vmid_exists = get_existing_vmid_for_ptb(vm_helper, ptb); + + if (vmid_exists != -1) { + mark_vmid_used(vm_helper, vmid_exists, hubp_idx); + vmid = vmid_exists; + } else { + if (vm_helper->num_vmids_available == 0) + evict_vmids(vm_helper); + + vmid = get_next_available_vmid(vm_helper); + mark_vmid_used(vm_helper, vmid, hubp_idx); + add_ptb_to_table(vm_helper, vmid, ptb); + } + + return vmid; +} + +struct vm_helper init_vm_helper(unsigned int num_vmid, unsigned int num_hubp) +{ + static uint64_t ptb_assigned_to_vmid[MAX_VMID]; + static struct vmid_usage hubp_vmid_usage[MAX_HUBP]; + + return (struct vm_helper){ + .num_vmid = num_vmid, + .num_hubp = num_hubp, + .num_vmids_available = num_vmid - 1, + .ptb_assigned_to_vmid = ptb_assigned_to_vmid, + .hubp_vmid_usage = hubp_vmid_usage + }; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 04f279a76d70..da55d623647a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -97,6 +97,8 @@ struct dc_plane_address { union large_integer chroma_dcc_const_color; } video_progressive; }; + + union large_integer page_table_base; }; struct dc_size { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 14e23e26a2cd..c681e1cc9290 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -110,6 +110,12 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_SDPIF_AGP_TOP; uint32_t DCHUBBUB_CRC_CTRL; uint32_t DCHUBBUB_SOFT_RESET; + uint32_t DCN_VM_FB_LOCATION_BASE; + uint32_t DCN_VM_FB_LOCATION_TOP; + uint32_t DCN_VM_FB_OFFSET; + uint32_t DCN_VM_AGP_BOT; + uint32_t DCN_VM_AGP_TOP; + uint32_t DCN_VM_AGP_BASE; }; /* set field name */ @@ -155,7 +161,13 @@ struct dcn_hubbub_registers { type SDPIF_FB_OFFSET;\ type SDPIF_AGP_BASE;\ type SDPIF_AGP_BOT;\ - type SDPIF_AGP_TOP + type SDPIF_AGP_TOP;\ + type FB_BASE;\ + type FB_TOP;\ + type FB_OFFSET;\ + type AGP_BOT;\ + type AGP_TOP;\ + type AGP_BASE struct dcn_hubbub_shift { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index fae62474a4d1..0ba68d41b9c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -317,7 +317,8 @@ void hubp1_program_pixel_format( bool hubp1_program_surface_flip_and_addr( struct hubp *hubp, const struct dc_plane_address *address, - bool flip_immediate) + bool flip_immediate, + uint8_t vmid) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index e82ca15c842f..a6d6dfe00617 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -707,11 +707,6 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, bool independent_64b_blks); -bool hubp1_program_surface_flip_and_addr( - struct hubp *hubp, - const struct dc_plane_address *address, - bool flip_immediate); - bool hubp1_is_flip_pending(struct hubp *hubp); void hubp1_cursor_set_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 814f5976ec9b..9cde24dbdac8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1202,7 +1202,8 @@ static void dcn10_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_c pipe_ctx->plane_res.hubp->funcs->hubp_program_surface_flip_and_addr( pipe_ctx->plane_res.hubp, &plane_state->address, - plane_state->flip_immediate); + plane_state->flip_immediate, + 0); plane_state->status.requested_address = plane_state->address; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 04c6989aac58..1cd07e94ee63 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -78,7 +78,8 @@ struct hubp_funcs { bool (*hubp_program_surface_flip_and_addr)( struct hubp *hubp, const struct dc_plane_address *address, - bool flip_immediate); + bool flip_immediate, + uint8_t vmid); void (*hubp_program_pte_vm)( struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h b/drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h new file mode 100644 index 000000000000..037beb0a2a27 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h @@ -0,0 +1,49 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DAL_DC_INC_HW_VMID_H_ +#define DAL_DC_INC_HW_VMID_H_ + +#include "core_types.h" +#include "dchubbub.h" + +struct dcn_vmid_registers { + uint32_t CNTL; + uint32_t PAGE_TABLE_BASE_ADDR_HI32; + uint32_t PAGE_TABLE_BASE_ADDR_LO32; + uint32_t PAGE_TABLE_START_ADDR_HI32; + uint32_t PAGE_TABLE_START_ADDR_LO32; + uint32_t PAGE_TABLE_END_ADDR_HI32; + uint32_t PAGE_TABLE_END_ADDR_LO32; +}; + +struct dcn_vmid_page_table_config { + uint64_t page_table_start_addr; + uint64_t page_table_end_addr; + enum dcn_hubbub_page_table_depth depth; + enum dcn_hubbub_page_table_block_size block_size; +}; + +#endif /* DAL_DC_INC_HW_VMID_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h b/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h new file mode 100644 index 000000000000..a202206e22a3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h @@ -0,0 +1,55 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef DC_INC_VM_HELPER_H_ +#define DC_INC_VM_HELPER_H_ + +#include "dc_types.h" + +#define MAX_VMID 16 +#define MAX_HUBP 6 + +struct vmid_usage { + uint16_t vmid_usage[2]; +}; + +struct vm_helper { + unsigned int num_vmid; + unsigned int num_hubp; + unsigned int num_vmids_available; + uint64_t *ptb_assigned_to_vmid; + struct vmid_usage *hubp_vmid_usage; +}; + +uint8_t get_vmid_for_ptb( + struct vm_helper *vm_helper, + int64_t ptb, + uint8_t pipe_idx); + +struct vm_helper init_vm_helper( + unsigned int num_vmid, + unsigned int num_hubp); + +#endif /* DC_INC_VM_HELPER_H_ */ From 4ef9f1ec837be562b42a1c0f196c4ef371b2ed84 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 3 Jan 2019 14:38:06 -0500 Subject: [PATCH 321/539] drm/amd/display: Let updates with no scaling changes be fast [Why] DC was assuming that any surface_update->scaling_info meant the update was at least medium. However, if nothing has changed there is no scaling to program, so there is no problem with the update being fast [How] If every update flag is not set, the update is fast Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Acked-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 56702840710b..1dabafc12cfe 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1240,7 +1240,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa || update_flags->bits.output_tf_change) return UPDATE_TYPE_FULL; - return UPDATE_TYPE_MED; + return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST; } static enum surface_update_type get_scaling_info_update_type( From 02d6a6fcdf68c4de1f28e64b012c54c855294b2a Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 18 Dec 2018 15:30:19 -0500 Subject: [PATCH 322/539] drm/amd/display: Simplify underscan and ABM commit [Why] Underscan and ABM are connector properties but require updates to DC stream state. Previously, on updates to these properties the affected stream and all its planes were committed. This is unnecessary; only a few fields on the stream need to be changed. [How] If scaling or ABM have changed, change the stream and create a stream update with those changes. Call DC with only those fields. Signed-off-by: David Francis Reviewed-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5b3b7a215be..6f04f4226003 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5160,11 +5160,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } } - /* Handle scaling, underscan, and abm changes*/ + /* Handle connector state changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); + struct dc_surface_update dummy_updates[MAX_SURFACES] = { 0 }; + struct dc_stream_update stream_update = { 0 }; struct dc_stream_status *status = NULL; if (acrtc) { @@ -5176,37 +5178,48 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - /* Skip anything that is not scaling or underscan changes */ if (!is_scaling_state_different(dm_new_con_state, dm_old_con_state) && (dm_new_crtc_state->abm_level == dm_old_crtc_state->abm_level)) continue; - update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, - dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); + if (is_scaling_state_different(dm_new_con_state, dm_old_con_state)) { + update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, + dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); - if (!dm_new_crtc_state->stream) - continue; + stream_update.src = dm_new_crtc_state->stream->src; + stream_update.dst = dm_new_crtc_state->stream->dst; + } + + if (dm_new_crtc_state->abm_level != dm_old_crtc_state->abm_level) { + dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level; + + stream_update.abm_level = &dm_new_crtc_state->abm_level; + } status = dc_stream_get_status(dm_new_crtc_state->stream); WARN_ON(!status); WARN_ON(!status->plane_count); - dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level; + /* + * TODO: DC refuses to perform stream updates without a dc_surface_update. + * Here we create an empty update on each plane. + * To fix this, DC should permit updating only stream properties. + */ + for (j = 0; j < status->plane_count; j++) + dummy_updates[j].surface = status->plane_states[0]; - /*TODO How it works with MPO ?*/ - if (!commit_planes_to_stream( - dm, - dm->dc, - status->plane_states, - status->plane_count, - dm_new_crtc_state, - to_dm_crtc_state(old_crtc_state), - dc_state)) - dm_error("%s: Failed to update stream scaling!\n", __func__); + + mutex_lock(&dm->dc_lock); + dc_commit_updates_for_stream(dm->dc, + dummy_updates, + status->plane_count, + dm_new_crtc_state->stream, + &stream_update, + dc_state); + mutex_unlock(&dm->dc_lock); } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, From 8a48b44cd00f10e83f573b9028d11bd90a36de26 Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 11 Dec 2018 15:17:15 -0500 Subject: [PATCH 323/539] drm/amd/display: Call into DC once per multiplane flip [Why] amdgpu_dm_commit_planes was performing multi-plane flips incorrectly: It waited for vblank once per flipped plane It prepared flip ISR and acquired the corresponding vblank ref once per plane, although it closed ISR and put the ref once per crtc It called into dc once per flipped plane, duplicating some work [How] Wait for vblank, get vblank ref, prepare flip ISR, and call into DC only once, and only if there is a pageflip Make freesync continue to update planes even if vrr information has already been changed Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 332 ++++++++---------- 1 file changed, 153 insertions(+), 179 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6f04f4226003..a3df3ccfa807 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4589,12 +4589,12 @@ static void update_freesync_state_on_stream( TRANSFER_FUNC_UNKNOWN, &vrr_infopacket); - new_crtc_state->freesync_timing_changed = + new_crtc_state->freesync_timing_changed |= (memcmp(&new_crtc_state->vrr_params.adjust, &vrr_params.adjust, sizeof(vrr_params.adjust)) != 0); - new_crtc_state->freesync_vrr_info_changed = + new_crtc_state->freesync_vrr_info_changed |= (memcmp(&new_crtc_state->vrr_infopacket, &vrr_infopacket, sizeof(vrr_infopacket)) != 0); @@ -4618,156 +4618,6 @@ static void update_freesync_state_on_stream( vrr_params.adjust.v_total_max); } -/* - * Executes flip - * - * Waits on all BO's fences and for proper vblank count - */ -static void amdgpu_dm_do_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - uint32_t target, - struct dc_state *state) -{ - unsigned long flags; - uint64_t timestamp_ns; - uint32_t target_vblank; - int r, vpos, hpos; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); - struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]); - struct amdgpu_device *adev = crtc->dev->dev_private; - bool async_flip = (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; - struct dc_flip_addrs addr = { {0} }; - /* TODO eliminate or rename surface_update */ - struct dc_surface_update surface_updates[1] = { {0} }; - struct dc_stream_update stream_update = {0}; - struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); - struct dc_stream_status *stream_status; - struct dc_plane_state *surface; - uint64_t tiling_flags, dcc_address; - - - /* Prepare wait for target vblank early - before the fence-waits */ - target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) + - amdgpu_get_vblank_counter_kms(crtc->dev, acrtc->crtc_id); - - /* - * TODO This might fail and hence better not used, wait - * explicitly on fences instead - * and in general should be called for - * blocking commit to as per framework helpers - */ - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r != 0)) { - DRM_ERROR("failed to reserve buffer before flip\n"); - WARN_ON(1); - } - - /* Wait for all fences on this FB */ - WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false, - MAX_SCHEDULE_TIMEOUT) < 0); - - amdgpu_bo_get_tiling_flags(abo, &tiling_flags); - - amdgpu_bo_unreserve(abo); - - /* - * Wait until we're out of the vertical blank period before the one - * targeted by the flip - */ - while ((acrtc->enabled && - (amdgpu_display_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, - 0, &vpos, &hpos, NULL, - NULL, &crtc->hwmode) - & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == - (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && - (int)(target_vblank - - amdgpu_get_vblank_counter_kms(adev->ddev, acrtc->crtc_id)) > 0)) { - usleep_range(1000, 1100); - } - - /* Flip */ - spin_lock_irqsave(&crtc->dev->event_lock, flags); - - WARN_ON(acrtc->pflip_status != AMDGPU_FLIP_NONE); - WARN_ON(!acrtc_state->stream); - - addr.address.grph.addr.low_part = lower_32_bits(afb->address); - addr.address.grph.addr.high_part = upper_32_bits(afb->address); - - dcc_address = get_dcc_address(afb->address, tiling_flags); - addr.address.grph.meta_addr.low_part = lower_32_bits(dcc_address); - addr.address.grph.meta_addr.high_part = upper_32_bits(dcc_address); - - addr.flip_immediate = async_flip; - - timestamp_ns = ktime_get_ns(); - addr.flip_timestamp_in_us = div_u64(timestamp_ns, 1000); - - - if (acrtc->base.state->event) - prepare_flip_isr(acrtc); - - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - stream_status = dc_stream_get_status(acrtc_state->stream); - if (!stream_status) { - DRM_ERROR("No stream status for CRTC: id=%d\n", - acrtc->crtc_id); - return; - } - - surface = stream_status->plane_states[0]; - surface_updates->surface = surface; - - if (!surface) { - DRM_ERROR("No surface for CRTC: id=%d\n", - acrtc->crtc_id); - return; - } - surface_updates->flip_addr = &addr; - - if (acrtc_state->stream) { - update_freesync_state_on_stream( - &adev->dm, - acrtc_state, - acrtc_state->stream, - surface, - addr.flip_timestamp_in_us); - - if (acrtc_state->freesync_timing_changed) - stream_update.adjust = - &acrtc_state->stream->adjust; - - if (acrtc_state->freesync_vrr_info_changed) - stream_update.vrr_infopacket = - &acrtc_state->stream->vrr_infopacket; - } - - /* Update surface timing information. */ - surface->time.time_elapsed_in_us[surface->time.index] = - addr.flip_timestamp_in_us - surface->time.prev_update_time_in_us; - surface->time.prev_update_time_in_us = addr.flip_timestamp_in_us; - surface->time.index++; - if (surface->time.index >= DC_PLANE_UPDATE_TIMES_MAX) - surface->time.index = 0; - - mutex_lock(&adev->dm.dc_lock); - - dc_commit_updates_for_stream(adev->dm.dc, - surface_updates, - 1, - acrtc_state->stream, - &stream_update, - state); - mutex_unlock(&adev->dm.dc_lock); - - DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n", - __func__, - addr.address.grph.addr.high_part, - addr.address.grph.addr.low_part); -} - /* * TODO this whole function needs to go * @@ -4874,10 +4724,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc *pcrtc, bool *wait_for_vblank) { - uint32_t i; + uint32_t i, r; + uint64_t timestamp_ns; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; - struct dc_stream_state *dc_stream_attach; struct dc_plane_state *plane_states_constructed[MAX_SURFACES]; struct amdgpu_crtc *acrtc_attach = to_amdgpu_crtc(pcrtc); struct drm_crtc_state *new_pcrtc_state = @@ -4885,17 +4735,35 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state); struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); - int planes_count = 0; + int flip_count = 0, planes_count = 0, vpos, hpos; unsigned long flags; + struct amdgpu_bo *abo; + uint64_t tiling_flags, dcc_address; + struct dc_stream_status *stream_status; + uint32_t target, target_vblank; + + struct { + struct dc_surface_update surface_updates[MAX_SURFACES]; + struct dc_flip_addrs flip_addrs[MAX_SURFACES]; + struct dc_stream_update stream_update; + } *flip; + + flip = kzalloc(sizeof(*flip), GFP_KERNEL); + + if (!flip) + dm_error("Failed to allocate update bundles\n"); /* update planes when needed */ for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state; struct drm_framebuffer *fb = new_plane_state->fb; + struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); bool pflip_needed; + struct dc_plane_state *surface; struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); + if (plane->type == DRM_PLANE_TYPE_CURSOR) { handle_cursor_update(plane, old_plane_state); continue; @@ -4910,46 +4778,152 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, pflip_needed = !state->allow_modeset; - spin_lock_irqsave(&crtc->dev->event_lock, flags); - if (acrtc_attach->pflip_status != AMDGPU_FLIP_NONE) { - DRM_ERROR("%s: acrtc %d, already busy\n", - __func__, - acrtc_attach->crtc_id); - /* In commit tail framework this cannot happen */ - WARN_ON(1); - } - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) { WARN_ON(!dm_new_plane_state->dc_state); plane_states_constructed[planes_count] = dm_new_plane_state->dc_state; - dc_stream_attach = acrtc_state->stream; planes_count++; } else if (new_crtc_state->planes_changed) { - /* Assume even ONE crtc with immediate flip means + /* + * Assume even ONE crtc with immediate flip means * entire can't wait for VBLANK * TODO Check if it's correct */ - *wait_for_vblank = - new_pcrtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC ? - false : true; + if (new_pcrtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) + *wait_for_vblank = false; - /* TODO: Needs rework for multiplane flip */ - if (plane->type == DRM_PLANE_TYPE_PRIMARY) - drm_crtc_vblank_get(crtc); + /* + * TODO This might fail and hence better not used, wait + * explicitly on fences instead + * and in general should be called for + * blocking commit to as per framework helpers + */ + abo = gem_to_amdgpu_bo(fb->obj[0]); + r = amdgpu_bo_reserve(abo, true); + if (unlikely(r != 0)) { + DRM_ERROR("failed to reserve buffer before flip\n"); + WARN_ON(1); + } - amdgpu_dm_do_flip( - crtc, - fb, - (uint32_t)drm_crtc_vblank_count(crtc) + *wait_for_vblank, - dc_state); + /* Wait for all fences on this FB */ + WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false, + MAX_SCHEDULE_TIMEOUT) < 0); + + amdgpu_bo_get_tiling_flags(abo, &tiling_flags); + + amdgpu_bo_unreserve(abo); + + flip->flip_addrs[flip_count].address.grph.addr.low_part = lower_32_bits(afb->address); + flip->flip_addrs[flip_count].address.grph.addr.high_part = upper_32_bits(afb->address); + + dcc_address = get_dcc_address(afb->address, tiling_flags); + flip->flip_addrs[flip_count].address.grph.meta_addr.low_part = lower_32_bits(dcc_address); + flip->flip_addrs[flip_count].address.grph.meta_addr.high_part = upper_32_bits(dcc_address); + + flip->flip_addrs[flip_count].flip_immediate = + (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; + + timestamp_ns = ktime_get_ns(); + flip->flip_addrs[flip_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); + flip->surface_updates[flip_count].flip_addr = &flip->flip_addrs[flip_count]; + + stream_status = dc_stream_get_status(acrtc_state->stream); + if (!stream_status) { + DRM_ERROR("No stream status for CRTC: id=%d\n", + acrtc_attach->crtc_id); + continue; + } + + surface = stream_status->plane_states[0]; + flip->surface_updates[flip_count].surface = surface; + if (!flip->surface_updates[flip_count].surface) { + DRM_ERROR("No surface for CRTC: id=%d\n", + acrtc_attach->crtc_id); + continue; + } + + if (acrtc_state->stream) + update_freesync_state_on_stream( + dm, + acrtc_state, + acrtc_state->stream, + surface, + flip->flip_addrs[flip_count].flip_timestamp_in_us); + + /* Update surface timing information. */ + surface->time.time_elapsed_in_us[surface->time.index] = + flip->flip_addrs[flip_count].flip_timestamp_in_us - + surface->time.prev_update_time_in_us; + surface->time.prev_update_time_in_us = flip->flip_addrs[flip_count].flip_timestamp_in_us; + surface->time.index++; + if (surface->time.index >= DC_PLANE_UPDATE_TIMES_MAX) + surface->time.index = 0; + + DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x\n", + __func__, + flip->flip_addrs[flip_count].address.grph.addr.high_part, + flip->flip_addrs[flip_count].address.grph.addr.low_part); + + flip_count += 1; } } + if (flip_count) { + target = (uint32_t)drm_crtc_vblank_count(pcrtc) + *wait_for_vblank; + /* Prepare wait for target vblank early - before the fence-waits */ + target_vblank = target - (uint32_t)drm_crtc_vblank_count(pcrtc) + + amdgpu_get_vblank_counter_kms(pcrtc->dev, acrtc_attach->crtc_id); + + /* + * Wait until we're out of the vertical blank period before the one + * targeted by the flip + */ + while ((acrtc_attach->enabled && + (amdgpu_display_get_crtc_scanoutpos(dm->ddev, acrtc_attach->crtc_id, + 0, &vpos, &hpos, NULL, + NULL, &pcrtc->hwmode) + & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && + (int)(target_vblank - + amdgpu_get_vblank_counter_kms(dm->ddev, acrtc_attach->crtc_id)) > 0)) { + usleep_range(1000, 1100); + } + + if (acrtc_attach->base.state->event) { + drm_crtc_vblank_get(pcrtc); + + spin_lock_irqsave(&pcrtc->dev->event_lock, flags); + + WARN_ON(acrtc_attach->pflip_status != AMDGPU_FLIP_NONE); + prepare_flip_isr(acrtc_attach); + + spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); + } + + if (acrtc_state->stream) { + + if (acrtc_state->freesync_timing_changed) + flip->stream_update.adjust = + &acrtc_state->stream->adjust; + + if (acrtc_state->freesync_vrr_info_changed) + flip->stream_update.vrr_infopacket = + &acrtc_state->stream->vrr_infopacket; + } + + mutex_lock(&dm->dc_lock); + dc_commit_updates_for_stream(dm->dc, + flip->surface_updates, + flip_count, + acrtc_state->stream, + &flip->stream_update, + dc_state); + mutex_unlock(&dm->dc_lock); + } + if (planes_count) { unsigned long flags; @@ -4962,7 +4936,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - dc_stream_attach->abm_level = acrtc_state->abm_level; + acrtc_state->stream->abm_level = acrtc_state->abm_level; if (false == commit_planes_to_stream(dm, dm->dc, From bc7f670ee04cd619f8c4627c37d77b3618bc5edd Mon Sep 17 00:00:00 2001 From: David Francis Date: Wed, 19 Dec 2018 10:45:16 -0500 Subject: [PATCH 324/539] drm/amd/display: Perform plane updates only when needed [Why] Our old logic: if pageflip, update freesync and plane address. Otherwise, update everything. This over-updated on non-pageflip cases, and it failed to update if pageflip and non-pageflip changes occurred on the same commit [How] Update flip_addrs on pageflips. Update scaling_info when it changes. Update color fields on color changes. Updates plane_info always because we don't have a good way of knowing when it needs to be updated. Unfortunately, this means that every stream commit involves two calls into DC. In particular, on pageflips there is a second, pointless update that changes nothing but costs several microseconds (about a 50% increase in time taken). The update is fast, but there are comparisons and some useless programming. Leave TODOs indicating dissatisfaction. Signed-off-by: David Francis Reviewed-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 195 ++++++------------ 1 file changed, 64 insertions(+), 131 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a3df3ccfa807..9b77ace620db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4618,105 +4618,6 @@ static void update_freesync_state_on_stream( vrr_params.adjust.v_total_max); } -/* - * TODO this whole function needs to go - * - * dc_surface_update is needlessly complex. See if we can just replace this - * with a dc_plane_state and follow the atomic model a bit more closely here. - */ -static bool commit_planes_to_stream( - struct amdgpu_display_manager *dm, - struct dc *dc, - struct dc_plane_state **plane_states, - uint8_t new_plane_count, - struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state, - struct dc_state *state) -{ - /* no need to dynamically allocate this. it's pretty small */ - struct dc_surface_update updates[MAX_SURFACES]; - struct dc_flip_addrs *flip_addr; - struct dc_plane_info *plane_info; - struct dc_scaling_info *scaling_info; - int i; - struct dc_stream_state *dc_stream = dm_new_crtc_state->stream; - struct dc_stream_update *stream_update = - kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); - unsigned int abm_level; - - if (!stream_update) { - BREAK_TO_DEBUGGER(); - return false; - } - - flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs), - GFP_KERNEL); - plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info), - GFP_KERNEL); - scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info), - GFP_KERNEL); - - if (!flip_addr || !plane_info || !scaling_info) { - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return false; - } - - memset(updates, 0, sizeof(updates)); - - stream_update->src = dc_stream->src; - stream_update->dst = dc_stream->dst; - stream_update->out_transfer_func = dc_stream->out_transfer_func; - - if (dm_new_crtc_state->abm_level != dm_old_crtc_state->abm_level) { - abm_level = dm_new_crtc_state->abm_level; - stream_update->abm_level = &abm_level; - } - - for (i = 0; i < new_plane_count; i++) { - updates[i].surface = plane_states[i]; - updates[i].gamma = - (struct dc_gamma *)plane_states[i]->gamma_correction; - updates[i].in_transfer_func = plane_states[i]->in_transfer_func; - flip_addr[i].address = plane_states[i]->address; - flip_addr[i].flip_immediate = plane_states[i]->flip_immediate; - plane_info[i].color_space = plane_states[i]->color_space; - plane_info[i].format = plane_states[i]->format; - plane_info[i].plane_size = plane_states[i]->plane_size; - plane_info[i].rotation = plane_states[i]->rotation; - plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror; - plane_info[i].stereo_format = plane_states[i]->stereo_format; - plane_info[i].tiling_info = plane_states[i]->tiling_info; - plane_info[i].visible = plane_states[i]->visible; - plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha; - plane_info[i].dcc = plane_states[i]->dcc; - scaling_info[i].scaling_quality = plane_states[i]->scaling_quality; - scaling_info[i].src_rect = plane_states[i]->src_rect; - scaling_info[i].dst_rect = plane_states[i]->dst_rect; - scaling_info[i].clip_rect = plane_states[i]->clip_rect; - - updates[i].flip_addr = &flip_addr[i]; - updates[i].plane_info = &plane_info[i]; - updates[i].scaling_info = &scaling_info[i]; - } - - mutex_lock(&dm->dc_lock); - dc_commit_updates_for_stream( - dc, - updates, - new_plane_count, - dc_stream, stream_update, state); - mutex_unlock(&dm->dc_lock); - - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return true; -} - static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dc_state *dc_state, struct drm_device *dev, @@ -4728,7 +4629,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, uint64_t timestamp_ns; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; - struct dc_plane_state *plane_states_constructed[MAX_SURFACES]; struct amdgpu_crtc *acrtc_attach = to_amdgpu_crtc(pcrtc); struct drm_crtc_state *new_pcrtc_state = drm_atomic_get_new_crtc_state(state, pcrtc); @@ -4748,9 +4648,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dc_stream_update stream_update; } *flip; - flip = kzalloc(sizeof(*flip), GFP_KERNEL); + struct { + struct dc_surface_update surface_updates[MAX_SURFACES]; + struct dc_plane_info plane_infos[MAX_SURFACES]; + struct dc_scaling_info scaling_infos[MAX_SURFACES]; + struct dc_stream_update stream_update; + } *full; - if (!flip) + flip = kzalloc(sizeof(*flip), GFP_KERNEL); + full = kzalloc(sizeof(*full), GFP_KERNEL); + + if (!flip || !full) dm_error("Failed to allocate update bundles\n"); /* update planes when needed */ @@ -4760,10 +4668,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_framebuffer *fb = new_plane_state->fb; struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); bool pflip_needed; - struct dc_plane_state *surface; + struct dc_plane_state *surface, *dc_plane; struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); - if (plane->type == DRM_PLANE_TYPE_CURSOR) { handle_cursor_update(plane, old_plane_state); continue; @@ -4778,14 +4685,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, pflip_needed = !state->allow_modeset; - if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) { - WARN_ON(!dm_new_plane_state->dc_state); + dc_plane = dm_new_plane_state->dc_state; - plane_states_constructed[planes_count] = dm_new_plane_state->dc_state; - - planes_count++; - - } else if (new_crtc_state->planes_changed) { + if (pflip_needed) { /* * Assume even ONE crtc with immediate flip means * entire can't wait for VBLANK @@ -4869,8 +4771,42 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, flip_count += 1; } + full->surface_updates[planes_count].surface = dc_plane; + if (new_pcrtc_state->color_mgmt_changed) { + full->surface_updates[planes_count].gamma = dc_plane->gamma_correction; + full->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; + } + + + full->scaling_infos[planes_count].scaling_quality = dc_plane->scaling_quality; + full->scaling_infos[planes_count].src_rect = dc_plane->src_rect; + full->scaling_infos[planes_count].dst_rect = dc_plane->dst_rect; + full->scaling_infos[planes_count].clip_rect = dc_plane->clip_rect; + full->surface_updates[planes_count].scaling_info = &full->scaling_infos[planes_count]; + + + full->plane_infos[planes_count].color_space = dc_plane->color_space; + full->plane_infos[planes_count].format = dc_plane->format; + full->plane_infos[planes_count].plane_size = dc_plane->plane_size; + full->plane_infos[planes_count].rotation = dc_plane->rotation; + full->plane_infos[planes_count].horizontal_mirror = dc_plane->horizontal_mirror; + full->plane_infos[planes_count].stereo_format = dc_plane->stereo_format; + full->plane_infos[planes_count].tiling_info = dc_plane->tiling_info; + full->plane_infos[planes_count].visible = dc_plane->visible; + full->plane_infos[planes_count].per_pixel_alpha = dc_plane->per_pixel_alpha; + full->plane_infos[planes_count].dcc = dc_plane->dcc; + full->surface_updates[planes_count].plane_info = &full->plane_infos[planes_count]; + + planes_count += 1; + } + /* + * TODO: For proper atomic behaviour, we should be calling into DC once with + * all the changes. However, DC refuses to do pageflips and non-pageflip + * changes in the same call. Change DC to respect atomic behaviour, + * hopefully eliminating dc_*_update structs in their entirety. + */ if (flip_count) { target = (uint32_t)drm_crtc_vblank_count(pcrtc) + *wait_for_vblank; /* Prepare wait for target vblank early - before the fence-waits */ @@ -4925,29 +4861,26 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, } if (planes_count) { - unsigned long flags; - - if (new_pcrtc_state->event) { - - drm_crtc_vblank_get(pcrtc); - - spin_lock_irqsave(&pcrtc->dev->event_lock, flags); - prepare_flip_isr(acrtc_attach); - spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); + if (new_pcrtc_state->mode_changed) { + full->stream_update.src = acrtc_state->stream->src; + full->stream_update.dst = acrtc_state->stream->dst; } - acrtc_state->stream->abm_level = acrtc_state->abm_level; + if (new_pcrtc_state->color_mgmt_changed) + full->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; - if (false == commit_planes_to_stream(dm, - dm->dc, - plane_states_constructed, - planes_count, - acrtc_state, - dm_old_crtc_state, - dc_state)) - dm_error("%s: Failed to attach plane!\n", __func__); - } else { - /*TODO BUG Here should go disable planes on CRTC. */ + acrtc_state->stream->abm_level = acrtc_state->abm_level; + if (acrtc_state->abm_level != dm_old_crtc_state->abm_level) + full->stream_update.abm_level = &acrtc_state->abm_level; + + mutex_lock(&dm->dc_lock); + dc_commit_updates_for_stream(dm->dc, + full->surface_updates, + planes_count, + acrtc_state->stream, + &full->stream_update, + dc_state); + mutex_unlock(&dm->dc_lock); } } From b2e85302494a6b7d3a3006c8d80a729377600628 Mon Sep 17 00:00:00 2001 From: David Francis Date: Wed, 12 Dec 2018 14:11:53 -0500 Subject: [PATCH 325/539] drm/amd/display: Know what a pageflip is [Why] We were assuming that any commit with allow_modeset == false was a pageflip. This was against drm intention and only worked by sheer luck [How] A pageflip is the change from one framebuffer to another Signed-off-by: David Francis Reviewed-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9b77ace620db..407f733a47ea 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4683,7 +4683,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, if (!new_crtc_state->active) continue; - pflip_needed = !state->allow_modeset; + pflip_needed = old_plane_state->fb && + old_plane_state->fb != new_plane_state->fb; dc_plane = dm_new_plane_state->dc_state; From 07772b0715122222d5a3669fd794ac817ee1617c Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 8 Jan 2019 14:43:10 -0500 Subject: [PATCH 326/539] drm/amd/display: Update DMCU versioning mechanism [Why] Current date based versioning doesn't tell us about feature version and build version, and is not useful for debug. [How] Add versioning based on feature and build Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c | 6 +++--- drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h | 6 +++--- .../gpu/drm/amd/display/modules/power/power_helpers.c | 9 ++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index e927c8934681..c2926cf19dee 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -331,10 +331,10 @@ static void dcn10_get_dmcu_version(struct dmcu *dmcu) /* Write address to IRAM_RD_ADDR and read from DATA register */ REG_WRITE(DMCU_IRAM_RD_CTRL, dmcu_version_offset); dmcu->dmcu_version.interface_version = REG_READ(DMCU_IRAM_RD_DATA); - dmcu->dmcu_version.year = ((REG_READ(DMCU_IRAM_RD_DATA) << 8) | + dmcu->dmcu_version.abm_version = REG_READ(DMCU_IRAM_RD_DATA); + dmcu->dmcu_version.psr_version = REG_READ(DMCU_IRAM_RD_DATA); + dmcu->dmcu_version.build_version = ((REG_READ(DMCU_IRAM_RD_DATA) << 8) | REG_READ(DMCU_IRAM_RD_DATA)); - dmcu->dmcu_version.month = REG_READ(DMCU_IRAM_RD_DATA); - dmcu->dmcu_version.date = REG_READ(DMCU_IRAM_RD_DATA); /* Disable write access to IRAM to allow dynamic sleep state */ REG_UPDATE_2(DMCU_RAM_ACCESS_CTRL, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index ed32a7503684..cbaa43853611 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -39,10 +39,10 @@ enum dmcu_state { }; struct dmcu_version { - unsigned int date; - unsigned int month; - unsigned int year; unsigned int interface_version; + unsigned int abm_version; + unsigned int psr_version; + unsigned int build_version; }; struct dmcu { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 89b082b5d107..5432183d9db0 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -87,11 +87,10 @@ struct iram_table_v_2 { /* For reading PSR State directly from IRAM */ uint8_t psr_state; /* 0xf0 */ - uint8_t dmcu_interface_version; /* 0xf1 */ - uint8_t dmcu_date_version_year_b0; /* 0xf2 */ - uint8_t dmcu_date_version_year_b1; /* 0xf3 */ - uint8_t dmcu_date_version_month; /* 0xf4 */ - uint8_t dmcu_date_version_day; /* 0xf5 */ + uint8_t dmcu_mcp_interface_version; /* 0xf1 */ + uint8_t dmcu_abm_feature_version; /* 0xf2 */ + uint8_t dmcu_psr_feature_version; /* 0xf3 */ + uint16_t dmcu_version; /* 0xf4 */ uint8_t dmcu_state; /* 0xf6 */ uint16_t blRampReduction; /* 0xf7 */ From bf75572a25367e202ee986c3f7553fb38eec9c5b Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 8 Jan 2019 14:46:05 -0500 Subject: [PATCH 327/539] drm/amd/display: Create switching mechanism for ABM 2.2 [Why] Need method of detecting which version of the DMCU FW is loaded and load the appropriate iRAM. [How] Create definition for ABM 2.2 iRAM, and load it if the DMCU FW version number matches the ABM 2.2 version; otherwise load ABM 2.1 iRAM. Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/modules/power/power_helpers.c | 583 +++++++++++++----- 1 file changed, 418 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 5432183d9db0..29b7a26bea81 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -41,6 +41,17 @@ static const unsigned char min_reduction_table[13] = { static const unsigned char max_reduction_table[13] = { 0xf5, 0xe5, 0xd9, 0xcd, 0xb1, 0xa5, 0xa5, 0x80, 0x65, 0x4d, 0x4d, 0x4d, 0x32}; +/* ABM 2.2 Min Reduction effectively disabled (100% for all configs)*/ +static const unsigned char min_reduction_table_v_2_2[13] = { +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +/* Possible ABM 2.2 Max Reduction configs from least aggressive to most aggressive + * 0 1 2 3 4 5 6 7 8 9 10 11 12 + * 96.1 89.8 85.1 80.3 69.4 64.7 54.9 45.1 30.2 25.1 19.6 12.5 12.5 % + */ +static const unsigned char max_reduction_table_v_2_2[13] = { +0xf5, 0xe5, 0xd9, 0xcd, 0xb1, 0xa5, 0x8c, 0x73, 0x4d, 0x40, 0x32, 0x20, 0x20}; + /* Predefined ABM configuration sets. We may have different configuration sets * in order to satisfy different power/quality requirements. */ @@ -57,6 +68,7 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le #define NUM_POWER_FN_SEGS 8 #define NUM_BL_CURVE_SEGS 16 #define IRAM_RESERVE_AREA_START 0xF0 // reserve 0xF0~0xFF are write by DMCU only +#define IRAM_SIZE 256 #pragma pack(push, 1) /* NOTE: iRAM is 256B in size */ @@ -101,6 +113,49 @@ struct iram_table_v_2 { uint8_t dummy8; /* 0xfe */ uint8_t dummy9; /* 0xff */ }; + +struct iram_table_v_2_2 { + /* flags */ + uint16_t flags; /* 0x00 U16 */ + + /* parameters for ABM2.2 algorithm */ + uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x02 U0.8 */ + uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x16 U0.8 */ + uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x2a U2.6 */ + uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x3e U2.6 */ + uint8_t hybridFactor[NUM_AGGR_LEVEL]; /* 0x52 U0.8 */ + uint8_t contrastFactor[NUM_AGGR_LEVEL]; /* 0x56 U0.8 */ + uint8_t deviation_gain[NUM_AGGR_LEVEL]; /* 0x5a U0.8 */ + uint8_t iir_curve[NUM_AMBI_LEVEL]; /* 0x5e U0.8 */ + uint8_t pad[29]; /* 0x63 U0.8 */ + + /* parameters for crgb conversion */ + uint16_t crgb_thresh[NUM_POWER_FN_SEGS]; /* 0x80 U3.13 */ + uint16_t crgb_offset[NUM_POWER_FN_SEGS]; /* 0x90 U1.15 */ + uint16_t crgb_slope[NUM_POWER_FN_SEGS]; /* 0xa0 U4.12 */ + + /* parameters for custom curve */ + /* thresholds for brightness --> backlight */ + uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS]; /* 0xb0 U16.0 */ + /* offsets for brightness --> backlight */ + uint16_t backlight_offsets[NUM_BL_CURVE_SEGS]; /* 0xd0 U16.0 */ + + /* For reading PSR State directly from IRAM */ + uint8_t psr_state; /* 0xf0 */ + uint8_t dmcu_mcp_interface_version; /* 0xf1 */ + uint8_t dmcu_abm_feature_version; /* 0xf2 */ + uint8_t dmcu_psr_feature_version; /* 0xf3 */ + uint16_t dmcu_version; /* 0xf4 */ + uint8_t dmcu_state; /* 0xf6 */ + + uint16_t blRampReduction; /* 0xf7 */ + uint16_t blRampStart; /* 0xf9 */ + uint8_t dummy5; /* 0xfb */ + uint8_t dummy6; /* 0xfc */ + uint8_t dummy7; /* 0xfd */ + uint8_t dummy8; /* 0xfe */ + uint8_t dummy9; /* 0xff */ +}; #pragma pack(pop) static uint16_t backlight_8_to_16(unsigned int backlight_8bit) @@ -145,11 +200,367 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params, } } +static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters params, + struct iram_table_v_2_2 *table) +{ + unsigned int i; + unsigned int num_entries = NUM_BL_CURVE_SEGS; + unsigned int query_input_8bit; + unsigned int query_output_8bit; + unsigned int lut_index; + + table->backlight_thresholds[0] = 0; + table->backlight_offsets[0] = params.backlight_lut_array[0]; + table->backlight_thresholds[num_entries-1] = 0xFFFF; + table->backlight_offsets[num_entries-1] = + params.backlight_lut_array[params.backlight_lut_array_size - 1]; + + /* Setup all brightness levels between 0% and 100% exclusive + * Fills brightness-to-backlight transform table. Backlight custom curve + * describes transform from brightness to backlight. It will be defined + * as set of thresholds and set of offsets, together, implying + * extrapolation of custom curve into 16 uniformly spanned linear + * segments. Each threshold/offset represented by 16 bit entry in + * format U4.10. + */ + for (i = 1; i+1 < num_entries; i++) { + query_input_8bit = DIV_ROUNDUP((i * 256), num_entries); + + lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1); + ASSERT(lut_index < params.backlight_lut_array_size); + query_output_8bit = params.backlight_lut_array[lut_index] >> 8; + + table->backlight_thresholds[i] = + backlight_8_to_16(query_input_8bit); + table->backlight_offsets[i] = + backlight_8_to_16(query_output_8bit); + } +} + +void fill_iram_v_2(struct iram_table_v_2 *ram_table, struct dmcu_iram_parameters params) +{ + unsigned int set = params.set; + + ram_table->flags = 0x0; + ram_table->deviation_gain = 0xb3; + + ram_table->blRampReduction = + cpu_to_be16(params.backlight_ramping_reduction); + ram_table->blRampStart = + cpu_to_be16(params.backlight_ramping_start); + + ram_table->min_reduction[0][0] = min_reduction_table[abm_config[set][0]]; + ram_table->min_reduction[1][0] = min_reduction_table[abm_config[set][0]]; + ram_table->min_reduction[2][0] = min_reduction_table[abm_config[set][0]]; + ram_table->min_reduction[3][0] = min_reduction_table[abm_config[set][0]]; + ram_table->min_reduction[4][0] = min_reduction_table[abm_config[set][0]]; + ram_table->max_reduction[0][0] = max_reduction_table[abm_config[set][0]]; + ram_table->max_reduction[1][0] = max_reduction_table[abm_config[set][0]]; + ram_table->max_reduction[2][0] = max_reduction_table[abm_config[set][0]]; + ram_table->max_reduction[3][0] = max_reduction_table[abm_config[set][0]]; + ram_table->max_reduction[4][0] = max_reduction_table[abm_config[set][0]]; + + ram_table->min_reduction[0][1] = min_reduction_table[abm_config[set][1]]; + ram_table->min_reduction[1][1] = min_reduction_table[abm_config[set][1]]; + ram_table->min_reduction[2][1] = min_reduction_table[abm_config[set][1]]; + ram_table->min_reduction[3][1] = min_reduction_table[abm_config[set][1]]; + ram_table->min_reduction[4][1] = min_reduction_table[abm_config[set][1]]; + ram_table->max_reduction[0][1] = max_reduction_table[abm_config[set][1]]; + ram_table->max_reduction[1][1] = max_reduction_table[abm_config[set][1]]; + ram_table->max_reduction[2][1] = max_reduction_table[abm_config[set][1]]; + ram_table->max_reduction[3][1] = max_reduction_table[abm_config[set][1]]; + ram_table->max_reduction[4][1] = max_reduction_table[abm_config[set][1]]; + + ram_table->min_reduction[0][2] = min_reduction_table[abm_config[set][2]]; + ram_table->min_reduction[1][2] = min_reduction_table[abm_config[set][2]]; + ram_table->min_reduction[2][2] = min_reduction_table[abm_config[set][2]]; + ram_table->min_reduction[3][2] = min_reduction_table[abm_config[set][2]]; + ram_table->min_reduction[4][2] = min_reduction_table[abm_config[set][2]]; + ram_table->max_reduction[0][2] = max_reduction_table[abm_config[set][2]]; + ram_table->max_reduction[1][2] = max_reduction_table[abm_config[set][2]]; + ram_table->max_reduction[2][2] = max_reduction_table[abm_config[set][2]]; + ram_table->max_reduction[3][2] = max_reduction_table[abm_config[set][2]]; + ram_table->max_reduction[4][2] = max_reduction_table[abm_config[set][2]]; + + ram_table->min_reduction[0][3] = min_reduction_table[abm_config[set][3]]; + ram_table->min_reduction[1][3] = min_reduction_table[abm_config[set][3]]; + ram_table->min_reduction[2][3] = min_reduction_table[abm_config[set][3]]; + ram_table->min_reduction[3][3] = min_reduction_table[abm_config[set][3]]; + ram_table->min_reduction[4][3] = min_reduction_table[abm_config[set][3]]; + ram_table->max_reduction[0][3] = max_reduction_table[abm_config[set][3]]; + ram_table->max_reduction[1][3] = max_reduction_table[abm_config[set][3]]; + ram_table->max_reduction[2][3] = max_reduction_table[abm_config[set][3]]; + ram_table->max_reduction[3][3] = max_reduction_table[abm_config[set][3]]; + ram_table->max_reduction[4][3] = max_reduction_table[abm_config[set][3]]; + + ram_table->bright_pos_gain[0][0] = 0x20; + ram_table->bright_pos_gain[0][1] = 0x20; + ram_table->bright_pos_gain[0][2] = 0x20; + ram_table->bright_pos_gain[0][3] = 0x20; + ram_table->bright_pos_gain[1][0] = 0x20; + ram_table->bright_pos_gain[1][1] = 0x20; + ram_table->bright_pos_gain[1][2] = 0x20; + ram_table->bright_pos_gain[1][3] = 0x20; + ram_table->bright_pos_gain[2][0] = 0x20; + ram_table->bright_pos_gain[2][1] = 0x20; + ram_table->bright_pos_gain[2][2] = 0x20; + ram_table->bright_pos_gain[2][3] = 0x20; + ram_table->bright_pos_gain[3][0] = 0x20; + ram_table->bright_pos_gain[3][1] = 0x20; + ram_table->bright_pos_gain[3][2] = 0x20; + ram_table->bright_pos_gain[3][3] = 0x20; + ram_table->bright_pos_gain[4][0] = 0x20; + ram_table->bright_pos_gain[4][1] = 0x20; + ram_table->bright_pos_gain[4][2] = 0x20; + ram_table->bright_pos_gain[4][3] = 0x20; + ram_table->bright_neg_gain[0][1] = 0x00; + ram_table->bright_neg_gain[0][2] = 0x00; + ram_table->bright_neg_gain[0][3] = 0x00; + ram_table->bright_neg_gain[1][0] = 0x00; + ram_table->bright_neg_gain[1][1] = 0x00; + ram_table->bright_neg_gain[1][2] = 0x00; + ram_table->bright_neg_gain[1][3] = 0x00; + ram_table->bright_neg_gain[2][0] = 0x00; + ram_table->bright_neg_gain[2][1] = 0x00; + ram_table->bright_neg_gain[2][2] = 0x00; + ram_table->bright_neg_gain[2][3] = 0x00; + ram_table->bright_neg_gain[3][0] = 0x00; + ram_table->bright_neg_gain[3][1] = 0x00; + ram_table->bright_neg_gain[3][2] = 0x00; + ram_table->bright_neg_gain[3][3] = 0x00; + ram_table->bright_neg_gain[4][0] = 0x00; + ram_table->bright_neg_gain[4][1] = 0x00; + ram_table->bright_neg_gain[4][2] = 0x00; + ram_table->bright_neg_gain[4][3] = 0x00; + ram_table->dark_pos_gain[0][0] = 0x00; + ram_table->dark_pos_gain[0][1] = 0x00; + ram_table->dark_pos_gain[0][2] = 0x00; + ram_table->dark_pos_gain[0][3] = 0x00; + ram_table->dark_pos_gain[1][0] = 0x00; + ram_table->dark_pos_gain[1][1] = 0x00; + ram_table->dark_pos_gain[1][2] = 0x00; + ram_table->dark_pos_gain[1][3] = 0x00; + ram_table->dark_pos_gain[2][0] = 0x00; + ram_table->dark_pos_gain[2][1] = 0x00; + ram_table->dark_pos_gain[2][2] = 0x00; + ram_table->dark_pos_gain[2][3] = 0x00; + ram_table->dark_pos_gain[3][0] = 0x00; + ram_table->dark_pos_gain[3][1] = 0x00; + ram_table->dark_pos_gain[3][2] = 0x00; + ram_table->dark_pos_gain[3][3] = 0x00; + ram_table->dark_pos_gain[4][0] = 0x00; + ram_table->dark_pos_gain[4][1] = 0x00; + ram_table->dark_pos_gain[4][2] = 0x00; + ram_table->dark_pos_gain[4][3] = 0x00; + ram_table->dark_neg_gain[0][0] = 0x00; + ram_table->dark_neg_gain[0][1] = 0x00; + ram_table->dark_neg_gain[0][2] = 0x00; + ram_table->dark_neg_gain[0][3] = 0x00; + ram_table->dark_neg_gain[1][0] = 0x00; + ram_table->dark_neg_gain[1][1] = 0x00; + ram_table->dark_neg_gain[1][2] = 0x00; + ram_table->dark_neg_gain[1][3] = 0x00; + ram_table->dark_neg_gain[2][0] = 0x00; + ram_table->dark_neg_gain[2][1] = 0x00; + ram_table->dark_neg_gain[2][2] = 0x00; + ram_table->dark_neg_gain[2][3] = 0x00; + ram_table->dark_neg_gain[3][0] = 0x00; + ram_table->dark_neg_gain[3][1] = 0x00; + ram_table->dark_neg_gain[3][2] = 0x00; + ram_table->dark_neg_gain[3][3] = 0x00; + ram_table->dark_neg_gain[4][0] = 0x00; + ram_table->dark_neg_gain[4][1] = 0x00; + ram_table->dark_neg_gain[4][2] = 0x00; + ram_table->dark_neg_gain[4][3] = 0x00; + + ram_table->iir_curve[0] = 0x65; + ram_table->iir_curve[1] = 0x65; + ram_table->iir_curve[2] = 0x65; + ram_table->iir_curve[3] = 0x65; + ram_table->iir_curve[4] = 0x65; + + //Gamma 2.4 + ram_table->crgb_thresh[0] = cpu_to_be16(0x13b6); + ram_table->crgb_thresh[1] = cpu_to_be16(0x1648); + ram_table->crgb_thresh[2] = cpu_to_be16(0x18e3); + ram_table->crgb_thresh[3] = cpu_to_be16(0x1b41); + ram_table->crgb_thresh[4] = cpu_to_be16(0x1d46); + ram_table->crgb_thresh[5] = cpu_to_be16(0x1f21); + ram_table->crgb_thresh[6] = cpu_to_be16(0x2167); + ram_table->crgb_thresh[7] = cpu_to_be16(0x2384); + ram_table->crgb_offset[0] = cpu_to_be16(0x2999); + ram_table->crgb_offset[1] = cpu_to_be16(0x3999); + ram_table->crgb_offset[2] = cpu_to_be16(0x4666); + ram_table->crgb_offset[3] = cpu_to_be16(0x5999); + ram_table->crgb_offset[4] = cpu_to_be16(0x6333); + ram_table->crgb_offset[5] = cpu_to_be16(0x7800); + ram_table->crgb_offset[6] = cpu_to_be16(0x8c00); + ram_table->crgb_offset[7] = cpu_to_be16(0xa000); + ram_table->crgb_slope[0] = cpu_to_be16(0x3147); + ram_table->crgb_slope[1] = cpu_to_be16(0x2978); + ram_table->crgb_slope[2] = cpu_to_be16(0x23a2); + ram_table->crgb_slope[3] = cpu_to_be16(0x1f55); + ram_table->crgb_slope[4] = cpu_to_be16(0x1c63); + ram_table->crgb_slope[5] = cpu_to_be16(0x1a0f); + ram_table->crgb_slope[6] = cpu_to_be16(0x178d); + ram_table->crgb_slope[7] = cpu_to_be16(0x15ab); + + fill_backlight_transform_table( + params, ram_table); +} + +void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parameters params) +{ + unsigned int set = params.set; + + ram_table->flags = 0x0; + + ram_table->deviation_gain[0] = 0xb3; + ram_table->deviation_gain[1] = 0xb3; + ram_table->deviation_gain[2] = 0xb3; + ram_table->deviation_gain[3] = 0xb3; + + ram_table->blRampReduction = + cpu_to_be16(params.backlight_ramping_reduction); + ram_table->blRampStart = + cpu_to_be16(params.backlight_ramping_start); + + ram_table->min_reduction[0][0] = min_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->min_reduction[1][0] = min_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->min_reduction[2][0] = min_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->min_reduction[3][0] = min_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->min_reduction[4][0] = min_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->max_reduction[0][0] = max_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->max_reduction[1][0] = max_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->max_reduction[2][0] = max_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->max_reduction[3][0] = max_reduction_table_v_2_2[abm_config[set][0]]; + ram_table->max_reduction[4][0] = max_reduction_table_v_2_2[abm_config[set][0]]; + + ram_table->min_reduction[0][1] = min_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->min_reduction[1][1] = min_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->min_reduction[2][1] = min_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->min_reduction[3][1] = min_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->min_reduction[4][1] = min_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->max_reduction[0][1] = max_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->max_reduction[1][1] = max_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->max_reduction[2][1] = max_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->max_reduction[3][1] = max_reduction_table_v_2_2[abm_config[set][1]]; + ram_table->max_reduction[4][1] = max_reduction_table_v_2_2[abm_config[set][1]]; + + ram_table->min_reduction[0][2] = min_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->min_reduction[1][2] = min_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->min_reduction[2][2] = min_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->min_reduction[3][2] = min_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->min_reduction[4][2] = min_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->max_reduction[0][2] = max_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->max_reduction[1][2] = max_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->max_reduction[2][2] = max_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->max_reduction[3][2] = max_reduction_table_v_2_2[abm_config[set][2]]; + ram_table->max_reduction[4][2] = max_reduction_table_v_2_2[abm_config[set][2]]; + + ram_table->min_reduction[0][3] = min_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->min_reduction[1][3] = min_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->min_reduction[2][3] = min_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->min_reduction[3][3] = min_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->min_reduction[4][3] = min_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->max_reduction[0][3] = max_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->max_reduction[1][3] = max_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->max_reduction[2][3] = max_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->max_reduction[3][3] = max_reduction_table_v_2_2[abm_config[set][3]]; + ram_table->max_reduction[4][3] = max_reduction_table_v_2_2[abm_config[set][3]]; + + ram_table->bright_pos_gain[0][0] = 0x20; + ram_table->bright_pos_gain[0][1] = 0x20; + ram_table->bright_pos_gain[0][2] = 0x20; + ram_table->bright_pos_gain[0][3] = 0x20; + ram_table->bright_pos_gain[1][0] = 0x20; + ram_table->bright_pos_gain[1][1] = 0x20; + ram_table->bright_pos_gain[1][2] = 0x20; + ram_table->bright_pos_gain[1][3] = 0x20; + ram_table->bright_pos_gain[2][0] = 0x20; + ram_table->bright_pos_gain[2][1] = 0x20; + ram_table->bright_pos_gain[2][2] = 0x20; + ram_table->bright_pos_gain[2][3] = 0x20; + ram_table->bright_pos_gain[3][0] = 0x20; + ram_table->bright_pos_gain[3][1] = 0x20; + ram_table->bright_pos_gain[3][2] = 0x20; + ram_table->bright_pos_gain[3][3] = 0x20; + ram_table->bright_pos_gain[4][0] = 0x20; + ram_table->bright_pos_gain[4][1] = 0x20; + ram_table->bright_pos_gain[4][2] = 0x20; + ram_table->bright_pos_gain[4][3] = 0x20; + + ram_table->dark_pos_gain[0][0] = 0x00; + ram_table->dark_pos_gain[0][1] = 0x00; + ram_table->dark_pos_gain[0][2] = 0x00; + ram_table->dark_pos_gain[0][3] = 0x00; + ram_table->dark_pos_gain[1][0] = 0x00; + ram_table->dark_pos_gain[1][1] = 0x00; + ram_table->dark_pos_gain[1][2] = 0x00; + ram_table->dark_pos_gain[1][3] = 0x00; + ram_table->dark_pos_gain[2][0] = 0x00; + ram_table->dark_pos_gain[2][1] = 0x00; + ram_table->dark_pos_gain[2][2] = 0x00; + ram_table->dark_pos_gain[2][3] = 0x00; + ram_table->dark_pos_gain[3][0] = 0x00; + ram_table->dark_pos_gain[3][1] = 0x00; + ram_table->dark_pos_gain[3][2] = 0x00; + ram_table->dark_pos_gain[3][3] = 0x00; + ram_table->dark_pos_gain[4][0] = 0x00; + ram_table->dark_pos_gain[4][1] = 0x00; + ram_table->dark_pos_gain[4][2] = 0x00; + ram_table->dark_pos_gain[4][3] = 0x00; + + ram_table->hybridFactor[0] = 0xff; + ram_table->hybridFactor[1] = 0xff; + ram_table->hybridFactor[2] = 0xff; + ram_table->hybridFactor[3] = 0xc0; + + ram_table->contrastFactor[0] = 0x99; + ram_table->contrastFactor[1] = 0x80; + ram_table->contrastFactor[2] = 0x80; + ram_table->contrastFactor[3] = 0x4D; + + ram_table->iir_curve[0] = 0x65; + ram_table->iir_curve[1] = 0x65; + ram_table->iir_curve[2] = 0x65; + ram_table->iir_curve[3] = 0x65; + ram_table->iir_curve[4] = 0x65; + + //Gamma 2.2 + ram_table->crgb_thresh[0] = cpu_to_be16(0x127c); + ram_table->crgb_thresh[1] = cpu_to_be16(0x151b); + ram_table->crgb_thresh[2] = cpu_to_be16(0x17d5); + ram_table->crgb_thresh[3] = cpu_to_be16(0x1a56); + ram_table->crgb_thresh[4] = cpu_to_be16(0x1c83); + ram_table->crgb_thresh[5] = cpu_to_be16(0x1e72); + ram_table->crgb_thresh[6] = cpu_to_be16(0x20f0); + ram_table->crgb_thresh[7] = cpu_to_be16(0x232b); + ram_table->crgb_offset[0] = cpu_to_be16(0x2999); + ram_table->crgb_offset[1] = cpu_to_be16(0x3999); + ram_table->crgb_offset[2] = cpu_to_be16(0x4666); + ram_table->crgb_offset[3] = cpu_to_be16(0x5999); + ram_table->crgb_offset[4] = cpu_to_be16(0x6333); + ram_table->crgb_offset[5] = cpu_to_be16(0x7800); + ram_table->crgb_offset[6] = cpu_to_be16(0x8c00); + ram_table->crgb_offset[7] = cpu_to_be16(0xa000); + ram_table->crgb_slope[0] = cpu_to_be16(0x3609); + ram_table->crgb_slope[1] = cpu_to_be16(0x2dfa); + ram_table->crgb_slope[2] = cpu_to_be16(0x27ea); + ram_table->crgb_slope[3] = cpu_to_be16(0x235d); + ram_table->crgb_slope[4] = cpu_to_be16(0x2042); + ram_table->crgb_slope[5] = cpu_to_be16(0x1dc3); + ram_table->crgb_slope[6] = cpu_to_be16(0x1b1a); + ram_table->crgb_slope[7] = cpu_to_be16(0x1910); + + fill_backlight_transform_table_v_2_2( + params, ram_table); +} + bool dmcu_load_iram(struct dmcu *dmcu, struct dmcu_iram_parameters params) { - struct iram_table_v_2 ram_table; - unsigned int set = params.set; + unsigned char ram_table[IRAM_SIZE]; if (dmcu == NULL) return false; @@ -159,169 +570,11 @@ bool dmcu_load_iram(struct dmcu *dmcu, memset(&ram_table, 0, sizeof(ram_table)); - ram_table.flags = 0x0; - ram_table.deviation_gain = 0xb3; - - ram_table.blRampReduction = - cpu_to_be16(params.backlight_ramping_reduction); - ram_table.blRampStart = - cpu_to_be16(params.backlight_ramping_start); - - ram_table.min_reduction[0][0] = min_reduction_table[abm_config[set][0]]; - ram_table.min_reduction[1][0] = min_reduction_table[abm_config[set][0]]; - ram_table.min_reduction[2][0] = min_reduction_table[abm_config[set][0]]; - ram_table.min_reduction[3][0] = min_reduction_table[abm_config[set][0]]; - ram_table.min_reduction[4][0] = min_reduction_table[abm_config[set][0]]; - ram_table.max_reduction[0][0] = max_reduction_table[abm_config[set][0]]; - ram_table.max_reduction[1][0] = max_reduction_table[abm_config[set][0]]; - ram_table.max_reduction[2][0] = max_reduction_table[abm_config[set][0]]; - ram_table.max_reduction[3][0] = max_reduction_table[abm_config[set][0]]; - ram_table.max_reduction[4][0] = max_reduction_table[abm_config[set][0]]; - - ram_table.min_reduction[0][1] = min_reduction_table[abm_config[set][1]]; - ram_table.min_reduction[1][1] = min_reduction_table[abm_config[set][1]]; - ram_table.min_reduction[2][1] = min_reduction_table[abm_config[set][1]]; - ram_table.min_reduction[3][1] = min_reduction_table[abm_config[set][1]]; - ram_table.min_reduction[4][1] = min_reduction_table[abm_config[set][1]]; - ram_table.max_reduction[0][1] = max_reduction_table[abm_config[set][1]]; - ram_table.max_reduction[1][1] = max_reduction_table[abm_config[set][1]]; - ram_table.max_reduction[2][1] = max_reduction_table[abm_config[set][1]]; - ram_table.max_reduction[3][1] = max_reduction_table[abm_config[set][1]]; - ram_table.max_reduction[4][1] = max_reduction_table[abm_config[set][1]]; - - ram_table.min_reduction[0][2] = min_reduction_table[abm_config[set][2]]; - ram_table.min_reduction[1][2] = min_reduction_table[abm_config[set][2]]; - ram_table.min_reduction[2][2] = min_reduction_table[abm_config[set][2]]; - ram_table.min_reduction[3][2] = min_reduction_table[abm_config[set][2]]; - ram_table.min_reduction[4][2] = min_reduction_table[abm_config[set][2]]; - ram_table.max_reduction[0][2] = max_reduction_table[abm_config[set][2]]; - ram_table.max_reduction[1][2] = max_reduction_table[abm_config[set][2]]; - ram_table.max_reduction[2][2] = max_reduction_table[abm_config[set][2]]; - ram_table.max_reduction[3][2] = max_reduction_table[abm_config[set][2]]; - ram_table.max_reduction[4][2] = max_reduction_table[abm_config[set][2]]; - - ram_table.min_reduction[0][3] = min_reduction_table[abm_config[set][3]]; - ram_table.min_reduction[1][3] = min_reduction_table[abm_config[set][3]]; - ram_table.min_reduction[2][3] = min_reduction_table[abm_config[set][3]]; - ram_table.min_reduction[3][3] = min_reduction_table[abm_config[set][3]]; - ram_table.min_reduction[4][3] = min_reduction_table[abm_config[set][3]]; - ram_table.max_reduction[0][3] = max_reduction_table[abm_config[set][3]]; - ram_table.max_reduction[1][3] = max_reduction_table[abm_config[set][3]]; - ram_table.max_reduction[2][3] = max_reduction_table[abm_config[set][3]]; - ram_table.max_reduction[3][3] = max_reduction_table[abm_config[set][3]]; - ram_table.max_reduction[4][3] = max_reduction_table[abm_config[set][3]]; - - ram_table.bright_pos_gain[0][0] = 0x20; - ram_table.bright_pos_gain[0][1] = 0x20; - ram_table.bright_pos_gain[0][2] = 0x20; - ram_table.bright_pos_gain[0][3] = 0x20; - ram_table.bright_pos_gain[1][0] = 0x20; - ram_table.bright_pos_gain[1][1] = 0x20; - ram_table.bright_pos_gain[1][2] = 0x20; - ram_table.bright_pos_gain[1][3] = 0x20; - ram_table.bright_pos_gain[2][0] = 0x20; - ram_table.bright_pos_gain[2][1] = 0x20; - ram_table.bright_pos_gain[2][2] = 0x20; - ram_table.bright_pos_gain[2][3] = 0x20; - ram_table.bright_pos_gain[3][0] = 0x20; - ram_table.bright_pos_gain[3][1] = 0x20; - ram_table.bright_pos_gain[3][2] = 0x20; - ram_table.bright_pos_gain[3][3] = 0x20; - ram_table.bright_pos_gain[4][0] = 0x20; - ram_table.bright_pos_gain[4][1] = 0x20; - ram_table.bright_pos_gain[4][2] = 0x20; - ram_table.bright_pos_gain[4][3] = 0x20; - ram_table.bright_neg_gain[0][1] = 0x00; - ram_table.bright_neg_gain[0][2] = 0x00; - ram_table.bright_neg_gain[0][3] = 0x00; - ram_table.bright_neg_gain[1][0] = 0x00; - ram_table.bright_neg_gain[1][1] = 0x00; - ram_table.bright_neg_gain[1][2] = 0x00; - ram_table.bright_neg_gain[1][3] = 0x00; - ram_table.bright_neg_gain[2][0] = 0x00; - ram_table.bright_neg_gain[2][1] = 0x00; - ram_table.bright_neg_gain[2][2] = 0x00; - ram_table.bright_neg_gain[2][3] = 0x00; - ram_table.bright_neg_gain[3][0] = 0x00; - ram_table.bright_neg_gain[3][1] = 0x00; - ram_table.bright_neg_gain[3][2] = 0x00; - ram_table.bright_neg_gain[3][3] = 0x00; - ram_table.bright_neg_gain[4][0] = 0x00; - ram_table.bright_neg_gain[4][1] = 0x00; - ram_table.bright_neg_gain[4][2] = 0x00; - ram_table.bright_neg_gain[4][3] = 0x00; - ram_table.dark_pos_gain[0][0] = 0x00; - ram_table.dark_pos_gain[0][1] = 0x00; - ram_table.dark_pos_gain[0][2] = 0x00; - ram_table.dark_pos_gain[0][3] = 0x00; - ram_table.dark_pos_gain[1][0] = 0x00; - ram_table.dark_pos_gain[1][1] = 0x00; - ram_table.dark_pos_gain[1][2] = 0x00; - ram_table.dark_pos_gain[1][3] = 0x00; - ram_table.dark_pos_gain[2][0] = 0x00; - ram_table.dark_pos_gain[2][1] = 0x00; - ram_table.dark_pos_gain[2][2] = 0x00; - ram_table.dark_pos_gain[2][3] = 0x00; - ram_table.dark_pos_gain[3][0] = 0x00; - ram_table.dark_pos_gain[3][1] = 0x00; - ram_table.dark_pos_gain[3][2] = 0x00; - ram_table.dark_pos_gain[3][3] = 0x00; - ram_table.dark_pos_gain[4][0] = 0x00; - ram_table.dark_pos_gain[4][1] = 0x00; - ram_table.dark_pos_gain[4][2] = 0x00; - ram_table.dark_pos_gain[4][3] = 0x00; - ram_table.dark_neg_gain[0][0] = 0x00; - ram_table.dark_neg_gain[0][1] = 0x00; - ram_table.dark_neg_gain[0][2] = 0x00; - ram_table.dark_neg_gain[0][3] = 0x00; - ram_table.dark_neg_gain[1][0] = 0x00; - ram_table.dark_neg_gain[1][1] = 0x00; - ram_table.dark_neg_gain[1][2] = 0x00; - ram_table.dark_neg_gain[1][3] = 0x00; - ram_table.dark_neg_gain[2][0] = 0x00; - ram_table.dark_neg_gain[2][1] = 0x00; - ram_table.dark_neg_gain[2][2] = 0x00; - ram_table.dark_neg_gain[2][3] = 0x00; - ram_table.dark_neg_gain[3][0] = 0x00; - ram_table.dark_neg_gain[3][1] = 0x00; - ram_table.dark_neg_gain[3][2] = 0x00; - ram_table.dark_neg_gain[3][3] = 0x00; - ram_table.dark_neg_gain[4][0] = 0x00; - ram_table.dark_neg_gain[4][1] = 0x00; - ram_table.dark_neg_gain[4][2] = 0x00; - ram_table.dark_neg_gain[4][3] = 0x00; - ram_table.iir_curve[0] = 0x65; - ram_table.iir_curve[1] = 0x65; - ram_table.iir_curve[2] = 0x65; - ram_table.iir_curve[3] = 0x65; - ram_table.iir_curve[4] = 0x65; - ram_table.crgb_thresh[0] = cpu_to_be16(0x13b6); - ram_table.crgb_thresh[1] = cpu_to_be16(0x1648); - ram_table.crgb_thresh[2] = cpu_to_be16(0x18e3); - ram_table.crgb_thresh[3] = cpu_to_be16(0x1b41); - ram_table.crgb_thresh[4] = cpu_to_be16(0x1d46); - ram_table.crgb_thresh[5] = cpu_to_be16(0x1f21); - ram_table.crgb_thresh[6] = cpu_to_be16(0x2167); - ram_table.crgb_thresh[7] = cpu_to_be16(0x2384); - ram_table.crgb_offset[0] = cpu_to_be16(0x2999); - ram_table.crgb_offset[1] = cpu_to_be16(0x3999); - ram_table.crgb_offset[2] = cpu_to_be16(0x4666); - ram_table.crgb_offset[3] = cpu_to_be16(0x5999); - ram_table.crgb_offset[4] = cpu_to_be16(0x6333); - ram_table.crgb_offset[5] = cpu_to_be16(0x7800); - ram_table.crgb_offset[6] = cpu_to_be16(0x8c00); - ram_table.crgb_offset[7] = cpu_to_be16(0xa000); - ram_table.crgb_slope[0] = cpu_to_be16(0x3147); - ram_table.crgb_slope[1] = cpu_to_be16(0x2978); - ram_table.crgb_slope[2] = cpu_to_be16(0x23a2); - ram_table.crgb_slope[3] = cpu_to_be16(0x1f55); - ram_table.crgb_slope[4] = cpu_to_be16(0x1c63); - ram_table.crgb_slope[5] = cpu_to_be16(0x1a0f); - ram_table.crgb_slope[6] = cpu_to_be16(0x178d); - ram_table.crgb_slope[7] = cpu_to_be16(0x15ab); - - fill_backlight_transform_table( - params, &ram_table); + if (dmcu->dmcu_version.abm_version == 0x22) { + fill_iram_v_2_2((struct iram_table_v_2_2 *)ram_table, params); + } else { + fill_iram_v_2((struct iram_table_v_2 *)ram_table, params); + } return dmcu->funcs->load_iram( dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START); From 3cec41769d2182e629692a3262cc8b24ec972b04 Mon Sep 17 00:00:00 2001 From: John Barberiz Date: Tue, 8 Jan 2019 17:43:08 -0500 Subject: [PATCH 328/539] drm/amd/display: Fix use of uninitialized union [Why] An uninitialized variable would randomly initialize to a large value. This caused enough delay to fail DP Compliance Test 400.2.1. [How] Initialize the variable. Signed-off-by: John Barberiz Reviewed-by: Wenjing Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 431805c566cf..92f565ca1260 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -47,7 +47,7 @@ static void wait_for_training_aux_rd_interval( struct dc_link *link, uint32_t default_wait_in_micro_secs) { - union training_aux_rd_interval training_rd_interval; + union training_aux_rd_interval training_rd_interval = {0}; /* overwrite the delay if rev > 1.1*/ if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { From 130ef745f771999ec2fa2f76487892fe9e0c1c4f Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Tue, 8 Jan 2019 18:32:34 -0500 Subject: [PATCH 329/539] drm/amd/display: Default to linear output gamma [Why] Our output TF calculation doesn't work if no user-specified gamma correction. Normally, user provides this, but driver sohuld just assume default (linear) gamma otherwise. [How] Remove output TF dependency on user gamma being provided. Signed-off-by: Krunoslav Kovac Reviewed-by: Anthony Koo Acked-by: Leo Li Acked-by: Sivapiriyan Kumarasamy Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 73 ++++++++++--------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 4cee084a8e2d..eefb85928298 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1508,7 +1508,7 @@ static bool map_regamma_hw_to_x_user( struct hw_x_point *coords = coords_x; const struct pwl_float_data_ex *regamma = rgb_regamma; - if (mapUserRamp) { + if (ramp && mapUserRamp) { copy_rgb_regamma_to_coordinates_x(coords, hw_points_num, rgb_regamma); @@ -1545,7 +1545,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, struct pwl_float_data *rgb_user = NULL; struct pwl_float_data_ex *rgb_regamma = NULL; - struct gamma_pixel *axix_x = NULL; + struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; bool ret = false; @@ -1555,47 +1555,54 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, /* we can use hardcoded curve for plain SRGB TF */ if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true && - output_tf->tf == TRANSFER_FUNCTION_SRGB && - (ramp->is_identity || (!mapUserRamp && ramp->type == GAMMA_RGB_256))) - return true; + output_tf->tf == TRANSFER_FUNCTION_SRGB) { + if (ramp == NULL) + return true; + if (ramp->is_identity || (!mapUserRamp && ramp->type == GAMMA_RGB_256)) + return true; + } output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, + if (ramp && (mapUserRamp || ramp->type != GAMMA_RGB_256)) { + rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*rgb_user), GFP_KERNEL); - if (!rgb_user) - goto rgb_user_alloc_fail; + if (!rgb_user) + goto rgb_user_alloc_fail; + + axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x), + GFP_KERNEL); + if (!axis_x) + goto axis_x_alloc_fail; + + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); + + build_evenly_distributed_points( + axis_x, + ramp->num_entries, + dividers); + + if (ramp->type == GAMMA_RGB_256 && mapUserRamp) + scale_gamma(rgb_user, ramp, dividers); + else if (ramp->type == GAMMA_RGB_FLOAT_1024) + scale_gamma_dx(rgb_user, ramp, dividers); + } + rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*rgb_regamma), GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; - axix_x = kvcalloc(ramp->num_entries + 3, sizeof(*axix_x), - GFP_KERNEL); - if (!axix_x) - goto axix_x_alloc_fail; + coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; - dividers.divider1 = dc_fixpt_from_fraction(3, 2); - dividers.divider2 = dc_fixpt_from_int(2); - dividers.divider3 = dc_fixpt_from_fraction(5, 2); - tf = output_tf->tf; - - build_evenly_distributed_points( - axix_x, - ramp->num_entries, - dividers); - - if (ramp->type == GAMMA_RGB_256 && mapUserRamp) - scale_gamma(rgb_user, ramp, dividers); - else if (ramp->type == GAMMA_RGB_FLOAT_1024) - scale_gamma_dx(rgb_user, ramp, dividers); - if (tf == TRANSFER_FUNCTION_PQ) { tf_pts->end_exponent = 7; tf_pts->x_point_at_y1_red = 125; @@ -1623,22 +1630,22 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); } map_regamma_hw_to_x_user(ramp, coeff, rgb_user, - coordinates_x, axix_x, rgb_regamma, + coordinates_x, axis_x, rgb_regamma, MAX_HW_POINTS, tf_pts, - (mapUserRamp || ramp->type != GAMMA_RGB_256) && - ramp->type != GAMMA_CS_TFM_1D); + (mapUserRamp || (ramp && ramp->type != GAMMA_RGB_256)) && + (ramp && ramp->type != GAMMA_CS_TFM_1D)); - if (ramp->type == GAMMA_CS_TFM_1D) + if (ramp && ramp->type == GAMMA_CS_TFM_1D) apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); ret = true; kvfree(coeff); coeff_alloc_fail: - kvfree(axix_x); -axix_x_alloc_fail: kvfree(rgb_regamma); rgb_regamma_alloc_fail: + kvfree(axis_x); +axis_x_alloc_fail: kvfree(rgb_user); rgb_user_alloc_fail: return ret; From 510c51df91487ccc0dce34578afc77673d11ec1a Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Thu, 10 Jan 2019 15:23:07 -0500 Subject: [PATCH 330/539] drm/amd/display: Adjust ABM 2.2 contrast parameters [Why] Improved contrast in ABM 2.2 is desired [How] Increase the contrast factor for ABM levels 2, 3 and 4 Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 29b7a26bea81..baab6c4ae191 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -517,9 +517,9 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame ram_table->hybridFactor[3] = 0xc0; ram_table->contrastFactor[0] = 0x99; - ram_table->contrastFactor[1] = 0x80; - ram_table->contrastFactor[2] = 0x80; - ram_table->contrastFactor[3] = 0x4D; + ram_table->contrastFactor[1] = 0x99; + ram_table->contrastFactor[2] = 0x99; + ram_table->contrastFactor[3] = 0x80; ram_table->iir_curve[0] = 0x65; ram_table->iir_curve[1] = 0x65; From 8635c5a784105af8b833c43bf22cbabd6e64a8f1 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 11 Jan 2019 16:54:05 -0500 Subject: [PATCH 331/539] revert "drm/amd/display: Add condition to sync eDP SW status and HW status" [Why] This change causes regression for S4 resume where gamma is not programmed. The change incorrectly updates the requested dpms_off state. This reverts commit d2b1d6bbc56afab8ebae9d52d7ca0ea3569bd600. Signed-off-by: Eric Yang Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/bios_parser2.c | 2 - .../amd/display/dc/bios/bios_parser_helper.c | 93 ------------------- .../amd/display/dc/bios/bios_parser_helper.h | 4 - .../gpu/drm/amd/display/dc/dc_bios_types.h | 5 - .../display/dc/dce110/dce110_hw_sequencer.c | 15 --- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 1 - 6 files changed, 120 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index c513ab6f3843..190b8c4e7538 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1899,8 +1899,6 @@ static const struct dc_vbios_funcs vbios_funcs = { .is_accelerated_mode = bios_parser_is_accelerated_mode, - .is_active_display = bios_is_active_display, - .set_scratch_critical_state = bios_parser_set_scratch_critical_state, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index d8275ceb20c1..fce46ab54c54 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -87,96 +87,3 @@ uint32_t bios_get_vga_enabled_displays( return active_disp; } -bool bios_is_active_display( - struct dc_bios *bios, - enum signal_type signal, - const struct connector_device_tag_info *device_tag) -{ - uint32_t active = 0; - uint32_t connected = 0; - uint32_t bios_scratch_0 = 0; - uint32_t bios_scratch_3 = 0; - - switch (signal) { - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DISPLAY_PORT: - case SIGNAL_TYPE_DISPLAY_PORT_MST: - { - if (device_tag->dev_id.device_type == DEVICE_TYPE_DFP) { - switch (device_tag->dev_id.enum_id) { - case 1: - { - active = ATOM_S3_DFP1_ACTIVE; - connected = 0x0008; //ATOM_DISPLAY_DFP1_CONNECT - } - break; - - case 2: - { - active = ATOM_S3_DFP2_ACTIVE; - connected = 0x0080; //ATOM_DISPLAY_DFP2_CONNECT - } - break; - - case 3: - { - active = ATOM_S3_DFP3_ACTIVE; - connected = 0x0200; //ATOM_DISPLAY_DFP3_CONNECT - } - break; - - case 4: - { - active = ATOM_S3_DFP4_ACTIVE; - connected = 0x0400; //ATOM_DISPLAY_DFP4_CONNECT - } - break; - - case 5: - { - active = ATOM_S3_DFP5_ACTIVE; - connected = 0x0800; //ATOM_DISPLAY_DFP5_CONNECT - } - break; - - case 6: - { - active = ATOM_S3_DFP6_ACTIVE; - connected = 0x0040; //ATOM_DISPLAY_DFP6_CONNECT - } - break; - - default: - break; - } - } - } - break; - - case SIGNAL_TYPE_LVDS: - case SIGNAL_TYPE_EDP: - { - active = ATOM_S3_LCD1_ACTIVE; - connected = 0x0002; //ATOM_DISPLAY_LCD1_CONNECT - } - break; - - default: - break; - } - - - if (bios->regs->BIOS_SCRATCH_0) /*follow up with other asic, todo*/ - bios_scratch_0 = REG_READ(BIOS_SCRATCH_0); - if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/ - bios_scratch_3 = REG_READ(BIOS_SCRATCH_3); - - bios_scratch_3 &= ATOM_S3_DEVICE_ACTIVE_MASK; - if ((active & bios_scratch_3) && (connected & bios_scratch_0)) - return true; - - return false; -} - diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h index f33cac2147e3..75a29e68fb27 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h @@ -35,10 +35,6 @@ bool bios_is_accelerated_mode(struct dc_bios *bios); void bios_set_scratch_acc_mode_change(struct dc_bios *bios); void bios_set_scratch_critical_state(struct dc_bios *bios, bool state); uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios); -bool bios_is_active_display( - struct dc_bios *bios, - enum signal_type signal, - const struct connector_device_tag_info *device_tag); #define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type))) diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index a8b3cedf9431..8130b95ccc53 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -86,10 +86,6 @@ struct dc_vbios_funcs { bool (*is_accelerated_mode)( struct dc_bios *bios); - bool (*is_active_display)( - struct dc_bios *bios, - enum signal_type signal, - const struct connector_device_tag_info *device_tag); void (*set_scratch_critical_state)( struct dc_bios *bios, bool state); @@ -145,7 +141,6 @@ struct dc_vbios_funcs { }; struct bios_registers { - uint32_t BIOS_SCRATCH_0; uint32_t BIOS_SCRATCH_3; uint32_t BIOS_SCRATCH_6; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 144a1c8dca1d..97796fa2f6b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1557,7 +1557,6 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) int i; struct dc_link *edp_link_to_turnoff = NULL; struct dc_link *edp_link = get_link_for_edp(dc); - struct dc_bios *bios = dc->ctx->dc_bios; bool can_edp_fast_boot_optimize = false; bool apply_edp_fast_boot_optimization = false; @@ -1584,20 +1583,6 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { context->streams[i]->apply_edp_fast_boot_optimization = true; apply_edp_fast_boot_optimization = true; - - /* When after S4 and S5, vbios may post edp and previous dpms_off - * doesn't make sense. - * Update dpms_off state to align hw and sw state via check - * vBios scratch register. - */ - if (bios->funcs->is_active_display) { - const struct connector_device_tag_info *device_tag = &(edp_link->device_tag); - - if (bios->funcs->is_active_display(bios, - context->streams[i]->signal, - device_tag)) - context->streams[i]->dpms_off = false; - } } } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4f8cbe33ca29..09d74070a49b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -436,7 +436,6 @@ static const struct dcn_optc_mask tg_mask = { }; static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_0), NBIO_SR(BIOS_SCRATCH_3), NBIO_SR(BIOS_SCRATCH_6) }; From 0c522b65b04915bae0040258c3945b943e44d0e6 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 11 Jan 2019 17:09:18 -0500 Subject: [PATCH 332/539] drm/amd/display: take dpms_off into account for edp turn off logic [why] Previously we incorrectly skipped backlight control when stream is present but dpms_off = true. This causes backlight to remain on in the we boot up or resume into a external display only configuration where VBIOS posted on the eDP. [How] Add dpms_off into the condition for edp need to turn off. Signed-off-by: Eric Yang Reviewed-by: Yongqiang Sun Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 97796fa2f6b6..026d973698f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1520,7 +1520,7 @@ static struct dc_link *get_link_for_edp(struct dc *dc) return NULL; } -static struct dc_link *get_link_for_edp_not_in_use( +static struct dc_link *get_link_for_edp_to_turn_off( struct dc *dc, struct dc_state *context) { @@ -1529,8 +1529,12 @@ static struct dc_link *get_link_for_edp_not_in_use( /* check if eDP panel is suppose to be set mode, if yes, no need to disable */ for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP) - return NULL; + if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { + if (context->streams[i]->dpms_off == true) + return context->streams[i]->sink->link; + else + return NULL; + } } /* check if there is an eDP panel not in use */ @@ -1572,7 +1576,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) } if (can_edp_fast_boot_optimize) - edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context); + edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context); /* if OS doesn't light up eDP and eDP link is available, we want to disable * If resume from S4/S5, should optimization. From 0f74e4849126266b797186a7433701df777af962 Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 11 Jan 2019 16:55:54 -0500 Subject: [PATCH 333/539] drm/amd/display: 3.2.15 Signed-off-by: Steven Chiu Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f362b04a6f99..8391bc39b7a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.14" +#define DC_VER "3.2.15" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From d2c460e7537f003e5bfb1a94c9201bcbeca6949f Mon Sep 17 00:00:00 2001 From: hersen wu Date: Fri, 11 Jan 2019 12:43:20 -0500 Subject: [PATCH 334/539] drm/amd/display: Connect dig_fe to otg directly instead of calling bios [Why] After call bios table crtc_source_select, dal will program fmt again. The bios table program dig_source_select and other fmt register for bios usage which is redundancy and uncessary. [How] Program dig_soruce_select register directly Signed-off-by: hersen wu Reviewed-by: Charlene Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/bios/bios_parser.c | 14 --- .../drm/amd/display/dc/bios/bios_parser2.c | 14 --- .../drm/amd/display/dc/bios/command_table.c | 116 ------------------ .../drm/amd/display/dc/bios/command_table.h | 3 - .../drm/amd/display/dc/bios/command_table2.c | 70 ----------- .../drm/amd/display/dc/bios/command_table2.h | 3 - .../gpu/drm/amd/display/dc/dc_bios_types.h | 4 - .../amd/display/dc/dce/dce_stream_encoder.c | 10 +- .../amd/display/dc/dce/dce_stream_encoder.h | 8 +- .../display/dc/dce110/dce110_hw_sequencer.c | 54 +------- .../display/dc/dcn10/dcn10_stream_encoder.c | 9 ++ .../display/dc/dcn10/dcn10_stream_encoder.h | 10 +- .../amd/display/dc/inc/hw/stream_encoder.h | 4 + 13 files changed, 39 insertions(+), 280 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index c2ab026aee91..a4c97d32e751 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -835,18 +835,6 @@ static enum bp_result bios_parser_enable_crtc( return bp->cmd_tbl.enable_crtc(bp, id, enable); } -static enum bp_result bios_parser_crtc_source_select( - struct dc_bios *dcb, - struct bp_crtc_source_select *bp_params) -{ - struct bios_parser *bp = BP_FROM_DCB(dcb); - - if (!bp->cmd_tbl.select_crtc_source) - return BP_RESULT_FAILURE; - - return bp->cmd_tbl.select_crtc_source(bp, bp_params); -} - static enum bp_result bios_parser_enable_disp_power_gating( struct dc_bios *dcb, enum controller_id controller_id, @@ -2842,8 +2830,6 @@ static const struct dc_vbios_funcs vbios_funcs = { .program_crtc_timing = bios_parser_program_crtc_timing, /* still use. should probably retire and program directly */ - .crtc_source_select = bios_parser_crtc_source_select, /* still use. should probably retire and program directly */ - .program_display_engine_pll = bios_parser_program_display_engine_pll, .enable_disp_power_gating = bios_parser_enable_disp_power_gating, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 190b8c4e7538..a1c56f29cfeb 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1083,18 +1083,6 @@ static enum bp_result bios_parser_enable_crtc( return bp->cmd_tbl.enable_crtc(bp, id, enable); } -static enum bp_result bios_parser_crtc_source_select( - struct dc_bios *dcb, - struct bp_crtc_source_select *bp_params) -{ - struct bios_parser *bp = BP_FROM_DCB(dcb); - - if (!bp->cmd_tbl.select_crtc_source) - return BP_RESULT_FAILURE; - - return bp->cmd_tbl.select_crtc_source(bp, bp_params); -} - static enum bp_result bios_parser_enable_disp_power_gating( struct dc_bios *dcb, enum controller_id controller_id, @@ -1915,8 +1903,6 @@ static const struct dc_vbios_funcs vbios_funcs = { .program_crtc_timing = bios_parser_program_crtc_timing, - .crtc_source_select = bios_parser_crtc_source_select, - .enable_disp_power_gating = bios_parser_enable_disp_power_gating, .bios_parser_destroy = firmware_parser_destroy, diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 67c119bf6bf7..5815983caaf8 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -55,7 +55,6 @@ static void init_adjust_display_pll(struct bios_parser *bp); static void init_dac_encoder_control(struct bios_parser *bp); static void init_dac_output_control(struct bios_parser *bp); static void init_set_crtc_timing(struct bios_parser *bp); -static void init_select_crtc_source(struct bios_parser *bp); static void init_enable_crtc(struct bios_parser *bp); static void init_enable_crtc_mem_req(struct bios_parser *bp); static void init_external_encoder_control(struct bios_parser *bp); @@ -73,7 +72,6 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp) init_dac_encoder_control(bp); init_dac_output_control(bp); init_set_crtc_timing(bp); - init_select_crtc_source(bp); init_enable_crtc(bp); init_enable_crtc_mem_req(bp); init_program_clock(bp); @@ -1895,120 +1893,6 @@ static enum bp_result set_crtc_using_dtd_timing_v3( return result; } -/******************************************************************************* - ******************************************************************************** - ** - ** SELECT CRTC SOURCE - ** - ******************************************************************************** - *******************************************************************************/ - -static enum bp_result select_crtc_source_v2( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params); -static enum bp_result select_crtc_source_v3( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params); - -static void init_select_crtc_source(struct bios_parser *bp) -{ - switch (BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source)) { - case 2: - bp->cmd_tbl.select_crtc_source = select_crtc_source_v2; - break; - case 3: - bp->cmd_tbl.select_crtc_source = select_crtc_source_v3; - break; - default: - dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n", - BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source)); - bp->cmd_tbl.select_crtc_source = NULL; - break; - } -} - -static enum bp_result select_crtc_source_v2( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params) -{ - enum bp_result result = BP_RESULT_FAILURE; - SELECT_CRTC_SOURCE_PARAMETERS_V2 params; - uint8_t atom_controller_id; - uint32_t atom_engine_id; - enum signal_type s = bp_params->signal; - - memset(¶ms, 0, sizeof(params)); - - /* set controller id */ - if (bp->cmd_helper->controller_id_to_atom( - bp_params->controller_id, &atom_controller_id)) - params.ucCRTC = atom_controller_id; - else - return BP_RESULT_FAILURE; - - /* set encoder id */ - if (bp->cmd_helper->engine_bp_to_atom( - bp_params->engine_id, &atom_engine_id)) - params.ucEncoderID = (uint8_t)atom_engine_id; - else - return BP_RESULT_FAILURE; - - if (SIGNAL_TYPE_EDP == s || - (SIGNAL_TYPE_DISPLAY_PORT == s && - SIGNAL_TYPE_LVDS == bp_params->sink_signal)) - s = SIGNAL_TYPE_LVDS; - - params.ucEncodeMode = - (uint8_t)bp->cmd_helper->encoder_mode_bp_to_atom( - s, bp_params->enable_dp_audio); - - if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params)) - result = BP_RESULT_OK; - - return result; -} - -static enum bp_result select_crtc_source_v3( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params) -{ - bool result = BP_RESULT_FAILURE; - SELECT_CRTC_SOURCE_PARAMETERS_V3 params; - uint8_t atom_controller_id; - uint32_t atom_engine_id; - enum signal_type s = bp_params->signal; - - memset(¶ms, 0, sizeof(params)); - - if (bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, - &atom_controller_id)) - params.ucCRTC = atom_controller_id; - else - return result; - - if (bp->cmd_helper->engine_bp_to_atom(bp_params->engine_id, - &atom_engine_id)) - params.ucEncoderID = (uint8_t)atom_engine_id; - else - return result; - - if (SIGNAL_TYPE_EDP == s || - (SIGNAL_TYPE_DISPLAY_PORT == s && - SIGNAL_TYPE_LVDS == bp_params->sink_signal)) - s = SIGNAL_TYPE_LVDS; - - params.ucEncodeMode = - bp->cmd_helper->encoder_mode_bp_to_atom( - s, bp_params->enable_dp_audio); - /* Needed for VBIOS Random Spatial Dithering feature */ - params.ucDstBpc = (uint8_t)(bp_params->display_output_bit_depth); - - if (EXEC_BIOS_CMD_TABLE(SelectCRTC_Source, params)) - result = BP_RESULT_OK; - - return result; -} - /******************************************************************************* ******************************************************************************** ** diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.h b/drivers/gpu/drm/amd/display/dc/bios/command_table.h index 94f3d43a7471..ad533775e724 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.h @@ -71,9 +71,6 @@ struct cmd_tbl { enum bp_result (*set_crtc_timing)( struct bios_parser *bp, struct bp_hw_crtc_timing_parameters *bp_params); - enum bp_result (*select_crtc_source)( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params); enum bp_result (*enable_crtc)( struct bios_parser *bp, enum controller_id controller_id, diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 63206a4b6c0b..bb2e8105e6ab 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -460,75 +460,6 @@ static enum bp_result set_crtc_using_dtd_timing_v3( return result; } -/****************************************************************************** - ****************************************************************************** - ** - ** SELECT CRTC SOURCE - ** - ****************************************************************************** - *****************************************************************************/ - - -static enum bp_result select_crtc_source_v3( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params); - -static void init_select_crtc_source(struct bios_parser *bp) -{ - switch (BIOS_CMD_TABLE_PARA_REVISION(selectcrtc_source)) { - case 3: - bp->cmd_tbl.select_crtc_source = select_crtc_source_v3; - break; - default: - dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n", - BIOS_CMD_TABLE_PARA_REVISION(selectcrtc_source)); - bp->cmd_tbl.select_crtc_source = NULL; - break; - } -} - - -static enum bp_result select_crtc_source_v3( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params) -{ - bool result = BP_RESULT_FAILURE; - struct select_crtc_source_parameters_v2_3 params; - uint8_t atom_controller_id; - uint32_t atom_engine_id; - enum signal_type s = bp_params->signal; - - memset(¶ms, 0, sizeof(params)); - - if (bp->cmd_helper->controller_id_to_atom(bp_params->controller_id, - &atom_controller_id)) - params.crtc_id = atom_controller_id; - else - return result; - - if (bp->cmd_helper->engine_bp_to_atom(bp_params->engine_id, - &atom_engine_id)) - params.encoder_id = (uint8_t)atom_engine_id; - else - return result; - - if (s == SIGNAL_TYPE_EDP || - (s == SIGNAL_TYPE_DISPLAY_PORT && bp_params->sink_signal == - SIGNAL_TYPE_LVDS)) - s = SIGNAL_TYPE_LVDS; - - params.encode_mode = - bp->cmd_helper->encoder_mode_bp_to_atom( - s, bp_params->enable_dp_audio); - /* Needed for VBIOS Random Spatial Dithering feature */ - params.dst_bpc = (uint8_t)(bp_params->display_output_bit_depth); - - if (EXEC_BIOS_CMD_TABLE(selectcrtc_source, params)) - result = BP_RESULT_OK; - - return result; -} - /****************************************************************************** ****************************************************************************** ** @@ -808,7 +739,6 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp) init_set_crtc_timing(bp); - init_select_crtc_source(bp); init_enable_crtc(bp); init_external_encoder_control(bp); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h index ec1c0c9f3f1d..7a2af24dfe60 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h @@ -71,9 +71,6 @@ struct cmd_tbl { enum bp_result (*set_crtc_timing)( struct bios_parser *bp, struct bp_hw_crtc_timing_parameters *bp_params); - enum bp_result (*select_crtc_source)( - struct bios_parser *bp, - struct bp_crtc_source_select *bp_params); enum bp_result (*enable_crtc)( struct bios_parser *bp, enum controller_id controller_id, diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 8130b95ccc53..78c3b300ec45 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -121,10 +121,6 @@ struct dc_vbios_funcs { enum bp_result (*program_crtc_timing)( struct dc_bios *bios, struct bp_hw_crtc_timing_parameters *bp_params); - - enum bp_result (*crtc_source_select)( - struct dc_bios *bios, - struct bp_crtc_source_select *bp_params); enum bp_result (*program_display_engine_pll)( struct dc_bios *bios, struct bp_pixel_clock_parameters *bp_params); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 4a49fd631f33..1fa2d4fd7a35 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -1584,6 +1584,14 @@ static void setup_stereo_sync( REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, !enable); } +static void dig_connect_to_otg( + struct stream_encoder *enc, + int tg_inst) +{ + struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); + + REG_UPDATE(DIG_FE_CNTL, DIG_SOURCE_SELECT, tg_inst); +} static const struct stream_encoder_funcs dce110_str_enc_funcs = { .dp_set_stream_attribute = @@ -1618,7 +1626,7 @@ static const struct stream_encoder_funcs dce110_str_enc_funcs = { .hdmi_audio_disable = dce110_se_hdmi_audio_disable, .setup_stereo_sync = setup_stereo_sync, .set_avmute = dce110_stream_encoder_set_avmute, - + .dig_connect_to_otg = dig_connect_to_otg, }; void dce110_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h index 6c28229c76eb..f9cdf2b5242c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h @@ -199,7 +199,8 @@ SE_SF(DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\ SE_SF(DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\ SE_SF(DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\ - SE_SF(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh) + SE_SF(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\ + SE_SF(DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh) #define SE_COMMON_MASK_SH_LIST_DCE_COMMON(mask_sh)\ SE_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh) @@ -284,7 +285,8 @@ SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\ SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\ - SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh) + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh) #define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\ SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh) @@ -494,6 +496,7 @@ struct dce_stream_encoder_shift { uint8_t HDMI_DB_DISABLE; uint8_t DP_VID_N_MUL; uint8_t DP_VID_M_DOUBLE_VALUE_EN; + uint8_t DIG_SOURCE_SELECT; }; struct dce_stream_encoder_mask { @@ -624,6 +627,7 @@ struct dce_stream_encoder_mask { uint32_t HDMI_DB_DISABLE; uint32_t DP_VID_N_MUL; uint32_t DP_VID_M_DOUBLE_VALUE_EN; + uint32_t DIG_SOURCE_SELECT; }; struct dce110_stream_enc_registers { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 026d973698f4..db0ef41eb91c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -614,52 +614,6 @@ dce110_set_output_transfer_func(struct pipe_ctx *pipe_ctx, return true; } -static enum dc_status bios_parser_crtc_source_select( - struct pipe_ctx *pipe_ctx) -{ - struct dc_bios *dcb = pipe_ctx->stream->ctx->dc_bios; - /* call VBIOS table to set CRTC source for the HW - * encoder block - * note: video bios clears all FMT setting here. */ - struct bp_crtc_source_select crtc_source_select = {0}; - - crtc_source_select.engine_id = pipe_ctx->stream_res.stream_enc->id; - crtc_source_select.controller_id = pipe_ctx->stream_res.tg->inst + 1; - /*TODO: Need to un-hardcode color depth, dp_audio and account for - * the case where signal and sink signal is different (translator - * encoder)*/ - crtc_source_select.signal = pipe_ctx->stream->signal; - crtc_source_select.enable_dp_audio = false; - crtc_source_select.sink_signal = pipe_ctx->stream->signal; - - switch (pipe_ctx->stream->timing.display_color_depth) { - case COLOR_DEPTH_666: - crtc_source_select.display_output_bit_depth = PANEL_6BIT_COLOR; - break; - case COLOR_DEPTH_888: - crtc_source_select.display_output_bit_depth = PANEL_8BIT_COLOR; - break; - case COLOR_DEPTH_101010: - crtc_source_select.display_output_bit_depth = PANEL_10BIT_COLOR; - break; - case COLOR_DEPTH_121212: - crtc_source_select.display_output_bit_depth = PANEL_12BIT_COLOR; - break; - default: - BREAK_TO_DEBUGGER(); - crtc_source_select.display_output_bit_depth = PANEL_8BIT_COLOR; - break; - } - - if (BP_RESULT_OK != dcb->funcs->crtc_source_select( - dcb, - &crtc_source_select)) { - return DC_ERROR_UNEXPECTED; - } - - return DC_OK; -} - void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) { bool is_hdmi; @@ -1387,12 +1341,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( /* */ dc->hwss.enable_stream_timing(pipe_ctx, context, dc); - /* TODO: move to stream encoder */ if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) - if (DC_OK != bios_parser_crtc_source_select(pipe_ctx)) { - BREAK_TO_DEBUGGER(); - return DC_ERROR_UNEXPECTED; - } + pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.tg->inst); pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( pipe_ctx->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 0fb43c93932d..b08254121251 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -1425,6 +1425,14 @@ void enc1_setup_stereo_sync( REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, !enable); } +void enc1_dig_connect_to_otg( + struct stream_encoder *enc, + int tg_inst) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(DIG_FE_CNTL, DIG_SOURCE_SELECT, tg_inst); +} static const struct stream_encoder_funcs dcn10_str_enc_funcs = { .dp_set_stream_attribute = @@ -1457,6 +1465,7 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { .hdmi_audio_disable = enc1_se_hdmi_audio_disable, .setup_stereo_sync = enc1_setup_stereo_sync, .set_avmute = enc1_stream_encoder_set_avmute, + .dig_connect_to_otg = enc1_dig_connect_to_otg, }; void dcn10_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index 67f3e4dd95c1..b7c800e10a32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -274,7 +274,8 @@ struct dcn10_stream_enc_registers { SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\ - SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh) + SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh) #define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\ SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh) @@ -426,7 +427,8 @@ struct dcn10_stream_enc_registers { type DP_MSA_VHEIGHT;\ type HDMI_DB_DISABLE;\ type DP_VID_N_MUL;\ - type DP_VID_M_DOUBLE_VALUE_EN + type DP_VID_M_DOUBLE_VALUE_EN;\ + type DIG_SOURCE_SELECT struct dcn10_stream_encoder_shift { SE_REG_FIELD_LIST_DCN1_0(uint8_t); @@ -523,4 +525,8 @@ void enc1_se_hdmi_audio_setup( void enc1_se_hdmi_audio_disable( struct stream_encoder *enc); +void enc1_dig_connect_to_otg( + struct stream_encoder *enc, + int tg_inst); + #endif /* __DC_STREAM_ENCODER_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 53a9b64df11a..4051493557bc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -161,6 +161,10 @@ struct stream_encoder_funcs { void (*set_avmute)( struct stream_encoder *enc, bool enable); + void (*dig_connect_to_otg)( + struct stream_encoder *enc, + int tg_inst); + }; #endif /* STREAM_ENCODER_H_ */ From 28e732dcd46b235c0242b9ec76964f17e6b00d1e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 24 Jan 2019 17:50:39 +0800 Subject: [PATCH 335/539] drm/amd/powerplay: correct Vega20 gfxclk readout under DS Current implementation cannot report the correct gfxclk under DS. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 5085b3636f8e..13f124125f5a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2013,16 +2013,20 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); struct amdgpu_device *adev = hwmgr->adev; + SmuMetrics_t metrics_table; uint32_t val_vid; int ret = 0; switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = vega20_get_current_clk_freq(hwmgr, - PPCLK_GFXCLK, - (uint32_t *)value); - if (!ret) - *size = 4; + ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, + TABLE_SMU_METRICS, true); + PP_ASSERT_WITH_CODE(!ret, + "Failed to export SMU METRICS table!", + return ret); + + *((uint32_t *)value) = metrics_table.AverageGfxclkFrequency * 100; + *size = 4; break; case AMDGPU_PP_SENSOR_GFX_MCLK: ret = vega20_get_current_clk_freq(hwmgr, From 9e75f709fa2bc38aa77247aa58e5c7141f27a108 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 24 Jan 2019 17:55:39 +0800 Subject: [PATCH 336/539] drm/amd/powerplay: avoid frequent metrics table export That's unnecessary. Also it makes more sense to show all the clocks on one metrics table export. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 43 +++++++++++++------ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.h | 3 ++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 13f124125f5a..7b49a9a13a4a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1958,16 +1958,36 @@ static uint32_t vega20_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) return (mem_clk * 100); } +static int vega20_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table) +{ + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + int ret = 0; + + if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) { + ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table, + TABLE_SMU_METRICS, true); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t)); + data->metrics_time = jiffies; + } else + memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t)); + + return ret; +} + static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { int ret = 0; SmuMetrics_t metrics_table; - ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS, true); - PP_ASSERT_WITH_CODE(!ret, - "Failed to export SMU METRICS table!", - return ret); + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; *query = metrics_table.CurrSocketPower << 8; @@ -1998,10 +2018,9 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, int ret = 0; SmuMetrics_t metrics_table; - ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS, true); - PP_ASSERT_WITH_CODE(!ret, - "Failed to export SMU METRICS table!", - return ret); + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; *activity_percent = metrics_table.AverageGfxActivity; @@ -2019,11 +2038,9 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, - TABLE_SMU_METRICS, true); - PP_ASSERT_WITH_CODE(!ret, - "Failed to export SMU METRICS table!", - return ret); + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; *((uint32_t *)value) = metrics_table.AverageGfxclkFrequency * 100; *size = 4; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h index 25faaa5c5b10..37f5f5e657da 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h @@ -520,6 +520,9 @@ struct vega20_hwmgr { /* ---- Gfxoff ---- */ bool gfxoff_allowed; uint32_t counter_gfxoff; + + unsigned long metrics_time; + SmuMetrics_t metrics_table; }; #define VEGA20_DPM2_NEAR_TDP_DEC 10 From bb05821b13fa0c0b97760cb292b30d3105d65954 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 25 Jan 2019 14:09:40 +0800 Subject: [PATCH 337/539] drm/amd/powerplay: support Vega10 SOCclk and DCEFclk dpm level settings Enable SOCclk and DCEFclk dpm level retrieving and setting on Vega10. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 83 +++++++++++++++++++ .../drm/amd/powerplay/hwmgr/vega10_hwmgr.h | 1 + 2 files changed, 84 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 21696e8b0c23..df358b09cc4e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -72,6 +72,21 @@ static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +typedef enum { + CLK_SMNCLK = 0, + CLK_SOCCLK, + CLK_MP0CLK, + CLK_MP1CLK, + CLK_LCLK, + CLK_DCEFCLK, + CLK_VCLK, + CLK_DCLK, + CLK_ECLK, + CLK_UCLK, + CLK_GFXCLK, + CLK_COUNT, +} CLOCK_ID_e; + static const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic); struct vega10_power_state *cast_phw_vega10_power_state( @@ -3486,6 +3501,17 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr) } } + if (!data->registry_data.socclk_dpm_key_disabled) { + if (data->smc_state_table.soc_boot_level != + data->dpm_table.soc_table.dpm_state.soft_min_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMinSocclkByIndex, + data->smc_state_table.soc_boot_level); + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->smc_state_table.soc_boot_level; + } + } + return 0; } @@ -3517,6 +3543,17 @@ static int vega10_upload_dpm_max_level(struct pp_hwmgr *hwmgr) } } + if (!data->registry_data.socclk_dpm_key_disabled) { + if (data->smc_state_table.soc_max_level != + data->dpm_table.soc_table.dpm_state.soft_max_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxSocclkByIndex, + data->smc_state_table.soc_max_level); + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->smc_state_table.soc_max_level; + } + } + return 0; } @@ -4029,6 +4066,24 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, break; + case PP_SOCCLK: + data->smc_state_table.soc_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.soc_max_level = mask ? (fls(mask) - 1) : 0; + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), + "Failed to upload boot level to lowest!", + return -EINVAL); + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), + "Failed to upload dpm max level to highest!", + return -EINVAL); + + break; + + case PP_DCEFCLK: + pr_info("Setting DCEFCLK min/max dpm level is not supported!\n"); + break; + case PP_PCIE: default: break; @@ -4274,6 +4329,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); + struct vega10_single_dpm_table *soc_table = &(data->dpm_table.soc_table); + struct vega10_single_dpm_table *dcef_table = &(data->dpm_table.dcef_table); struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table); struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; @@ -4304,6 +4361,32 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, i, mclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; + case PP_SOCCLK: + if (data->registry_data.socclk_dpm_key_disabled) + break; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex); + now = smum_get_argument(hwmgr); + + for (i = 0; i < soc_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, soc_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_DCEFCLK: + if (data->registry_data.dcefclk_dpm_key_disabled) + break; + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK); + now = smum_get_argument(hwmgr); + + for (i = 0; i < dcef_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, dcef_table->dpm_levels[i].value / 100, + (dcef_table->dpm_levels[i].value / 100 == now) ? + "*" : ""); + break; case PP_PCIE: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentLinkIndex); now = smum_get_argument(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 89870556de1b..f752b4ad0c8a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -199,6 +199,7 @@ struct vega10_smc_state_table { uint32_t vce_boot_level; uint32_t gfx_max_level; uint32_t mem_max_level; + uint32_t soc_max_level; uint8_t vr_hot_gpio; uint8_t ac_dc_gpio; uint8_t therm_out_gpio; From d6e4030168588ded911a89b51686d463a1166b93 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 25 Jan 2019 14:11:31 +0800 Subject: [PATCH 338/539] drm/amd/powerplay: support Vega10 retrieving and setting ppfeatures Enable retrieving and setting ppfeatures on Vega10. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index df358b09cc4e..0d38ac2fdbf1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4323,6 +4323,105 @@ static int vega10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, return result; } +static int vega10_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) +{ + static const char *ppfeature_name[] = { + "DPM_PREFETCHER", + "GFXCLK_DPM", + "UCLK_DPM", + "SOCCLK_DPM", + "UVD_DPM", + "VCE_DPM", + "ULV", + "MP0CLK_DPM", + "LINK_DPM", + "DCEFCLK_DPM", + "AVFS", + "GFXCLK_DS", + "SOCCLK_DS", + "LCLK_DS", + "PPT", + "TDC", + "THERMAL", + "GFX_PER_CU_CG", + "RM", + "DCEFCLK_DS", + "ACDC", + "VR0HOT", + "VR1HOT", + "FW_CTF", + "LED_DISPLAY", + "FAN_CONTROL", + "FAST_PPT", + "DIDT", + "ACG", + "PCC_LIMIT"}; + static const char *output_title[] = { + "FEATURES", + "BITMASK", + "ENABLEMENT"}; + uint64_t features_enabled; + int i; + int ret = 0; + int size = 0; + + ret = vega10_get_enabled_smc_features(hwmgr, &features_enabled); + PP_ASSERT_WITH_CODE(!ret, + "[EnableAllSmuFeatures] Failed to get enabled smc features!", + return ret); + + size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sprintf(buf + size, "%-19s %-22s %s\n", + output_title[0], + output_title[1], + output_title[2]); + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + ppfeature_name[i], + 1ULL << i, + (features_enabled & (1ULL << i)) ? "Y" : "N"); + } + + return size; +} + +static int vega10_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfeature_masks) +{ + uint64_t features_enabled; + uint64_t features_to_enable; + uint64_t features_to_disable; + int ret = 0; + + if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) + return -EINVAL; + + ret = vega10_get_enabled_smc_features(hwmgr, &features_enabled); + if (ret) + return ret; + + features_to_disable = + (features_enabled ^ new_ppfeature_masks) & features_enabled; + features_to_enable = + (features_enabled ^ new_ppfeature_masks) ^ features_to_disable; + + pr_debug("features_to_disable 0x%llx\n", features_to_disable); + pr_debug("features_to_enable 0x%llx\n", features_to_enable); + + if (features_to_disable) { + ret = vega10_enable_smc_features(hwmgr, false, features_to_disable); + if (ret) + return ret; + } + + if (features_to_enable) { + ret = vega10_enable_smc_features(hwmgr, true, features_to_enable); + if (ret) + return ret; + } + + return 0; +} + static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -5068,6 +5167,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_asic_baco_state = vega10_baco_get_state, .set_asic_baco_state = vega10_baco_set_state, .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, + .get_ppfeature_status = vega10_get_ppfeature_status, + .set_ppfeature_status = vega10_set_ppfeature_status, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) From aa1083edce631cbd8ab268bd9d477b3ba584d62a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 25 Jan 2019 14:12:40 +0800 Subject: [PATCH 339/539] drm/amd/powerplay: support Vega12 SOCclk and DCEFclk dpm level settings Enable SOCclk and DCEFclk dpm level retrieving and setting on Vega12. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 98 ++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 0c8212902275..e4c8a1a16ad5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1093,6 +1093,16 @@ static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr) return ret); } + if (data->smu_features[GNLD_DPM_DCEFCLK].enabled) { + min_freq = data->dpm_table.dcef_table.dpm_state.hard_min_level; + + PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter( + hwmgr, PPSMC_MSG_SetHardMinByFreq, + (PPCLK_DCEFCLK << 16) | (min_freq & 0xffff))), + "Failed to set hard min dcefclk!", + return ret); + } + return ret; } @@ -1818,7 +1828,7 @@ static int vega12_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); - uint32_t soft_min_level, soft_max_level; + uint32_t soft_min_level, soft_max_level, hard_min_level; int ret = 0; switch (type) { @@ -1863,6 +1873,56 @@ static int vega12_force_clock_level(struct pp_hwmgr *hwmgr, break; + case PP_SOCCLK: + soft_min_level = mask ? (ffs(mask) - 1) : 0; + soft_max_level = mask ? (fls(mask) - 1) : 0; + + if (soft_max_level >= data->dpm_table.soc_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, + data->dpm_table.soc_table.count - 1); + return -EINVAL; + } + + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_levels[soft_min_level].value; + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_max_level].value; + + ret = vega12_upload_dpm_min_level(hwmgr); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload boot level to lowest!", + return ret); + + ret = vega12_upload_dpm_max_level(hwmgr); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload dpm max level to highest!", + return ret); + + break; + + case PP_DCEFCLK: + hard_min_level = mask ? (ffs(mask) - 1) : 0; + + if (hard_min_level >= data->dpm_table.dcef_table.count) { + pr_err("Clock level specified %d is over max allowed %d\n", + hard_min_level, + data->dpm_table.dcef_table.count - 1); + return -EINVAL; + } + + data->dpm_table.dcef_table.dpm_state.hard_min_level = + data->dpm_table.dcef_table.dpm_levels[hard_min_level].value; + + ret = vega12_upload_dpm_min_level(hwmgr); + PP_ASSERT_WITH_CODE(!ret, + "Failed to upload boot level to lowest!", + return ret); + + //TODO: Setting DCEFCLK max dpm level is not supported + + break; + case PP_PCIE: break; @@ -1912,6 +1972,42 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : ""); break; + case PP_SOCCLK: + PP_ASSERT_WITH_CODE( + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_GetDpmClockFreq, (PPCLK_SOCCLK << 16)) == 0, + "Attempt to get Current SOCCLK Frequency Failed!", + return -EINVAL); + now = smum_get_argument(hwmgr); + + PP_ASSERT_WITH_CODE( + vega12_get_socclocks(hwmgr, &clocks) == 0, + "Attempt to get soc clk levels Failed!", + return -1); + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); + break; + + case PP_DCEFCLK: + PP_ASSERT_WITH_CODE( + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_GetDpmClockFreq, (PPCLK_DCEFCLK << 16)) == 0, + "Attempt to get Current DCEFCLK Frequency Failed!", + return -EINVAL); + now = smum_get_argument(hwmgr); + + PP_ASSERT_WITH_CODE( + vega12_get_dcefclocks(hwmgr, &clocks) == 0, + "Attempt to get dcef clk levels Failed!", + return -1); + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); + break; + case PP_PCIE: break; From 5eeb3f62a5746bb2f6848e684b3adf61edd66778 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 25 Jan 2019 14:15:10 +0800 Subject: [PATCH 340/539] drm/amd/powerplay: support Vega12 retrieving and setting ppfeatures Enable retrieving and setting ppfeatures on Vega12. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index e4c8a1a16ad5..6c8e78611c03 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1933,6 +1933,104 @@ static int vega12_force_clock_level(struct pp_hwmgr *hwmgr, return 0; } +static int vega12_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) +{ + static const char *ppfeature_name[] = { + "DPM_PREFETCHER", + "GFXCLK_DPM", + "UCLK_DPM", + "SOCCLK_DPM", + "UVD_DPM", + "VCE_DPM", + "ULV", + "MP0CLK_DPM", + "LINK_DPM", + "DCEFCLK_DPM", + "GFXCLK_DS", + "SOCCLK_DS", + "LCLK_DS", + "PPT", + "TDC", + "THERMAL", + "GFX_PER_CU_CG", + "RM", + "DCEFCLK_DS", + "ACDC", + "VR0HOT", + "VR1HOT", + "FW_CTF", + "LED_DISPLAY", + "FAN_CONTROL", + "DIDT", + "GFXOFF", + "CG", + "ACG"}; + static const char *output_title[] = { + "FEATURES", + "BITMASK", + "ENABLEMENT"}; + uint64_t features_enabled; + int i; + int ret = 0; + int size = 0; + + ret = vega12_get_enabled_smc_features(hwmgr, &features_enabled); + PP_ASSERT_WITH_CODE(!ret, + "[EnableAllSmuFeatures] Failed to get enabled smc features!", + return ret); + + size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sprintf(buf + size, "%-19s %-22s %s\n", + output_title[0], + output_title[1], + output_title[2]); + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + ppfeature_name[i], + 1ULL << i, + (features_enabled & (1ULL << i)) ? "Y" : "N"); + } + + return size; +} + +static int vega12_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfeature_masks) +{ + uint64_t features_enabled; + uint64_t features_to_enable; + uint64_t features_to_disable; + int ret = 0; + + if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) + return -EINVAL; + + ret = vega12_get_enabled_smc_features(hwmgr, &features_enabled); + if (ret) + return ret; + + features_to_disable = + (features_enabled ^ new_ppfeature_masks) & features_enabled; + features_to_enable = + (features_enabled ^ new_ppfeature_masks) ^ features_to_disable; + + pr_debug("features_to_disable 0x%llx\n", features_to_disable); + pr_debug("features_to_enable 0x%llx\n", features_to_enable); + + if (features_to_disable) { + ret = vega12_enable_smc_features(hwmgr, false, features_to_disable); + if (ret) + return ret; + } + + if (features_to_enable) { + ret = vega12_enable_smc_features(hwmgr, true, features_to_enable); + if (ret) + return ret; + } + + return 0; +} + static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -2528,6 +2626,8 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = { .start_thermal_controller = vega12_start_thermal_controller, .powergate_gfx = vega12_gfx_off_control, .get_performance_level = vega12_get_performance_level, + .get_ppfeature_status = vega12_get_ppfeature_status, + .set_ppfeature_status = vega12_set_ppfeature_status, }; int vega12_hwmgr_init(struct pp_hwmgr *hwmgr) From 10117450735c7a7c0858095fb46a860e7037cb9a Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Thu, 24 Jan 2019 16:52:59 -0800 Subject: [PATCH 341/539] drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines arch/x86/Makefile disables SSE and SSE2 for the whole kernel. The AMDGPU drivers modified in this patch re-enable SSE but not SSE2. Turn on SSE2 to support emitting double precision floating point instructions rather than calls to non-existent (usually available from gcc_s or compiler_rt) floating point helper routines. Link: https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html Link: https://github.com/ClangBuiltLinux/linux/issues/327 Cc: stable@vger.kernel.org # 4.19 Reported-by: S, Shirish Reported-by: Matthias Kaehlcke Suggested-by: James Y Knight Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Tested-by: Guenter Roeck Tested-by: Matthias Kaehlcke Tested-by: Nathan Chancellor Reviewed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 95f332ee3e7e..dc85a3c088af 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),) cc_stack_align := -mstack-alignment=16 endif -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align) CFLAGS_dcn_calcs.o := $(calcs_ccflags) CFLAGS_dcn_calc_auto.o := $(calcs_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index d97ca6528f9d..33c7d7588712 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),) cc_stack_align := -mstack-alignment=16 endif -dml_ccflags := -mhard-float -msse $(cc_stack_align) +dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align) CFLAGS_display_mode_lib.o := $(dml_ccflags) CFLAGS_display_pipe_clocks.o := $(dml_ccflags) From 29214e8cf6ceaaf1a6ad74ee6a781f79bcb84d1f Mon Sep 17 00:00:00 2001 From: P Raviraj Sitaram Date: Wed, 19 Dec 2018 13:59:12 +0530 Subject: [PATCH 342/539] drm/i915: correct the pitch check for NV12 framebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit framebuffer for NV12 requires the pitch to the multiplier of 4, instead of the width. This patch corrects it. For instance, a 480p video, whose width and pitch are 854 and 896 respectively, is excluded for NV12 plane so far. Changes since v1: - Removed check for NV12 buffer dimensions since additional checks are done for viewport size in intel_sprite.c Signed-off-by: Dongseong Hwang Signed-off-by: P Raviraj Sitaram Cc: Chandra Konduru Cc: Vidya Srinivas Cc: Ville Syrjälä Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1545208152-22658-1-git-send-email-raviraj.p.sitaram@intel.com --- drivers/gpu/drm/i915/intel_display.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 67a64cbbe851..aa777182cac9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14710,14 +14710,6 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); - if (fb->format->format == DRM_FORMAT_NV12 && - (fb->width < SKL_MIN_YUV_420_SRC_W || - fb->height < SKL_MIN_YUV_420_SRC_H || - (fb->width % 4) != 0 || (fb->height % 4) != 0)) { - DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); - goto err; - } - for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; From c5627461490e4b913e8747d3b06541e5264a50d7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 26 Jan 2019 00:11:23 -0700 Subject: [PATCH 343/539] drm/i915: Disable -Wuninitialized This warning is disabled by default in scripts/Makefile.extrawarn when W= is not provided but this Makefile adds -Wall after this warning is disabled so it shows up in the build when it shouldn't: In file included from drivers/gpu/drm/i915/intel_breadcrumbs.c:895: drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c:350:34: error: variable 'wq' is uninitialized when used within its own initialization [-Werror,-Wuninitialized] DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); ^~ ./include/linux/wait.h:74:63: note: expanded from macro 'DECLARE_WAIT_QUEUE_HEAD_ONSTACK' struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) ~~~~ ^~~~ ./include/linux/wait.h:72:33: note: expanded from macro '__WAIT_QUEUE_HEAD_INIT_ONSTACK' ({ init_waitqueue_head(&name); name; }) ^~~~ 1 error generated. Explicitly disable the warning like commit 46e2068081e9 ("drm/i915: Disable some extra clang warnings"). Link: https://github.com/ClangBuiltLinux/linux/issues/220 Suggested-by: Chris Wilson Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190126071122.24557-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8300efe60fe1..210d0e8777b6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -22,6 +22,7 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) subdir-ccflags-y += $(call cc-disable-warning, sign-compare) subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, uninitialized) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable From ad4062da1397e32a942191c61de3eabe1fb3664f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 01:02:18 +0000 Subject: [PATCH 344/539] drm/i915: Wait for a moment before forcibly resetting the device During igt, we ask to reset the device if any requests are still outstanding at the end of a test, as this quickly kills off any erroneous hanging request streams that may escape a test. However, since it may take the device a few milliseconds to flush itself after the end of a normal test, *cough* guc *cough*, we may accidentally tell the device to reset itself after it idles. If we wait a moment, our usual I915_IDLE_ENGINES_TIMEOUT of 200ms (seems a bit high, but still better than umpteen hangchecks!), we can differentiate better between a stuck engine and a healthy one, and so avoid prematurely forcing the reset and any extra complications that may entail. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190128010245.20148-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 76dea0572f3e..ecf762e24d0e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4050,7 +4050,8 @@ i915_drop_caches_set(void *data, u64 val) val, val & DROP_ALL); wakeref = intel_runtime_pm_get(i915); - if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915)) + if (val & DROP_RESET_ACTIVE && + wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) i915_gem_set_wedged(i915); /* No need to check and wait for gpu resets, only libdrm auto-restarts From 6a2a94041052dd4616a56b6a9aa25dd2ad51c772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:19:30 +0200 Subject: [PATCH 345/539] drm/i915/tv: Fix return value for intel_tv_compute_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit 204474a6b859 ("drm/i915: Pass down rc in intel_encoder->compute_config()") we're supposed to return an errno from .compute_config(). I failed to notice that when pushing the TV encoder fixes which were written before said commmit. Fix up the return value for the error case. Cc: Imre Deak Fixes: 690157f0a9e7 ("drm/i915/tv: Fix >1024 modes on gen3") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125181931.19482-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_tv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index f0b9abda7720..78be08e2971b 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1189,7 +1189,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, if (extra < 0) { DRM_DEBUG_KMS("No vertical scaling for >1024 pixel wide modes\n"); - return false; + return -EINVAL; } /* Need to turn off the vertical filter and center the image */ From 8a920e24f05802e2ed88d0454a7253449c4654c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:19:31 +0200 Subject: [PATCH 346/539] drm/i915/tv: Use the scanline counter for timestamps on i965gm TV output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like the frame counter, the pixel counter also reads zero all the time when the TV encoder is used. Fortunately the scanline counter still works sufficiently well so let's use that to correct the vblank timestamps. Otherwise the timestamps may en up out of whack, and since we use them to guesstimate the vblank counter value that may end up incorrect as well. Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125181931.19482-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_irq.c | 7 +++++-- drivers/gpu/drm/i915/intel_drv.h | 4 +++- drivers/gpu/drm/i915/intel_tv.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 926e05f0db24..26ab00862b59 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1012,6 +1012,9 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; + bool use_scanline_counter = INTEL_GEN(dev_priv) >= 5 || + IS_G4X(dev_priv) || IS_GEN(dev_priv, 2) || + mode->private_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (WARN_ON(!mode->crtc_clock)) { DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " @@ -1044,7 +1047,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, if (stime) *stime = ktime_get(); - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ @@ -1104,7 +1107,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, else position += vtotal - vbl_end; - if (IS_GEN(dev_priv, 2) || IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { + if (use_scanline_counter) { *vpos = position; *hpos = 0; } else { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 47195320413a..35fbc527c19e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -629,9 +629,11 @@ struct intel_crtc_scaler_state { }; /* drm_mode->private_flags */ -#define I915_MODE_FLAG_INHERITED 1 +#define I915_MODE_FLAG_INHERITED (1<<0) /* Flag to get scanline using frame time stamps */ #define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) +/* Flag to use the scanline counter instead of the pixel counter */ +#define I915_MODE_FLAG_USE_SCANLINE_COUNTER (1<<2) struct intel_pipe_wm { struct intel_wm_level wm[5]; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 78be08e2971b..751b88dde18e 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1150,6 +1150,11 @@ intel_tv_get_config(struct intel_encoder *encoder, ypos, mode.vdisplay - ysize - ypos); adjusted_mode->crtc_clock = mode.clock; + + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; } static int @@ -1295,6 +1300,11 @@ intel_tv_compute_config(struct intel_encoder *encoder, drm_mode_set_crtcinfo(adjusted_mode, 0); adjusted_mode->name[0] = '\0'; + /* pixel counter doesn't work on i965gm TV output */ + if (IS_I965GM(dev_priv)) + adjusted_mode->private_flags |= + I915_MODE_FLAG_USE_SCANLINE_COUNTER; + return 0; } From 7bed8adcd9f86231bb69bbc02f88ad89330f99e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Jan 2019 19:49:50 +0200 Subject: [PATCH 347/539] drm/i915: Try to sanitize bogus DPLL state left over by broken SNB BIOSen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain SNB machines (eg. ASUS K53SV) seem to have a broken BIOS which misprograms the hardware badly when encountering a suitably high resolution display. The programmed pipe timings are somewhat bonkers and the DPLL is totally misprogrammed (P divider == 0). That will result in atomic commit timeouts as apparently the pipe is sufficiently stuck to not signal vblank interrupts. IIRC something like this was also observed on some other SNB machine years ago (might have been a Dell XPS 8300) but a BIOS update cured it. Sadly looks like this was never fixed for the ASUS K53SV as the latest BIOS (K53SV.320 11/11/2011) is still broken. The quickest way to deal with this seems to be to shut down the pipe+ports+DPLL. Unfortunately doing this during the normal sanitization phase isn't quite soon enough as we already spew several WARNs about the bogus hardware state. But it's better than hanging the boot for a few dozen seconds. Since this is limited to a few old machines it doesn't seem entirely worthwile to try and rework the readout+sanitization code to handle it more gracefully. v2: Fix potential NULL deref (kbuild test robot) Constify has_bogus_dpll_config() Cc: stable@vger.kernel.org # v4.20+ Cc: Daniel Kamil Kozar Reported-by: Daniel Kamil Kozar Tested-by: Daniel Kamil Kozar Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109245 Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190111174950.10681-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola --- drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aa777182cac9..6cf480a0eb23 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15515,16 +15515,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN(dev_priv, 6) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15535,16 +15564,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; From 499197dc169601116e106cabe409bf39295893b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:52 +0000 Subject: [PATCH 348/539] drm/i915: Stop tracking MRU activity on VMA Our goal is to remove struct_mutex and replace it with fine grained locking. One of the thorny issues is our eviction logic for reclaiming space for an execbuffer (or GTT mmaping, among a few other examples). While eviction itself is easy to move under a per-VM mutex, performing the activity tracking is less agreeable. One solution is not to do any MRU tracking and do a simple coarse evaluation during eviction of active/inactive, with a loose temporal ordering of last insertion/evaluation. That keeps all the locking constrained to when we are manipulating the VM itself, neatly avoiding the tricky handling of possible recursive locking during execbuf and elsewhere. Note that discarding the MRU (currently implemented as a pair of lists, to avoid scanning the active list for a NONBLOCKING search) is unlikely to impact upon our efficiency to reclaim VM space (where we think a LRU model is best) as our current strategy is to use random idle replacement first before doing a search, and over time the use of softpinned 48b per-ppGTT is growing (thereby eliminating any need to perform any eviction searches, in theory at least) with the remaining users being found on much older devices (gen2-gen6). v2: Changelog and commentary rewritten to elaborate on the duality of a single list being both an inactive and active list. v3: Consolidate bool parameters into a single set of flags; don't comment on the duality of a single variable being a multiplicity of bits. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 10 +-- drivers/gpu/drm/i915/i915_gem_evict.c | 87 +++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +----- drivers/gpu/drm/i915/i915_gem_shrinker.c | 8 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 42 ++++----- drivers/gpu/drm/i915/i915_vma.c | 9 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 10 files changed, 95 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d20b42386c3c..f45186ddb236 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -253,10 +253,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = ggtt->vm.reserved; mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) - if (i915_vma_is_pinned(vma)) - pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); @@ -1539,13 +1536,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); for_each_ggtt_vma(vma, obj) { - if (i915_vma_is_active(vma)) - continue; - if (!drm_mm_node_allocated(&vma->node)) continue; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } i915 = to_i915(obj->base.dev); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f6855401f247..d76839670632 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -126,31 +126,25 @@ i915_gem_evict_something(struct i915_address_space *vm, struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL, - }, **phase; struct i915_vma *vma, *next; struct drm_mm_node *node; enum drm_mm_insert_mode mode; + struct i915_vma *active; int ret; lockdep_assert_held(&vm->i915->drm.struct_mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* - * The goal is to evict objects and amalgamate space in LRU order. - * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those in flight, - * on the active list, again in retirement order. + * The goal is to evict objects and amalgamate space in rough LRU order. + * Since both active and inactive objects reside on the same list, + * in a mix of creation and last scanned order, as we process the list + * we sort it into inactive/active, which keeps the active portion + * in a rough MRU order. * * The retirement sequence is thus: - * 1. Inactive objects (already retired) - * 2. Active objects (will stall on unbinding) - * - * On each list, the oldest objects lie at the HEAD with the freshest - * object on the TAIL. + * 1. Inactive objects (already retired, random order) + * 2. Active objects (will stall on unbinding, oldest scanned first) */ mode = DRM_MM_INSERT_BEST; if (flags & PIN_HIGH) @@ -169,17 +163,46 @@ i915_gem_evict_something(struct i915_address_space *vm, */ if (!(flags & PIN_NONBLOCK)) i915_retire_requests(dev_priv); - else - phases[1] = NULL; search_again: + active = NULL; INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) - if (mark_free(&scan, vma, flags, &eviction_list)) - goto found; - } while (*++phase); + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + /* + * We keep this list in a rough least-recently scanned order + * of active elements (inactive elements are cheap to reap). + * New entries are added to the end, and we move anything we + * scan to the end. The assumption is that the working set + * of applications is either steady state (and thanks to the + * userspace bo cache it almost always is) or volatile and + * frequently replaced after a frame, which are self-evicting! + * Given that assumption, the MRU order of the scan list is + * fairly static, and keeping it in least-recently scan order + * is suitable. + * + * To notice when we complete one full cycle, we record the + * first active element seen, before moving it to the tail. + */ + if (i915_vma_is_active(vma)) { + if (vma == active) { + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + + if (active != ERR_PTR(-EAGAIN)) { + if (!active) + active = vma; + + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; + } + } + + if (mark_free(&scan, vma, flags, &eviction_list)) + goto found; + } /* Nothing found, clean up and bail out! */ list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { @@ -388,11 +411,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, */ int i915_gem_evict_vm(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->inactive_list, - &vm->active_list, - NULL - }, **phase; struct list_head eviction_list; struct i915_vma *vma, *next; int ret; @@ -412,16 +430,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - phase = phases; - do { - list_for_each_entry(vma, *phase, vm_link) { - if (i915_vma_is_pinned(vma)) - continue; + list_for_each_entry(vma, &vm->bound_list, vm_link) { + if (i915_vma_is_pinned(vma)) + continue; - __i915_vma_pin(vma); - list_add(&vma->evict_link, &eviction_list); - } - } while (*++phase); + __i915_vma_pin(vma); + list_add(&vma->evict_link, &eviction_list); + } ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9081e3bc5a59..2ad9070a54c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->active_list); - INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + INIT_LIST_HEAD(&vm->bound_list); } static void i915_address_space_fini(struct i915_address_space *vm) @@ -2111,8 +2110,7 @@ void i915_ppgtt_close(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { struct list_head *phases[] = { - &vm->active_list, - &vm->inactive_list, + &vm->bound_list, &vm->unbound_list, NULL, }, **phase; @@ -2135,8 +2133,7 @@ void i915_ppgtt_release(struct kref *kref) ppgtt_destroy_vma(&ppgtt->vm); - GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list)); GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); ppgtt->vm.cleanup(&ppgtt->vm); @@ -2801,8 +2798,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_fini_aliasing_ppgtt(dev_priv); - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) WARN_ON(i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) @@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a0039ea97cdc..bd679c8c56dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -299,32 +299,12 @@ struct i915_address_space { struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** - * List of objects currently involved in rendering. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. + * List of vma currently bound. */ - struct list_head active_list; + struct list_head bound_list; /** - * LRU list of objects which are not in the ringbuffer and - * are ready to unbind, but are still in the GTT. - * - * last_read_req is NULL while an object is in this list. - * - * A reference is not held on the buffer while on this list, - * as merely being GTT-bound shouldn't prevent its being - * freed, and we'll pull it off the list in the free path. - */ - struct list_head inactive_list; - - /** - * List of vma that have been unbound. - * - * A reference is not held on the buffer while on this list. + * List of vma that are not unbound. */ struct list_head unbound_list; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8ceecb026910..a76d6c95c824 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -462,9 +462,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.inactive_list, vm_link) { + &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; - if (vma->iomap && i915_vma_unbind(vma) == 0) + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + if (i915_vma_unbind(vma) == 0) freed_pages += count; } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9df615eea2d8..a9e365789686 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -701,7 +701,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); + + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4eef0462489c..898e06014295 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1121,7 +1121,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, static u32 capture_error_bo(struct drm_i915_error_buffer *err, int count, struct list_head *head, - bool pinned_only) + unsigned int flags) +#define ACTIVE_ONLY BIT(0) +#define PINNED_ONLY BIT(1) { struct i915_vma *vma; int i = 0; @@ -1130,7 +1132,10 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, if (!vma->obj) continue; - if (pinned_only && !i915_vma_is_pinned(vma)) + if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma)) + continue; + + if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma)) continue; capture_bo(err++, vma); @@ -1601,14 +1606,17 @@ static void gem_capture_vm(struct i915_gpu_state *error, int count; count = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count++; + list_for_each_entry(vma, &vm->bound_list, vm_link) + if (i915_vma_is_active(vma)) + count++; active_bo = NULL; if (count) active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - count = capture_error_bo(active_bo, count, &vm->active_list, false); + count = capture_error_bo(active_bo, + count, &vm->bound_list, + ACTIVE_ONLY); else count = 0; @@ -1646,28 +1654,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error) struct i915_address_space *vm = &error->i915->ggtt.vm; struct drm_i915_error_buffer *bo; struct i915_vma *vma; - int count_inactive, count_active; + int count; - count_inactive = 0; - list_for_each_entry(vma, &vm->inactive_list, vm_link) - count_inactive++; - - count_active = 0; - list_for_each_entry(vma, &vm->active_list, vm_link) - count_active++; + count = 0; + list_for_each_entry(vma, &vm->bound_list, vm_link) + count++; bo = NULL; - if (count_inactive + count_active) - bo = kcalloc(count_inactive + count_active, - sizeof(*bo), GFP_ATOMIC); + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); if (!bo) return; - count_inactive = capture_error_bo(bo, count_inactive, - &vm->active_list, true); - count_active = capture_error_bo(bo + count_inactive, count_active, - &vm->inactive_list, true); - error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo_count = + capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY); error->pinned_bo = bo; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5b4d78cdb4ca..7de28baffb8f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) if (--vma->active_count) return; - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) return; @@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_active_isset(active) && !vma->active_count++) { - list_move_tail(&vma->vm_link, &vma->vm->active_list); + if (!i915_gem_active_isset(active) && !vma->active_count++) obj->active_count++; - } i915_gem_active_set(active, rq); GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!obj->active_count); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index d0553bc69705..af9b85cb8639 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -84,7 +84,7 @@ static int populate_ggtt(struct drm_i915_private *i915, return -EINVAL; } - if (list_empty(&i915->ggtt.vm.inactive_list)) { + if (list_empty(&i915->ggtt.vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -96,7 +96,7 @@ static void unpin_ggtt(struct drm_i915_private *i915) { struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 06bde4a273cb..8feb4af308ff 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); } static int exercise_mock(struct drm_i915_private *i915, From 09d7e46b97c663c9b7f5245871a8f19114e9148d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:53 +0000 Subject: [PATCH 349/539] drm/i915: Pull VM lists under the VM mutex. A starting point to counter the pervasive struct_mutex. For the goal of avoiding (or at least blocking under them!) global locks during user request submission, a simple but important step is being able to manage each clients GTT separately. For which, we want to replace using the struct_mutex as the guard for all things GTT/VM and switch instead to a specific mutex inside i915_address_space. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++------ drivers/gpu/drm/i915/i915_gem_evict.c | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +++++++++++++-- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++++ drivers/gpu/drm/i915/i915_gem_stolen.c | 2 ++ drivers/gpu/drm/i915/i915_vma.c | 11 +++++++++++ drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 3 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 3 +++ 8 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f45186ddb236..538fa5404603 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -245,18 +245,19 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; + mutex_lock(&ggtt->vm.mutex); + pinned = ggtt->vm.reserved; - mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - mutex_unlock(&dev->struct_mutex); + + mutex_unlock(&ggtt->vm.mutex); args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; @@ -1529,20 +1530,21 @@ err: static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915; + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct list_head *list; struct i915_vma *vma; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + mutex_lock(&i915->ggtt.vm.mutex); for_each_ggtt_vma(vma, obj) { if (!drm_mm_node_allocated(&vma->node)) continue; list_move_tail(&vma->vm_link, &vma->vm->bound_list); } + mutex_unlock(&i915->ggtt.vm.mutex); - i915 = to_i915(obj->base.dev); spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; list_move_tail(&obj->mm.link, list); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index d76839670632..68d74c50ac39 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -430,6 +430,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); + mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -437,6 +438,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } + mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2ad9070a54c1..49b00996a15e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1931,7 +1931,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_add(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); return vma; } @@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); + mutex_lock(&ggtt->vm.mutex); + /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ @@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) if (!(vma->flags & I915_VMA_GLOBAL_BIND)) continue; + mutex_unlock(&ggtt->vm.mutex); + if (!i915_vma_unbind(vma)) - continue; + goto lock; WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, PIN_UPDATE)); if (obj) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + +lock: + mutex_lock(&ggtt->vm.mutex); } ggtt->vm.closed = false; i915_ggtt_invalidate(dev_priv); + mutex_unlock(&ggtt->vm.mutex); + if (INTEL_GEN(dev_priv) >= 8) { struct intel_ppat *ppat = &dev_priv->ppat; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index a76d6c95c824..6da795c7e62e 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -461,6 +461,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr I915_SHRINK_VMAPS); /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); list_for_each_entry_safe(vma, next, &i915->ggtt.vm.bound_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; @@ -468,9 +469,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; + mutex_unlock(&i915->ggtt.vm.mutex); if (i915_vma_unbind(vma) == 0) freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); } + mutex_unlock(&i915->ggtt.vm.mutex); out: shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index a9e365789686..74a9661479ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -702,7 +702,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); + mutex_lock(&ggtt->vm.mutex); list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + mutex_unlock(&ggtt->vm.mutex); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 7de28baffb8f..dcbd0d345c72 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj, } rb_link_node(&vma->obj_node, rb, p); rb_insert_color(&vma->obj_node, &obj->vma_tree); + + mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); + mutex_unlock(&vm->mutex); return vma; @@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); + mutex_lock(&vma->vm->mutex); list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma) vma->ops->clear_pages(vma); + mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); list_del(&vma->obj_link); + + mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); + mutex_unlock(&vma->vm->mutex); + if (vma->obj) rb_erase(&vma->obj_node, &vma->obj->vma_tree); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index af9b85cb8639..32dce7176f63 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -94,11 +94,14 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { + struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; + mutex_lock(&ggtt->vm.mutex); list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); + mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8feb4af308ff..3850ef4a5ec8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma) __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; + + mutex_lock(&vma->vm->mutex); list_move_tail(&vma->vm_link, &vma->vm->bound_list); + mutex_unlock(&vma->vm->mutex); } static int exercise_mock(struct drm_i915_private *i915, From 528cbd17ceff070747a312c6312346b585495157 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:54 +0000 Subject: [PATCH 350/539] drm/i915: Move vma lookup to its own lock Remove the struct_mutex requirement for looking up the vma for an object. v2: Highlight how the race for duplicate vma creation is resolved on reacquiring the lock with a short comment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_gem.c | 33 ++++++---- drivers/gpu/drm/i915/i915_gem_object.h | 45 ++++++++----- drivers/gpu/drm/i915/i915_vma.c | 80 +++++++++++++++-------- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +- 6 files changed, 105 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ecf762e24d0e..c9c230499420 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 538fa5404603..15acd052da46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -437,15 +437,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -3489,7 +3493,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3567,7 +3571,7 @@ restart: */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3577,7 +3581,7 @@ restart: } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4153,7 +4157,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4319,14 +4325,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. Since the free * is delayed, first by RCU then by the workqueue, we want the diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + spinlock_t lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dcbd0d345c72..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -187,32 +187,52 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + vma->flags |= I915_VMA_GGTT; + } + + spin_lock(&obj->vma.lock); + + rb = NULL; + p = &obj->vma.tree.rb_node; + while (*p) { + struct i915_vma *pos; + long cmp; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) /* * We put the GGTT vma at the start of the vma-list, followed * by the ppGGTT vma. This allows us to break early when * iterating over only the GGTT vma for an object, see * for_each_ggtt_vma() */ - vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); - } + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); - rb = NULL; - p = &obj->vma_tree.rb_node; - while (*p) { - struct i915_vma *pos; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + spin_unlock(&obj->vma.lock); mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -808,14 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); - list_del(&vma->obj_link); - mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); + } rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..7252abc73d3e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. */ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name); From 0ca88ba0d6347cf8c4ea9f264c384594f8fefa11 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:55 +0000 Subject: [PATCH 351/539] drm/i915: Always allocate an object/vma for the HWSP Currently we only allocate an object and vma if we are using a GGTT virtual HWSP, and a plain struct page for a physical HWSP. For convenience later on with global timelines, it will be useful to always have the status page being tracked by a struct i915_vma. Make it so. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 109 ++++++++++--------- drivers/gpu/drm/i915/intel_guc_submission.c | 6 + drivers/gpu/drm/i915/intel_lrc.c | 12 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +--- drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +- 6 files changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1a5c163b98d6..2657eb6fd914 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { + struct i915_vma *vma; + /* Prevent writes into HWSP after returning the page to the system */ intel_engine_set_hwsp_writemask(engine, ~0u); - if (HWS_NEEDS_PHYSICAL(engine->i915)) { - void *addr = fetch_and_zero(&engine->status_page.page_addr); + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; - __free_page(virt_to_page(addr)); - } + if (!HWS_NEEDS_PHYSICAL(engine->i915)) + i915_vma_unpin(vma); - i915_vma_unpin_and_release(&engine->status_page.vma, - I915_VMA_RELEASE_MAP); + i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_unless_active(vma->obj); +} + +static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_vma *vma) +{ + unsigned int flags; + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* + * On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). + */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + + return i915_vma_pin(vma, 0, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - unsigned int flags; void *vaddr; int ret; + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. We only allow objects to be allocated + * in GFP_DMA32 for i965, and no earlier physical address users had + * access to more than 4G. + */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); @@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine) goto err; } - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actually map it). - */ - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); - if (ret) - goto err; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto err_unpin; + goto err; } + engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); engine->status_page.vma = vma; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + + if (!HWS_NEEDS_PHYSICAL(engine->i915)) { + ret = pin_ggtt_status_page(engine, vma); + if (ret) + goto err_unpin; + } + return 0; err_unpin: - i915_vma_unpin(vma); + i915_gem_object_unpin_map(obj); err: i915_gem_object_put(obj); return ret; } -static int init_phys_status_page(struct intel_engine_cs *engine) -{ - struct page *page; - - /* - * Though the HWS register does support 36bit addresses, historically - * we have had hangs and corruption reported due to wild writes if - * the HWS is placed above 4G. - */ - page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); - if (!page) - return -ENOMEM; - - engine->status_page.page_addr = page_address(page); - - return 0; -} - static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -690,10 +693,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(i915)) - ret = init_phys_status_page(engine); - else - ret = init_status_page(engine); + ret = init_status_page(engine); if (ret) goto err_breadcrumbs; @@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } if (HAS_EXECLISTS(dev_priv)) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + const u32 *hws = + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int idx; u8 read, write; @@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock_irqrestore(&b->rb_lock, flags); drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 45e2db683fe5..4295ade0d613 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -81,6 +81,12 @@ * */ +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 185867106c14..2cf99c436658 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(STOP_RING)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), - engine->status_page.ggtt_offset); + i915_ggtt_offset(engine->status_page.vma)); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = - &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; reset_csb_pointers(execlists); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a9efc8c71254..cb6d2aa2a829 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,6 +43,12 @@ */ #define LEGACY_REQUEST_SIZE 200 +static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_INDEX_ADDR); +} + static unsigned int __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) @@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys) I915_WRITE(HWS_PGA, addr); } +static struct page *status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj = engine->status_page.vma->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + static void ring_setup_phys_status_page(struct intel_engine_cs *engine) { - struct page *page = virt_to_page(engine->status_page.page_addr); - phys_addr_t phys = PFN_PHYS(page_to_pfn(page)); - - set_hws_pga(engine, phys); + set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine)))); set_hwstam(engine, ~0u); } @@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) static void ring_setup_status_page(struct intel_engine_cs *engine) { - set_hwsp(engine, engine->status_page.ggtt_offset); + set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma)); set_hwstam(engine, ~0u); flush_cs_tlb(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f2effd001540..32371ae67f24 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -32,8 +32,7 @@ struct i915_sched_attr; struct intel_hw_status_page { struct i915_vma *vma; - u32 *page_addr; - u32 ggtt_offset; + u32 *addr; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -671,7 +670,7 @@ static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { /* Ensure that the compiler doesn't optimize away the load. */ - return READ_ONCE(engine->status_page.page_addr[reg]); + return READ_ONCE(engine->status_page.addr[reg]); } static inline void @@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ if (static_cpu_has(X86_FEATURE_CLFLUSH)) { mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); + clflush(&engine->status_page.addr[reg]); + engine->status_page.addr[reg] = value; + clflush(&engine->status_page.addr[reg]); mb(); } else { - WRITE_ONCE(engine->status_page.page_addr[reg], value); + WRITE_ONCE(engine->status_page.addr[reg], value); } } @@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; -} - -static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) -{ - return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; -} - /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 905318b7ae18..4e5b4dc6df0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -200,7 +200,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; - engine->base.status_page.page_addr = (void *)(engine + 1); + engine->base.status_page.addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; From 1e345568e3b541e19202caadae8d2cb2237e7ed8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:56 +0000 Subject: [PATCH 352/539] drm/i915: Move list of timelines under its own lock Currently, the list of timelines is serialised by the struct_mutex, but to alleviate difficulties with using that mutex in future, move the list management under its own dedicated mutex. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++-------- drivers/gpu/drm/i915/i915_reset.c | 8 +- drivers/gpu/drm/i915/i915_timeline.c | 38 ++++++- drivers/gpu/drm/i915/i915_timeline.h | 3 + .../gpu/drm/i915/selftests/mock_gem_device.c | 7 +- .../gpu/drm/i915/selftests/mock_timeline.c | 3 +- 7 files changed, 109 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0133d1da3d3c..8a181b455197 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1975,7 +1975,10 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct list_head timelines; + struct i915_gt_timelines { + struct mutex mutex; /* protects list, tainted by GPU */ + struct list_head list; + } timelines; struct list_head active_rings; struct list_head closed_vma; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 15acd052da46..761714448ff3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3222,33 +3222,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static long wait_for_timeline(struct i915_timeline *tl, - unsigned int flags, long timeout) -{ - struct i915_request *rq; - - rq = i915_gem_active_get_unlocked(&tl->last_request); - if (!rq) - return timeout; - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - - return timeout; -} - static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3262,6 +3235,52 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } +static long +wait_for_timelines(struct drm_i915_private *i915, + unsigned int flags, long timeout) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline *tl; + + if (!READ_ONCE(i915->gt.active_requests)) + return timeout; + + mutex_lock(>->mutex); + list_for_each_entry(tl, >->list, link) { + struct i915_request *rq; + + rq = i915_gem_active_get_unlocked(&tl->last_request); + if (!rq) + continue; + + mutex_unlock(>->mutex); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + if (timeout < 0) + return timeout; + + /* restart after reacquiring the lock */ + mutex_lock(>->mutex); + tl = list_entry(>->list, typeof(*tl), link); + } + mutex_unlock(>->mutex); + + return timeout; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { @@ -3273,17 +3292,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!READ_ONCE(i915->gt.awake)) return 0; + timeout = wait_for_timelines(i915, flags, timeout); + if (timeout < 0) + return timeout; + if (flags & I915_WAIT_LOCKED) { - struct i915_timeline *tl; int err; lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(tl, &i915->gt.timelines, link) { - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } if (GEM_SHOW_DEBUG() && !timeout) { /* Presume that timeout was non-zero to begin with! */ dev_warn(&i915->drm.pdev->dev, @@ -3297,17 +3314,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - } else { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - struct i915_timeline *tl = &engine->timeline; - - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } } return 0; @@ -5008,6 +5014,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + i915_timelines_init(dev_priv); + ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; @@ -5130,8 +5138,10 @@ err_unlock: err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) + if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); + } if (ret == -EIO) { mutex_lock(&dev_priv->drm.struct_mutex); @@ -5182,6 +5192,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -5284,7 +5295,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5328,7 +5338,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - WARN_ON(!list_empty(&dev_priv->gt.timelines)); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 99bd3bc336b3..d2dca85a543d 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -854,7 +854,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * * No more can be submitted until we reset the wedged bit. */ - list_for_each_entry(tl, &i915->gt.timelines, link) { + mutex_lock(&i915->gt.timelines.mutex); + list_for_each_entry(tl, &i915->gt.timelines.list, link) { struct i915_request *rq; long timeout; @@ -876,9 +877,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) timeout = dma_fence_default_wait(&rq->fence, true, MAX_SCHEDULE_TIMEOUT); i915_request_put(rq); - if (timeout < 0) + if (timeout < 0) { + mutex_unlock(&i915->gt.timelines.mutex); goto unlock; + } } + mutex_unlock(&i915->gt.timelines.mutex); intel_engines_sanitize(i915, false); diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..84550f17d3df 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -13,7 +13,7 @@ void i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name) { - lockdep_assert_held(&i915->drm.struct_mutex); + struct i915_gt_timelines *gt = &i915->gt.timelines; /* * Ideally we want a set of engines on a single leaf as we expect @@ -23,9 +23,12 @@ void i915_timeline_init(struct drm_i915_private *i915, */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = name; - list_add(&timeline->link, &i915->gt.timelines); + mutex_lock(>->mutex); + list_add(&timeline->link, >->list); + mutex_unlock(>->mutex); /* Called during early_init before we know how many engines there are */ @@ -39,6 +42,17 @@ void i915_timeline_init(struct drm_i915_private *i915, i915_syncmap_init(&timeline->sync); } +void i915_timelines_init(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + mutex_init(>->mutex); + INIT_LIST_HEAD(>->list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(i915, >->mutex); +} + /** * i915_timelines_park - called when the driver idles * @i915: the drm_i915_private device @@ -51,11 +65,11 @@ void i915_timeline_init(struct drm_i915_private *i915, */ void i915_timelines_park(struct drm_i915_private *i915) { + struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *timeline; - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { + mutex_lock(>->mutex); + list_for_each_entry(timeline, >->list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -64,15 +78,20 @@ void i915_timelines_park(struct drm_i915_private *i915) */ i915_syncmap_free(&timeline->sync); } + mutex_unlock(>->mutex); } void i915_timeline_fini(struct i915_timeline *timeline) { + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + GEM_BUG_ON(!list_empty(&timeline->requests)); i915_syncmap_free(&timeline->sync); + mutex_lock(>->mutex); list_del(&timeline->link); + mutex_unlock(>->mutex); } struct i915_timeline * @@ -99,6 +118,15 @@ void __i915_timeline_free(struct kref *kref) kfree(timeline); } +void i915_timelines_fini(struct drm_i915_private *i915) +{ + struct i915_gt_timelines *gt = &i915->gt.timelines; + + GEM_BUG_ON(!list_empty(>->list)); + + mutex_destroy(>->mutex); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..87ad2dd31c20 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -66,6 +66,7 @@ struct i915_timeline { struct list_head link; const char *name; + struct drm_i915_private *i915; struct kref kref; }; @@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915); #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 8ab5a2688a0c..14ae46fda49f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - INIT_LIST_HEAD(&i915->gt.timelines); + i915_timelines_init(i915); + INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -253,6 +255,7 @@ err_context: i915_gem_contexts_fini(i915); err_unlock: mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) { + timeline->i915 = NULL; timeline->fence_context = context; spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) void mock_timeline_fini(struct i915_timeline *timeline) { - i915_timeline_fini(timeline); + i915_syncmap_free(&timeline->sync); } From 376bc02da22efc5a72441889f4d55ff555cd3430 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:05 +0100 Subject: [PATCH 353/539] drm/ast: Replace ttm_bo_unref with ttm_bo_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_put releases a reference to a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. A call to ttm_bo_unref takes the address of the TTM BO object's pointer and clears the pointer's value to NULL. This is not necessary in most cases and sometimes even worked around by the calling code. A call to ttm_bo_put only releases the reference without clearing the pointer. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ast/ast_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 373700c05a00..2854399856ba 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -639,13 +639,9 @@ int ast_dumb_create(struct drm_file *file, static void ast_bo_unref(struct ast_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - - tbo = &((*bo)->bo); - ttm_bo_unref(&tbo); + ttm_bo_put(&((*bo)->bo)); *bo = NULL; } From b063dc9c1c680d758826302ad4131e581d9a5091 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:06 +0100 Subject: [PATCH 354/539] drm/nouveau: Replace ttm_bo_reference with ttm_bo_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_get acquires a reference on a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/nouveau/nouveau_bo.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 73c48440d4d7..3920134cb132 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -61,7 +61,12 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo) return -EINVAL; prev = *pnvbo; - *pnvbo = ref ? nouveau_bo(ttm_bo_reference(&ref->bo)) : NULL; + if (ref) { + ttm_bo_get(&ref->bo); + *pnvbo = nouveau_bo(&ref->bo); + } else { + *pnvbo = NULL; + } if (prev) { struct ttm_buffer_object *bo = &prev->bo; From 829eee6290b35185f18aa6b52c057b5659b0e557 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:07 +0100 Subject: [PATCH 355/539] drm/nouveau: Replace ttm_bo_unref with ttm_bo_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_put releases a reference to a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. A call to ttm_bo_unref takes the address of the TTM BO object's pointer and clears the pointer's value to NULL. This is not necessary in most cases and sometimes even worked around by the calling code. A call to ttm_bo_put only releases the reference without clearing the pointer. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/nouveau/nouveau_bo.h | 7 ++----- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 3920134cb132..846f4bdec0de 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -67,11 +67,8 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo) } else { *pnvbo = NULL; } - if (prev) { - struct ttm_buffer_object *bo = &prev->bo; - - ttm_bo_unref(&bo); - } + if (prev) + ttm_bo_put(&prev->bo); return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index b56524d343c3..fb028e3b5f51 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -41,7 +41,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) { struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct ttm_buffer_object *bo = &nvbo->bo; struct device *dev = drm->dev->dev; int ret; @@ -56,7 +55,7 @@ nouveau_gem_object_del(struct drm_gem_object *gem) /* reset filp so nouveau_bo_del_ttm() can test for it */ gem->filp = NULL; - ttm_bo_unref(&bo); + ttm_bo_put(&nvbo->bo); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); From 2d18cb98d68e84d84dd952839efddcb82fd68d6c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:08 +0100 Subject: [PATCH 356/539] drm/vmwgfx: Replace ttm_bo_reference with ttm_bo_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_get acquires a reference on a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 ++--- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 7ce1c2f87d9a..bb9d95ef0707 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -614,7 +614,8 @@ int vmw_user_bo_alloc(struct vmw_private *dev_priv, if (unlikely(ret != 0)) return ret; - tmp = ttm_bo_reference(&user_bo->vbo.base); + ttm_bo_get(&user_bo->vbo.base); + tmp = &user_bo->vbo.base; ret = ttm_prime_object_init(tfile, size, &user_bo->prime, @@ -911,7 +912,7 @@ int vmw_user_bo_lookup(struct ttm_object_file *tfile, vmw_user_bo = container_of(base, struct vmw_user_buffer_object, prime.base); - (void)ttm_bo_reference(&vmw_user_bo->vbo.base); + ttm_bo_get(&vmw_user_bo->vbo.base); if (p_base) *p_base = base; else diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index cd607ba9c2fe..b7fb9b79bfef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1346,9 +1346,8 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) static inline struct vmw_buffer_object * vmw_bo_reference(struct vmw_buffer_object *buf) { - if (ttm_bo_reference(&buf->base)) - return buf; - return NULL; + ttm_bo_get(&buf->base); + return buf; } static inline struct ttm_mem_global *vmw_mem_glob(struct vmw_private *dev_priv) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 3025bfc001a1..d67b928adf6a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -461,7 +461,8 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket, } INIT_LIST_HEAD(&val_list); - val_buf->bo = ttm_bo_reference(&res->backup->base); + ttm_bo_get(&res->backup->base); + val_buf->bo = &res->backup->base; val_buf->num_shared = 0; list_add_tail(&val_buf->head, &val_list); ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL); From 6034d9d48e62a0bf36ce8926b556da52f4d13455 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:09 +0100 Subject: [PATCH 357/539] drm/vmwgfx: Replace ttm_bo_unref with ttm_bo_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_put releases a reference to a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. A call to ttm_bo_unref takes the address of the TTM BO object's pointer and clears the pointer's value to NULL. This is not necessary in most cases and sometimes even worked around by the calling code. A call to ttm_bo_put only releases the reference without clearing the pointer. In places where is might be necessary, the current behaviour of cleaning the pointer is kept. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 8 ++------ drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 11 +++++++---- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +--- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 21 ++++++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++-- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 6 ++++-- 6 files changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index bb9d95ef0707..5d5c2bce01f3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -534,7 +534,6 @@ static void vmw_user_bo_release(struct ttm_base_object **p_base) { struct vmw_user_buffer_object *vmw_user_bo; struct ttm_base_object *base = *p_base; - struct ttm_buffer_object *bo; *p_base = NULL; @@ -543,8 +542,7 @@ static void vmw_user_bo_release(struct ttm_base_object **p_base) vmw_user_bo = container_of(base, struct vmw_user_buffer_object, prime.base); - bo = &vmw_user_bo->vbo.base; - ttm_bo_unref(&bo); + ttm_bo_put(&vmw_user_bo->vbo.base); } @@ -597,7 +595,6 @@ int vmw_user_bo_alloc(struct vmw_private *dev_priv, struct ttm_base_object **p_base) { struct vmw_user_buffer_object *user_bo; - struct ttm_buffer_object *tmp; int ret; user_bo = kzalloc(sizeof(*user_bo), GFP_KERNEL); @@ -615,7 +612,6 @@ int vmw_user_bo_alloc(struct vmw_private *dev_priv, return ret; ttm_bo_get(&user_bo->vbo.base); - tmp = &user_bo->vbo.base; ret = ttm_prime_object_init(tfile, size, &user_bo->prime, @@ -624,7 +620,7 @@ int vmw_user_bo_alloc(struct vmw_private *dev_priv, &vmw_user_bo_release, &vmw_user_bo_ref_obj_release); if (unlikely(ret != 0)) { - ttm_bo_unref(&tmp); + ttm_bo_put(&user_bo->vbo.base); goto out_no_base_object; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 48d1380a952e..70dab55e7888 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -765,7 +765,7 @@ static bool vmw_cmdbuf_try_alloc(struct vmw_cmdbuf_man *man, if (info->done) return true; - + memset(info->node, 0, sizeof(*info->node)); spin_lock(&man->lock); ret = drm_mm_insert_node(&man->mm, info->node, info->page_size); @@ -1276,8 +1276,10 @@ int vmw_cmdbuf_set_pool_size(struct vmw_cmdbuf_man *man, return 0; out_no_map: - if (man->using_mob) - ttm_bo_unref(&man->cmd_space); + if (man->using_mob) { + ttm_bo_put(man->cmd_space); + man->cmd_space = NULL; + } return ret; } @@ -1380,7 +1382,8 @@ void vmw_cmdbuf_remove_pool(struct vmw_cmdbuf_man *man) (void) vmw_cmdbuf_idle(man, false, 10*HZ); if (man->using_mob) { (void) ttm_bo_kunmap(&man->map_obj); - ttm_bo_unref(&man->cmd_space); + ttm_bo_put(man->cmd_space); + man->cmd_space = NULL; } else { dma_free_coherent(&man->dev_priv->dev->pdev->dev, man->size, man->map, man->handle); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b7fb9b79bfef..accb2fafe2f1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1337,9 +1337,7 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { - struct ttm_buffer_object *bo = &tmp_buf->base; - - ttm_bo_unref(&bo); + ttm_bo_put(&tmp_buf->base); } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index 7ed179d30ec5..d83cc66e1210 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -300,7 +300,8 @@ out_no_setup: &batch->otables[i]); } - ttm_bo_unref(&batch->otable_bo); + ttm_bo_put(batch->otable_bo); + batch->otable_bo = NULL; out_no_bo: return ret; } @@ -365,7 +366,8 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, vmw_bo_fence_single(bo, NULL); ttm_bo_unreserve(bo); - ttm_bo_unref(&batch->otable_bo); + ttm_bo_put(batch->otable_bo); + batch->otable_bo = NULL; } /* @@ -463,7 +465,8 @@ static int vmw_mob_pt_populate(struct vmw_private *dev_priv, out_unreserve: ttm_bo_unreserve(mob->pt_bo); - ttm_bo_unref(&mob->pt_bo); + ttm_bo_put(mob->pt_bo); + mob->pt_bo = NULL; return ret; } @@ -580,8 +583,10 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, */ void vmw_mob_destroy(struct vmw_mob *mob) { - if (mob->pt_bo) - ttm_bo_unref(&mob->pt_bo); + if (mob->pt_bo) { + ttm_bo_put(mob->pt_bo); + mob->pt_bo = NULL; + } kfree(mob); } @@ -698,8 +703,10 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); - if (pt_set_up) - ttm_bo_unref(&mob->pt_bo); + if (pt_set_up) { + ttm_bo_put(mob->pt_bo); + mob->pt_bo = NULL; + } return -ENOMEM; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index d67b928adf6a..a7c30e567f09 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -485,7 +485,8 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket, out_no_validate: ttm_eu_backoff_reservation(ticket, &val_list); out_no_reserve: - ttm_bo_unref(&val_buf->bo); + ttm_bo_put(val_buf->bo); + val_buf->bo = NULL; if (backup_dirty) vmw_bo_unreference(&res->backup); @@ -545,7 +546,8 @@ vmw_resource_backoff_reservation(struct ww_acquire_ctx *ticket, INIT_LIST_HEAD(&val_list); list_add_tail(&val_buf->head, &val_list); ttm_eu_backoff_reservation(ticket, &val_list); - ttm_bo_unref(&val_buf->bo); + ttm_bo_put(val_buf->bo); + val_buf->bo = NULL; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index b3f547fc5d3d..e9944ac2e057 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -628,8 +628,10 @@ void vmw_validation_unref_lists(struct vmw_validation_context *ctx) struct vmw_validation_bo_node *entry; struct vmw_validation_res_node *val; - list_for_each_entry(entry, &ctx->bo_list, base.head) - ttm_bo_unref(&entry->base.bo); + list_for_each_entry(entry, &ctx->bo_list, base.head) { + ttm_bo_put(entry->base.bo); + entry->base.bo = NULL; + } list_splice_init(&ctx->resource_ctx_list, &ctx->resource_list); list_for_each_entry(val, &ctx->resource_list, head) From 706b7761abea968da71639a2a715afd72add7155 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:10 +0100 Subject: [PATCH 358/539] drm/mgag200: Replace ttm_bo_unref with ttm_bo_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_put releases a reference to a TTM buffer object. The function's name is more aligned to the Linux kernel convention of naming ref-counting function _get and _put. A call to ttm_bo_unref takes the address of the TTM BO object's pointer and clears the pointer's value to NULL. This is not necessary in most cases and sometimes even worked around by the calling code. A call to ttm_bo_put only releases the reference without clearing the pointer. The current behaviour of cleaning the pointer is kept in the calling code, but should be removed if not required in a later patch. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/mgag200/mgag200_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c index 79d54103d470..163255099779 100644 --- a/drivers/gpu/drm/mgag200/mgag200_main.c +++ b/drivers/gpu/drm/mgag200/mgag200_main.c @@ -33,7 +33,7 @@ int mgag200_framebuffer_init(struct drm_device *dev, struct drm_gem_object *obj) { int ret; - + drm_helper_mode_fill_fb_struct(dev, &gfb->base, mode_cmd); gfb->obj = obj; ret = drm_framebuffer_init(dev, &gfb->base, &mga_fb_funcs); @@ -318,13 +318,9 @@ int mgag200_dumb_create(struct drm_file *file, static void mgag200_bo_unref(struct mgag200_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - - tbo = &((*bo)->bo); - ttm_bo_unref(&tbo); + ttm_bo_put(&((*bo)->bo)); *bo = NULL; } From cbce5f0a9f306f890d257a92fcca6cb341af76ed Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:11 +0100 Subject: [PATCH 359/539] drm/ttm: Remove ttm_bo_reference and ttm_bo_unref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both functions are obsolete and all calls have been replaced by ttm_bo_get and ttm_bo_put. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 9 --------- include/drm/ttm/ttm_bo_api.h | 28 ---------------------------- 2 files changed, 37 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index de088c8070fb..3f56647cdb35 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -679,15 +679,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo) } EXPORT_SYMBOL(ttm_bo_put); -void ttm_bo_unref(struct ttm_buffer_object **p_bo) -{ - struct ttm_buffer_object *bo = *p_bo; - - *p_bo = NULL; - ttm_bo_put(bo); -} -EXPORT_SYMBOL(ttm_bo_unref); - int ttm_bo_lock_delayed_workqueue(struct ttm_bo_device *bdev) { return cancel_delayed_work_sync(&bdev->wq); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 3fc4854dce49..49d9cdfc58f2 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -295,23 +295,6 @@ static inline void ttm_bo_get(struct ttm_buffer_object *bo) kref_get(&bo->kref); } -/** - * ttm_bo_reference - reference a struct ttm_buffer_object - * - * @bo: The buffer object. - * - * Returns a refcounted pointer to a buffer object. - * - * This function is deprecated. Use @ttm_bo_get instead. - */ - -static inline struct ttm_buffer_object * -ttm_bo_reference(struct ttm_buffer_object *bo) -{ - ttm_bo_get(bo); - return bo; -} - /** * ttm_bo_get_unless_zero - reference a struct ttm_buffer_object unless * its refcount has already reached zero. @@ -386,17 +369,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, */ void ttm_bo_put(struct ttm_buffer_object *bo); -/** - * ttm_bo_unref - * - * @bo: The buffer object. - * - * Unreference and clear a pointer to a buffer object. - * - * This function is deprecated. Use @ttm_bo_put instead. - */ -void ttm_bo_unref(struct ttm_buffer_object **bo); - /** * ttm_bo_add_to_lru * From 22051b636e347888bfd2efbbc9bcb8918d16e8c5 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Thu, 10 Jan 2019 13:05:15 +0800 Subject: [PATCH 360/539] drm/amd/display: Poll pending down rep before clear payload allocation table [Why] On current design, driver cannot handle the interrupt for down reply when link training is processing. The DOWN REQ send before link training will keep in the pending DOWN REP state in the queue. It makes the next DOWN REQ be queued until time out. [How] To add a polling sequence before clear payload allocation table to make sure the pending DOWN REP can be handled. Signed-off-by: Martin Tsai Reviewed-by: Charlene Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/core/dc_link.c | 5 +++++ drivers/gpu/drm/amd/display/dc/dm_helpers.h | 7 +++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index e6ab0186955c..cae16b6d2344 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -263,6 +263,13 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( return true; } +/* + * poll pending down reply before clear payload allocation table + */ +void dm_helpers_dp_mst_poll_pending_down_reply( + struct dc_context *ctx, + const struct dc_link *link) +{} /* * Clear payload allocation table before enable MST DP link. diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 3dd5f2717b53..8ff5d42587c2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1467,6 +1467,11 @@ static enum dc_status enable_link_dp_mst( if (link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) return DC_OK; + /* to make sure the pending down rep can be processed + * before clear payload table + */ + dm_helpers_dp_mst_poll_pending_down_reply(link->ctx, link); + /* clear payload table */ dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link); diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 5d4527d03045..e81b24374bcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -57,6 +57,13 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( struct dp_mst_stream_allocation_table *proposed_table, bool enable); +/* + * poll pending down reply before clear payload allocation table + */ +void dm_helpers_dp_mst_poll_pending_down_reply( + struct dc_context *ctx, + const struct dc_link *link); + /* * Clear payload allocation table before enable MST DP link. */ From 428da2bdb05d76c48d0bd8fbfa2e4c102685be08 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 14 Jan 2019 16:04:10 -0500 Subject: [PATCH 361/539] drm/amd/display: Enable vblank interrupt during CRC capture [Why] In order to read CRC events when CRC capture is enabled the vblank interrput handler needs to be running for the CRTC. The handler is enabled while there is an active vblank reference. When running IGT tests there will often be no active vblank reference but the test expects to read a CRC value. This is valid usage (and works on i915 since they have a CRC interrupt handler) so the reference to the vblank should be grabbed while capture is active. This issue was found running: igt@kms_plane_multiple@atomic-pipe-b-tiling-none The pipe-b is the only one in the initial commit and was not previously active so no vblank reference is grabbed. The vblank interrupt is not enabled and the test times out. [How] Keep a reference to the vblank as long as CRC capture is enabled. If userspace never explicitly disables it then the reference is also dropped when removing the CRTC from the context (stream = NULL). Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++- .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 42 +++++++++---------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 407f733a47ea..b0bdf5ceed0e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4917,10 +4917,22 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev, */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - if (drm_atomic_crtc_needs_modeset(new_crtc_state) && dm_old_crtc_state->stream) + if (drm_atomic_crtc_needs_modeset(new_crtc_state) + && dm_old_crtc_state->stream) { + /* + * CRC capture was enabled but not disabled. + * Release the vblank reference. + */ + if (dm_new_crtc_state->crc_enabled) { + drm_crtc_vblank_put(crtc); + dm_new_crtc_state->crc_enabled = false; + } + manage_dm_interrupts(adev, acrtc, false); + } } /* * Add check here for SoC's that support hardware cursor plane, to diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index f088ac585978..26b651148c67 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -66,6 +66,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) { struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); struct dc_stream_state *stream_state = crtc_state->stream; + bool enable; enum amdgpu_dm_pipe_crc_source source = dm_parse_crc_source(src_name); @@ -80,28 +81,27 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) return -EINVAL; } + enable = (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO); + + if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, + enable, enable)) + return -EINVAL; + /* When enabling CRC, we should also disable dithering. */ - if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) { - if (dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, - true, true)) { - crtc_state->crc_enabled = true; - dc_stream_set_dither_option(stream_state, - DITHER_OPTION_TRUN8); - } - else - return -EINVAL; - } else { - if (dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, - false, false)) { - crtc_state->crc_enabled = false; - dc_stream_set_dither_option(stream_state, - DITHER_OPTION_DEFAULT); - } - else - return -EINVAL; - } + dc_stream_set_dither_option(stream_state, + enable ? DITHER_OPTION_TRUN8 + : DITHER_OPTION_DEFAULT); + + /* + * Reading the CRC requires the vblank interrupt handler to be + * enabled. Keep a reference until CRC capture stops. + */ + if (!crtc_state->crc_enabled && enable) + drm_crtc_vblank_get(crtc); + else if (crtc_state->crc_enabled && !enable) + drm_crtc_vblank_put(crtc); + + crtc_state->crc_enabled = enable; /* Reset crc_skipped on dm state */ crtc_state->crc_skip_count = 0; From 43a6a02eb3558d1f3a0618f9bf02328662fb06e3 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 15 Jan 2019 10:33:58 -0500 Subject: [PATCH 362/539] drm/amd/display: Re-enable CRC capture following modeset [Why] During any modeset the CRTC stream is removed and a new stream is added. This new stream doesn't carry over CRC capture state if it was previously set. [How] Re-program the stream for CRC capture. The existing DRM callback can be re-used here for the most part - the only modification needed is additional locking now that it's called from within commit tail. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Reviewed-by: Sun peng Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 ++++++++++--- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 8 +++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b0bdf5ceed0e..a247cb19077c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4923,10 +4923,13 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev, if (drm_atomic_crtc_needs_modeset(new_crtc_state) && dm_old_crtc_state->stream) { /* - * CRC capture was enabled but not disabled. - * Release the vblank reference. + * If the stream is removed and CRC capture was + * enabled on the CRTC the extra vblank reference + * needs to be dropped since CRC capture will be + * disabled. */ - if (dm_new_crtc_state->crc_enabled) { + if (!dm_new_crtc_state->stream + && dm_new_crtc_state->crc_enabled) { drm_crtc_vblank_put(crtc); dm_new_crtc_state->crc_enabled = false; } @@ -5164,6 +5167,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) continue; manage_dm_interrupts(adev, acrtc, true); + + /* The stream has changed so CRC capture needs to re-enabled. */ + if (dm_new_crtc_state->crc_enabled) + amdgpu_dm_crtc_set_crc_source(crtc, "auto"); } /* update planes when needed per crtc*/ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 26b651148c67..a10e3a50d9ef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -64,6 +64,7 @@ amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) { + struct amdgpu_device *adev = crtc->dev->dev_private; struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); struct dc_stream_state *stream_state = crtc_state->stream; bool enable; @@ -83,15 +84,20 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) enable = (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO); + mutex_lock(&adev->dm.dc_lock); if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, - enable, enable)) + enable, enable)) { + mutex_unlock(&adev->dm.dc_lock); return -EINVAL; + } /* When enabling CRC, we should also disable dithering. */ dc_stream_set_dither_option(stream_state, enable ? DITHER_OPTION_TRUN8 : DITHER_OPTION_DEFAULT); + mutex_unlock(&adev->dm.dc_lock); + /* * Reading the CRC requires the vblank interrupt handler to be * enabled. Keep a reference until CRC capture stops. From 8fde60b7f350045614aecbc433eda830b5413b6d Mon Sep 17 00:00:00 2001 From: Fatemeh Darbehani Date: Fri, 11 Jan 2019 11:00:26 -0500 Subject: [PATCH 363/539] drm/amd/display: Add Vline1 interrupt source to InterruptManager [Why] Enhanced sync need to use vertical_interrupt1. [How] Add vertical_interrupt1 source to irq manger, Implment setup vline interrupt interface. Signed-off-by: Fatemeh Darbehani Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 10 +-- drivers/gpu/drm/amd/display/dc/dc_stream.h | 14 +++- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 72 ++++--------------- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 12 +++- .../amd/display/dc/inc/hw/timing_generator.h | 13 +++- drivers/gpu/drm/amd/display/dc/irq_types.h | 8 +++ 6 files changed, 61 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1dabafc12cfe..e22be0aefd1b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1463,11 +1463,13 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->adjust->v_total_min, stream_update->adjust->v_total_max); - if (stream_update->periodic_fn_vsync_delta && - pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + if (stream_update->vline0_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, - pipe_ctx->stream->periodic_fn_vsync_delta); + pipe_ctx->stream_res.tg, VLINE0, stream->vline0_config); + + if (stream_update->vline1_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( + pipe_ctx->stream_res.tg, VLINE1, stream->vline1_config); if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) || stream_update->vrr_infopacket || diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 7bb1da18c1ba..0de6d7f377a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -45,6 +45,11 @@ struct freesync_context { bool dummy; }; +struct vline_config { + unsigned int start_line; + unsigned int end_line; +}; + struct dc_stream_state { // sink is deprecated, new code should not reference // this pointer @@ -85,8 +90,6 @@ struct dc_stream_state { uint8_t qs_bit; uint8_t qy_bit; - unsigned long long periodic_fn_vsync_delta; - /* TODO: custom INFO packets */ /* TODO: ABM info (DMCU) */ /* PSR info */ @@ -96,6 +99,9 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; + struct vline_config vline0_config; + struct vline_config vline1_config; + /* from core_stream struct */ struct dc_context *ctx; @@ -143,7 +149,9 @@ struct dc_stream_update { struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; - unsigned long long *periodic_fn_vsync_delta; + struct vline_config *vline0_config; + struct vline_config *vline1_config; + struct dc_crtc_timing_adjust *adjust; struct dc_info_packet *vrr_infopacket; struct dc_info_packet *vsc_infopacket; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index eb019d404928..1d4f9b48ed7d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -92,68 +92,26 @@ static void optc1_disable_stereo(struct timing_generator *optc) OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } -static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) -{ - struct dc_crtc_timing patched_crtc_timing; - int vesa_sync_start; - int asic_blank_end; - int vertical_line_start; - - patched_crtc_timing = *dc_crtc_timing; - optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); - - vesa_sync_start = patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom + - patched_crtc_timing.v_front_porch; - - asic_blank_end = (patched_crtc_timing.v_total - - vesa_sync_start - - patched_crtc_timing.v_border_top); - - vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - if (vertical_line_start < 0) - vertical_line_start = 0; - - return vertical_line_start; -} - void optc1_program_vline_interrupt( struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - unsigned long long vsync_delta) + enum vline_select vline, + struct vline_config vline_config) { - struct optc *optc1 = DCN10TG_FROM_TG(optc); - unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); - unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_100hz + 999), 1000); - uint32_t req_delta_lines = (uint32_t) div64_u64( - (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), - dc_crtc_timing->h_total); - - uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); - uint32_t start_line = 0; - uint32_t end_line = 0; - - if (req_delta_lines != 0) - req_delta_lines--; - - if (req_delta_lines > vsync_line) - start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) + 2; - else - start_line = vsync_line - req_delta_lines; - - end_line = start_line + 2; - - if (start_line >= dc_crtc_timing->v_total) - start_line = start_line % dc_crtc_timing->v_total; - - if (end_line >= dc_crtc_timing->v_total) - end_line = end_line % dc_crtc_timing->v_total; - - REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, - OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); + switch (vline) { + case VLINE0: + REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, + OTG_VERTICAL_INTERRUPT0_LINE_START, vline_config.start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, vline_config.end_line); + break; + case VLINE1: + REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, + OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config.start_line); + break; + default: + break; + } } /** diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 8bacf0b6e27e..8eb71c0160a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -67,6 +67,8 @@ SRI(OTG_CLOCK_CONTROL, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ @@ -135,6 +137,8 @@ struct dcn_optc_registers { uint32_t OTG_CLOCK_CONTROL; uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; + uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT1_POSITION; uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; uint32_t OPTC_INPUT_CLOCK_CONTROL; @@ -227,6 +231,8 @@ struct dcn_optc_registers { SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ @@ -361,6 +367,8 @@ struct dcn_optc_registers { type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\ type OTG_VERTICAL_INTERRUPT0_LINE_START;\ type OTG_VERTICAL_INTERRUPT0_LINE_END;\ + type OTG_VERTICAL_INTERRUPT1_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT1_LINE_START;\ type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\ type OTG_VERTICAL_INTERRUPT2_LINE_START;\ type OPTC_INPUT_CLK_EN;\ @@ -476,8 +484,8 @@ void optc1_program_timing( bool use_vbios); void optc1_program_vline_interrupt(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - unsigned long long vsync_delta); + enum vline_select vline, + struct vline_config vline_config); void optc1_program_global_sync( struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index af700c7dac50..df64cf73ceb9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -134,6 +134,15 @@ struct dc_crtc_timing; struct drr_params; +struct vline_config; + + +enum vline_select { + VLINE0, + VLINE1, + VLINE2 +}; + struct timing_generator_funcs { bool (*validate_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing); @@ -141,8 +150,8 @@ struct timing_generator_funcs { const struct dc_crtc_timing *timing, bool use_vbios); void (*program_vline_interrupt)(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - unsigned long long vsync_delta); + enum vline_select vline, + struct vline_config vline_config); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 0b5f3a278c22..d0ccd81ad5b4 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -144,6 +144,14 @@ enum dc_irq_source { DC_IRQ_SOURCE_DC5_VLINE0, DC_IRQ_SOURCE_DC6_VLINE0, + DC_IRQ_SOURCE_DC1_VLINE1, + DC_IRQ_SOURCE_DC2_VLINE1, + DC_IRQ_SOURCE_DC3_VLINE1, + DC_IRQ_SOURCE_DC4_VLINE1, + DC_IRQ_SOURCE_DC5_VLINE1, + DC_IRQ_SOURCE_DC6_VLINE1, + + DAL_IRQ_SOURCES_NUMBER }; From 4b5105036afbd21fb810d3f1528f9a26ef3eea7e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 28 Jan 2019 09:00:52 -0500 Subject: [PATCH 364/539] drm/amd/display: Don't leak memory when updating streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] The flip and full structures were allocated but never freed. [How] Free them at the end of the function. There's a small behavioral change here with the function returning early if the allocation fails but we wouldn't should be doing anything in that case anyway. Fixes: c00e0cc0fdc0 ("drm/amd/display: Call into DC once per multiplane flip") Fixes: ea39594e0855 ("drm/amd/display: Perform plane updates only when needed") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Tested-by: Michel Dänzer Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a247cb19077c..1923f47d3dd6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4658,8 +4658,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, flip = kzalloc(sizeof(*flip), GFP_KERNEL); full = kzalloc(sizeof(*full), GFP_KERNEL); - if (!flip || !full) + if (!flip || !full) { dm_error("Failed to allocate update bundles\n"); + goto cleanup; + } /* update planes when needed */ for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { @@ -4883,6 +4885,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_state); mutex_unlock(&dm->dc_lock); } + +cleanup: + kfree(flip); + kfree(full); } /* From a97c084aad394f65a47af92273494d2ce0da1be5 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 25 Jan 2019 16:29:25 -0500 Subject: [PATCH 365/539] drm/amd/powerplay: add override pcie parameters for Vega20 It is to solve RDMA performance issue. Signed-off-by: Eric Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 7b49a9a13a4a..da022ca79b56 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -771,6 +771,47 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg; + int ret; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_speed = 16; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_speed = 8; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_speed = 5; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_speed = 2; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) + pcie_width = 32; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 16; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 12; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 8; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + pcie_arg = pcie_width | (pcie_speed << 8); + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, pcie_arg); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + + return 0; +} + static int vega20_set_allowed_featuresmask(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = @@ -1570,6 +1611,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to initialize SMC table!", return result); + result = vega20_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + result = vega20_run_btc(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to run btc!", From 3adac4689f58cb3fb666d92dff0ee73cc97d24d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:07 +0000 Subject: [PATCH 366/539] drm/i915: Introduce concept of per-timeline (context) HWSP Supplement the per-engine HWSP with a per-timeline HWSP. That is a per-request pointer through which we can check a local seqno, abstracting away the presumption of a global seqno. In this first step, we point each request back into the engine's HWSP so everything continues to work with the global timeline. v2: s/i915_request_hwsp/hwsp_seqno/ to emphasis that this is the current HW value and that we are accessing it via i915_request merely as a convenience. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 ++++++---- drivers/gpu/drm/i915/i915_request.h | 45 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_lrc.c | 9 ++++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f4241a17e2ad..a076fd0b7ba6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -355,10 +358,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -405,10 +409,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + hwsp_seqno(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -616,6 +621,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); + rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0f084ca4f29..ade010fe6e26 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -130,6 +130,13 @@ struct i915_request { struct i915_sched_node sched; struct i915_dependency dep; + /* + * A convenience pointer to the current breadcrumb value stored in + * the HW status page (or our timeline's local equivalent). The full + * path would be rq->hw_context->ring->timeline->hwsp_seqno. + */ + const u32 *hwsp_seqno; + /** * GEM sequence number associated with this request on the * global execution timeline. It is zero when the request is not @@ -285,11 +292,6 @@ static inline bool i915_request_signaled(const struct i915_request *rq) return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno); - /** * Returns true if seq1 is later than seq2. */ @@ -298,6 +300,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 __hwsp_seqno(const struct i915_request *rq) +{ + return READ_ONCE(*rq->hwsp_seqno); +} + +/** + * hwsp_seqno - the current breadcrumb value in the HW status page + * @rq: the request, to chase the relevant HW status page + * + * The emphasis in naming here is that hwsp_seqno() is not a property of the + * request, but an indication of the current HW state (associated with this + * request). Its value will change as the GPU executes more requests. + * + * Returns the current breadcrumb value in the associated HW status page (or + * the local timeline's equivalent) for this request. The request itself + * has the associated breadcrumb value of rq->fence.seqno, when the HW + * status page has that breadcrumb or later, this request is complete. + */ +static inline u32 hwsp_seqno(const struct i915_request *rq) +{ + u32 seqno; + + rcu_read_lock(); /* the HWSP may be freed at runtime */ + seqno = __hwsp_seqno(rq); + rcu_read_unlock(); + + return seqno; +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -315,14 +346,14 @@ static inline bool i915_request_started(const struct i915_request *rq) if (!seqno) /* not yet submitted to HW */ return false; - return intel_engine_has_started(rq->engine, seqno); + return i915_seqno_passed(hwsp_seqno(rq), seqno - 1); } static inline bool __i915_request_completed(const struct i915_request *rq, u32 seqno) { GEM_BUG_ON(!seqno); - return intel_engine_has_completed(rq->engine, seqno) && + return i915_seqno_passed(hwsp_seqno(rq), seqno) && seqno == i915_request_global_seqno(rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2cf99c436658..9ae7f77293a0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -446,11 +446,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -742,11 +743,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", rq->engine->name, (unsigned int)(port - execlists->port), rq->global_seqno, rq->fence.context, rq->fence.seqno, + hwsp_seqno(rq), intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); @@ -970,12 +972,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, + rq ? hwsp_seqno(rq) : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); From b18fe4be59f215b1ce75f406d04810454f206faf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:08 +0000 Subject: [PATCH 367/539] drm/i915: Enlarge vma->pin_count Previously we only accommodated having a vma pinned by a small number of users, with the maximum being pinned for use by the display engine. As such, we used a small bitfield only large enough to allow the vma to be pinned twice (for back/front buffers) in each scanout plane. Keeping the maximum permissible pin_count small allows us to quickly catch a potential leak. However, as we want to split a 4096B page into 64 different cachelines and pin each cacheline for use by a different timeline, we will exceed the current maximum permissible vma->pin_count and so time has come to enlarge it. Whilst we are here, try to pull together the similar bits: Address/layout specification: - bias, mappable, zone_4g: address limit specifiers - fixed: address override, limits still apply though - high: not strictly an address limit, but an address direction to search Search controls: - nonblock, nonfault, noevict v2: Rewrite the guideline comment on bit consumption. Signed-off-by: Chris Wilson Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 22 +++++++------- drivers/gpu/drm/i915/i915_vma.h | 45 +++++++++++++++++++---------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bd679c8c56dd..03ade71b8d9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, /* Flags used by pin/bind&friends. */ #define PIN_NONBLOCK BIT_ULL(0) -#define PIN_MAPPABLE BIT_ULL(1) -#define PIN_ZONE_4G BIT_ULL(2) -#define PIN_NONFAULT BIT_ULL(3) -#define PIN_NOEVICT BIT_ULL(4) +#define PIN_NONFAULT BIT_ULL(1) +#define PIN_NOEVICT BIT_ULL(2) +#define PIN_MAPPABLE BIT_ULL(3) +#define PIN_ZONE_4G BIT_ULL(4) +#define PIN_HIGH BIT_ULL(5) +#define PIN_OFFSET_BIAS BIT_ULL(6) +#define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(8) +#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(11) -#define PIN_HIGH BIT_ULL(9) -#define PIN_OFFSET_BIAS BIT_ULL(10) -#define PIN_OFFSET_FIXED BIT_ULL(11) #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 7252abc73d3e..5793abe509a2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -71,29 +71,42 @@ struct i915_vma { unsigned int open_count; unsigned long flags; /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. + * How many users have pinned this object in GTT space. * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. + * This is a tightly bound, fairly small number of users, so we + * stuff inside the flags field so that we can both check for overflow + * and detect a no-op i915_vma_pin() in a single check, while also + * pinning the vma. + * + * The worst case display setup would have the same vma pinned for + * use on each plane on each crtc, while also building the next atomic + * state and holding a pin for the length of the cleanup queue. In the + * future, the flip queue may be increased from 1. + * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84 + * + * For GEM, the number of concurrent users for pwrite/pread is + * unbounded. For execbuffer, it is currently one but will in future + * be extended to allow multiple clients to pin vma concurrently. + * + * We also use suballocated pages, with each suballocation claiming + * its own pin on the shared vma. At present, this is limited to + * exclusive cachelines of a single page, so a maximum of 64 possible + * users. */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) +#define I915_VMA_PIN_MASK 0xff +#define I915_VMA_PIN_OVERFLOW BIT(8) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_GLOBAL_BIND BIT(9) +#define I915_VMA_LOCAL_BIND BIT(10) #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) -#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_GGTT BIT(11) +#define I915_VMA_CAN_FENCE BIT(12) +#define I915_VMA_CLOSED BIT(13) +#define I915_VMA_USERFAULT_BIT 14 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(12) +#define I915_VMA_GGTT_WRITE BIT(15) unsigned int active_count; struct rb_root active; From 52954edd1f7030f753a63093c16826ef50805098 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:09 +0000 Subject: [PATCH 368/539] drm/i915: Allocate a status page for each timeline Allocate a page for use as a status page by a group of timelines, as we only need a dword of storage for each (rounded up to the cacheline for safety) we can pack multiple timelines into the same page. Each timeline will then be able to track its own HW seqno. v2: Reuse the common per-engine HWSP for the solitary ringbuffer timeline, so that we do not have to emit (using per-gen specialised vfuncs) the breadcrumb into the distinct timeline HWSP and instead can keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the sleight-of-hand for the global/per-context seqno switchover, we will store both temporarily (and so use a custom offset for the shared timeline HWSP until the switch over). v3: Keep things simple and allocate a page for each timeline, page sharing comes next. v4: I was caught repeating the same MI_STORE_DWORD_IMM over and over again in selftests. v5: And caught red handed copying create timeline + check. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_timeline.c | 121 ++++++- drivers/gpu/drm/i915/i915_timeline.h | 21 +- drivers/gpu/drm/i915/intel_engine_cs.c | 76 ++-- drivers/gpu/drm/i915/intel_lrc.c | 22 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +- .../drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/i915_mock_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_timeline.c | 326 +++++++++++++++++- drivers/gpu/drm/i915/selftests/mock_engine.c | 14 +- 10 files changed, 543 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 84550f17d3df..8d5792311a8f 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,28 +9,78 @@ #include "i915_timeline.h" #include "i915_syncmap.h" -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *timeline, - const char *name) +static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static int hwsp_alloc(struct i915_timeline *timeline) +{ + struct i915_vma *vma; + + vma = __hwsp_alloc(timeline->i915); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + timeline->hwsp_ggtt = vma; + timeline->hwsp_offset = 0; + + return 0; +} + +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name, + struct i915_vma *global_hwsp) { struct i915_gt_timelines *gt = &i915->gt.timelines; + void *vaddr; + int err; /* * Ideally we want a set of engines on a single leaf as we expect * to mostly be tracking synchronisation between engines. It is not * a huge issue if this is not the case, but we may want to mitigate * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); timeline->i915 = i915; timeline->name = name; + timeline->pin_count = 0; - mutex_lock(>->mutex); - list_add(&timeline->link, >->list); - mutex_unlock(>->mutex); + if (global_hwsp) { + timeline->hwsp_ggtt = i915_vma_get(global_hwsp); + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + } else { + err = hwsp_alloc(timeline); + if (err) + return err; + } - /* Called during early_init before we know how many engines there are */ + vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_put(timeline->hwsp_ggtt); + return PTR_ERR(vaddr); + } + + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); timeline->fence_context = dma_fence_context_alloc(1); @@ -40,6 +90,12 @@ void i915_timeline_init(struct drm_i915_private *i915, INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); + + mutex_lock(>->mutex); + list_add(&timeline->link, >->list); + mutex_unlock(>->mutex); + + return 0; } void i915_timelines_init(struct drm_i915_private *i915) @@ -85,6 +141,7 @@ void i915_timeline_fini(struct i915_timeline *timeline) { struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); i915_syncmap_free(&timeline->sync); @@ -92,23 +149,69 @@ void i915_timeline_fini(struct i915_timeline *timeline) mutex_lock(>->mutex); list_del(&timeline->link); mutex_unlock(>->mutex); + + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + i915_vma_put(timeline->hwsp_ggtt); } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name) +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp) { struct i915_timeline *timeline; + int err; timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); if (!timeline) return ERR_PTR(-ENOMEM); - i915_timeline_init(i915, timeline, name); + err = i915_timeline_init(i915, timeline, name, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + kref_init(&timeline->kref); return timeline; } +int i915_timeline_pin(struct i915_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +void i915_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. + */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 87ad2dd31c20..0c3739d53d79 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -32,6 +32,8 @@ #include "i915_syncmap.h" #include "i915_utils.h" +struct i915_vma; + struct i915_timeline { u64 fence_context; u32 seqno; @@ -40,6 +42,11 @@ struct i915_timeline { #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. @@ -71,9 +78,10 @@ struct i915_timeline { struct kref kref; }; -void i915_timeline_init(struct drm_i915_private *i915, - struct i915_timeline *tl, - const char *name); +int i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name, + struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); static inline void @@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline, } struct i915_timeline * -i915_timeline_create(struct drm_i915_private *i915, const char *name); +i915_timeline_create(struct drm_i915_private *i915, + const char *name, + struct i915_vma *global_hwsp); static inline struct i915_timeline * i915_timeline_get(struct i915_timeline *timeline) @@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +int i915_timeline_pin(struct i915_timeline *tl); +void i915_timeline_unpin(struct i915_timeline *tl); + void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); void i915_timelines_fini(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2657eb6fd914..515e87846afd 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) execlists->queue = RB_ROOT_CACHED; } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -void intel_engine_setup_common(struct intel_engine_cs *engine) -{ - i915_timeline_init(engine->i915, &engine->timeline, engine->name); - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - - intel_engine_init_execlist(engine); - intel_engine_init_hangcheck(engine); - intel_engine_init_batch_pool(engine); - intel_engine_init_cmd_parser(engine); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { struct i915_vma *vma; @@ -601,6 +581,44 @@ err: return ret; } +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engine_setup_common(struct intel_engine_cs *engine) +{ + int err; + + err = init_status_page(engine); + if (err) + return err; + + err = i915_timeline_init(engine->i915, + &engine->timeline, + engine->name, + engine->status_page.vma); + if (err) + goto err_hwsp; + + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); + + intel_engine_init_execlist(engine); + intel_engine_init_hangcheck(engine); + intel_engine_init_batch_pool(engine); + intel_engine_init_cmd_parser(engine); + + return 0; + +err_hwsp: + cleanup_status_page(engine); + return err; +} + static void __intel_context_unpin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -617,7 +635,7 @@ struct measure_breadcrumb { static int measure_breadcrumb_dw(struct intel_engine_cs *engine) { struct measure_breadcrumb *frame; - unsigned int dw; + int dw = -ENOMEM; GEM_BUG_ON(!engine->i915->gt.scratch); @@ -625,7 +643,10 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (!frame) return -ENOMEM; - i915_timeline_init(engine->i915, &frame->timeline, "measure"); + if (i915_timeline_init(engine->i915, + &frame->timeline, "measure", + engine->status_page.vma)) + goto out_frame; INIT_LIST_HEAD(&frame->ring.request_list); frame->ring.timeline = &frame->timeline; @@ -642,8 +663,9 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; i915_timeline_fini(&frame->timeline); - kfree(frame); +out_frame: + kfree(frame); return dw; } @@ -693,20 +715,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - ret = init_status_page(engine); - if (ret) - goto err_breadcrumbs; - ret = measure_breadcrumb_dw(engine); if (ret < 0) - goto err_status_page; + goto err_breadcrumbs; engine->emit_breadcrumb_dw = ret; return 0; -err_status_page: - cleanup_status_page(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9ae7f77293a0..e388f37743a2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2206,10 +2206,14 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static void +static int logical_ring_setup(struct intel_engine_cs *engine) { - intel_engine_setup_common(engine); + int err; + + err = intel_engine_setup_common(engine); + if (err) + return err; /* Intentionally left blank. */ engine->buffer = NULL; @@ -2219,6 +2223,8 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + + return 0; } static int logical_ring_init(struct intel_engine_cs *engine) @@ -2267,7 +2273,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine) { int ret; - logical_ring_setup(engine); + ret = logical_ring_setup(engine); + if (ret) + return ret; /* Override some for render ring. */ engine->init_context = gen8_init_rcs_context; @@ -2296,7 +2304,11 @@ int logical_render_ring_init(struct intel_engine_cs *engine) int logical_xcs_ring_init(struct intel_engine_cs *engine) { - logical_ring_setup(engine); + int err; + + err = logical_ring_setup(engine); + if (err) + return err; return logical_ring_init(engine); } @@ -2629,7 +2641,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - timeline = i915_timeline_create(ctx->i915, ctx->name); + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL); if (IS_ERR(timeline)) { ret = PTR_ERR(timeline); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cb6d2aa2a829..174795622eb1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1545,9 +1545,13 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) struct intel_ring *ring; int err; - intel_engine_setup_common(engine); + err = intel_engine_setup_common(engine); + if (err) + return err; - timeline = i915_timeline_create(engine->i915, engine->name); + timeline = i915_timeline_create(engine->i915, + engine->name, + engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; @@ -1571,6 +1575,8 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err_unpin; + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 32371ae67f24..2927b712b973 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -712,7 +712,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) -#define I915_GEM_HWS_SCRATCH 0x40 +#define I915_GEM_HWS_SEQNO 0x40 +#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_SCRATCH 0x80 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 @@ -818,7 +820,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a15713cae3b3..76b4f87fc853 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -13,6 +13,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(timelines, i915_timeline_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 1b70208eeea7..4a83a1c6c406 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) -selftest(timelines, i915_gem_timeline_mock_selftests) +selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 19f1c6a5c8fb..1585b614510d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -7,6 +7,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_gem_device.h" #include "mock_timeline.h" @@ -256,7 +257,7 @@ static int bench_sync(void *arg) return 0; } -int i915_gem_timeline_mock_selftests(void) +int i915_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_sync), @@ -265,3 +266,326 @@ int i915_gem_timeline_mock_selftests(void) return i915_subtests(tests, NULL); } + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 hwsp_address(const struct i915_timeline *tl) +{ + return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; +} + +static struct i915_request * +tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->i915->drm.struct_mutex); /* lazy rq refs */ + + err = i915_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, hwsp_address(tl), value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + i915_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct i915_timeline * +checked_i915_timeline_create(struct drm_i915_private *i915) +{ + struct i915_timeline *tl; + + tl = i915_timeline_create(i915, "live", NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + i915_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct i915_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct i915_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct i915_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + i915_timeline_put(tl); + } + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct i915_timeline *tl; + struct i915_request *rq; + + tl = checked_i915_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + i915_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + i915_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + i915_timeline_put(tl); + count++; + + if (err) + goto out; + + i915_timelines_park(i915); /* Encourage recycling! */ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int i915_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 4e5b4dc6df0f..919c89fd6ee5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -39,7 +39,12 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - i915_timeline_init(engine->i915, &ring->timeline, engine->name); + if (i915_timeline_init(engine->i915, + &ring->timeline, engine->name, + NULL)) { + kfree(ring); + return NULL; + } ring->base.size = sz; ring->base.effective_size = sz; @@ -208,7 +213,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + if (i915_timeline_init(i915, + &engine->base.timeline, + engine->base.name, + NULL)) + goto err_free; i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(&engine->base); @@ -226,6 +235,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); i915_timeline_fini(&engine->base.timeline); +err_free: kfree(engine); return NULL; } From 8ba306a6a362ef6f3c005ec8819c8890a6fadcd1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:10 +0000 Subject: [PATCH 369/539] drm/i915: Share per-timeline HWSP using a slab suballocator If we restrict ourselves to only using a cacheline for each timeline's HWSP (we could go smaller, but want to avoid needless polluting cachelines on different engines between different contexts), then we can suballocate a single 4k page into 64 different timeline HWSP. By treating each fresh allocation as a slab of 64 entries, we can keep it around for the next 64 allocation attempts until we need to refresh the slab cache. John Harrison noted the issue of fragmentation leading to the same worst case performance of one page per timeline as before, which can be mitigated by adopting a freelist. v2: Keep all partially allocated HWSP on a freelist This is still without migration, so it is possible for the system to end up with each timeline in its own page, but we ensure that no new allocation would needless allocate a fresh page! v3: Throw a selftest at the allocator to try and catch invalid cacheline reuse. Signed-off-by: Chris Wilson Cc: John Harrison Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_timeline.c | 126 ++++++++++++--- drivers/gpu/drm/i915/i915_timeline.h | 1 + drivers/gpu/drm/i915/selftests/i915_random.c | 33 +++- drivers/gpu/drm/i915/selftests/i915_random.h | 3 + .../gpu/drm/i915/selftests/i915_timeline.c | 143 ++++++++++++++++++ 6 files changed, 281 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a181b455197..6a051381f535 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1978,6 +1978,10 @@ struct drm_i915_private { struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ struct list_head list; + + /* Pack multiple timelines' seqnos into the same page */ + spinlock_t hwsp_lock; + struct list_head hwsp_free_list; } timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 8d5792311a8f..add8fc33cf6e 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -9,6 +9,18 @@ #include "i915_timeline.h" #include "i915_syncmap.h" +struct i915_timeline_hwsp { + struct i915_vma *vma; + struct list_head free_link; + u64 free_bitmap; +}; + +static inline struct i915_timeline_hwsp * +i915_timeline_hwsp(const struct i915_timeline *tl) +{ + return tl->hwsp_ggtt->private; +} + static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -27,28 +39,89 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915) return vma; } -static int hwsp_alloc(struct i915_timeline *timeline) +static struct i915_vma * +hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) { - struct i915_vma *vma; + struct drm_i915_private *i915 = timeline->i915; + struct i915_gt_timelines *gt = &i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; - vma = __hwsp_alloc(timeline->i915); - if (IS_ERR(vma)) - return PTR_ERR(vma); + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); - timeline->hwsp_ggtt = vma; - timeline->hwsp_offset = 0; + spin_lock(>->hwsp_lock); - return 0; + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(i915); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + + spin_lock(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void hwsp_free(struct i915_timeline *timeline) +{ + struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; + struct i915_timeline_hwsp *hwsp; + + hwsp = i915_timeline_hwsp(timeline); + if (!hwsp) /* leave global HWSP alone! */ + return; + + spin_lock(>->hwsp_lock); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock(>->hwsp_lock); } int i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name, - struct i915_vma *global_hwsp) + struct i915_vma *hwsp) { struct i915_gt_timelines *gt = &i915->gt.timelines; void *vaddr; - int err; /* * Ideally we want a set of engines on a single leaf as we expect @@ -64,18 +137,22 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->name = name; timeline->pin_count = 0; - if (global_hwsp) { - timeline->hwsp_ggtt = i915_vma_get(global_hwsp); - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - } else { - err = hwsp_alloc(timeline); - if (err) - return err; - } + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + if (!hwsp) { + unsigned int cacheline; - vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB); + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + } + timeline->hwsp_ggtt = i915_vma_get(hwsp); + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { - i915_vma_put(timeline->hwsp_ggtt); + hwsp_free(timeline); + i915_vma_put(hwsp); return PTR_ERR(vaddr); } @@ -105,6 +182,9 @@ void i915_timelines_init(struct drm_i915_private *i915) mutex_init(>->mutex); INIT_LIST_HEAD(>->list); + spin_lock_init(>->hwsp_lock); + INIT_LIST_HEAD(>->hwsp_free_list); + /* via i915_gem_wait_for_idle() */ i915_gem_shrinker_taints_mutex(i915, >->mutex); } @@ -144,12 +224,13 @@ void i915_timeline_fini(struct i915_timeline *timeline) GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - i915_syncmap_free(&timeline->sync); - mutex_lock(>->mutex); list_del(&timeline->link); mutex_unlock(>->mutex); + i915_syncmap_free(&timeline->sync); + hwsp_free(timeline); + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); } @@ -226,6 +307,7 @@ void i915_timelines_fini(struct drm_i915_private *i915) struct i915_gt_timelines *gt = &i915->gt.timelines; GEM_BUG_ON(!list_empty(>->list)); + GEM_BUG_ON(!list_empty(>->hwsp_free_list)); mutex_destroy(>->mutex); } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 0c3739d53d79..ab736e2e5707 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -33,6 +33,7 @@ #include "i915_utils.h" struct i915_vma; +struct i915_timeline_hwsp; struct i915_timeline { u64 fence_context; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 1f415ce47018..716a3f19f030 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,16 +41,35 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd) return x; } +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state) +{ + char stack[128]; + + if (WARN_ON(elsz > sizeof(stack) || count > U32_MAX)) + return; + + if (!elsz || !count) + return; + + /* Fisher-Yates shuffle courtesy of Knuth */ + while (--count) { + size_t swp; + + swp = i915_prandom_u32_max_state(count + 1, state); + if (swp == count) + continue; + + memcpy(stack, arr + count * elsz, elsz); + memcpy(arr + count * elsz, arr + swp * elsz, elsz); + memcpy(arr + swp * elsz, stack, elsz); + } +} + void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state) { - unsigned int i, j; - - for (i = 0; i < count; i++) { - BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = i915_prandom_u32_max_state(count, state); - swap(order[i], order[j]); - } + i915_prandom_shuffle(order, sizeof(*order), count, state); } unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 7dffedc501ca..8e1ff9c105b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -54,4 +54,7 @@ void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); +void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, + struct rnd_state *state); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 1585b614510d..c34340f074cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -4,6 +4,8 @@ * Copyright © 2017-2018 Intel Corporation */ +#include + #include "../i915_selftest.h" #include "i915_random.h" @@ -11,6 +13,146 @@ #include "mock_gem_device.h" #include "mock_timeline.h" +static struct page *hwsp_page(struct i915_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct i915_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct i915_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct i915_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + i915_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct i915_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = i915_timeline_create(state->i915, "mock", NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + i915_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + struct __igt_sync { const char *name; u32 seqno; @@ -260,6 +402,7 @@ static int bench_sync(void *arg) int i915_timeline_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), SUBTEST(igt_sync), SUBTEST(bench_sync), }; From 5013eb8cd601c31e6d7d1b9d3291b24e933b77b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:11 +0000 Subject: [PATCH 370/539] drm/i915: Track the context's seqno in its own timeline HWSP Now that we have allocated ourselves a cacheline to store a breadcrumb, we can emit a write from the GPU into the timeline's HWSP of the per-context seqno as we complete each request. This drops the mirroring of the per-engine HWSP and allows each context to operate independently. We do not need to unwind the per-context timeline, and so requests are always consistent with the timeline breadcrumb, greatly simplifying the completion checks as we no longer need to be concerned about the global_seqno changing mid check. One complication though is that we have to be wary that the request may outlive the HWSP and so avoid touching the potentially danging pointer after we have retired the fence. We also have to guard our access of the HWSP with RCU, the release of the obj->mm.pages should already be RCU-safe. At this point, we are emitting both per-context and global seqno and still using the single per-engine execution timeline for resolving interrupts. v2: s/fake_complete/mark_complete/ Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_request.h | 30 +++---- drivers/gpu/drm/i915/i915_reset.c | 1 + drivers/gpu/drm/i915/i915_timeline.c | 4 + drivers/gpu/drm/i915/intel_engine_cs.c | 15 +++- drivers/gpu/drm/i915/intel_lrc.c | 31 ++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 87 +++++++++++++++---- .../gpu/drm/i915/selftests/i915_timeline.c | 7 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 19 +++- 10 files changed, 139 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 761714448ff3..4e0de22f0166 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2890,7 +2890,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry(request, &engine->timeline.requests, link) { - if (__i915_request_completed(request, request->global_seqno)) + if (i915_request_completed(request)) continue; active = request; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a076fd0b7ba6..4d58770e6a8c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -199,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); + i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) @@ -621,7 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); - rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; + rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ade010fe6e26..96c586d6ff4d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -289,6 +289,7 @@ long i915_request_wait(struct i915_request *rq, static inline bool i915_request_signaled(const struct i915_request *rq) { + /* The request may live longer than its HWSP, so check flags first! */ return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } @@ -340,32 +341,23 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) /* not yet submitted to HW */ - return false; - - return i915_seqno_passed(hwsp_seqno(rq), seqno - 1); -} - -static inline bool -__i915_request_completed(const struct i915_request *rq, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return i915_seqno_passed(hwsp_seqno(rq), seqno) && - seqno == i915_request_global_seqno(rq); + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } static inline bool i915_request_completed(const struct i915_request *rq) { - u32 seqno; + if (i915_request_signaled(rq)) + return true; - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); +} - return __i915_request_completed(rq, seqno); +static inline void i915_request_mark_complete(struct i915_request *rq) +{ + rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } void i915_retire_requests(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index d2dca85a543d..bd82f9b1043f 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -760,6 +760,7 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); + i915_request_mark_complete(request); intel_engine_write_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index add8fc33cf6e..e4c11414a824 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -270,6 +270,10 @@ int i915_timeline_pin(struct i915_timeline *tl) if (err) goto unpin; + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + return 0; unpin: diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 515e87846afd..ead9c4371fe1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -660,10 +660,16 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; + dw = i915_timeline_pin(&frame->timeline); + if (dw < 0) + goto out_timeline; + dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; - i915_timeline_fini(&frame->timeline); + i915_timeline_unpin(&frame->timeline); +out_timeline: + i915_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; @@ -1426,9 +1432,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ", idx, count, - i915_ggtt_offset(rq->ring->vma)); + i915_ggtt_offset(rq->ring->vma), + rq->timeline->hwsp_offset); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1538,6 +1545,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + rq->timeline->hwsp_offset); print_request_ring(m, rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e388f37743a2..fdbb3fe8eac9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -832,10 +832,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (i915_request_signaled(rq)) - continue; + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } /* Flush the queued requests to the timeline list (for retiring). */ @@ -845,9 +845,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) priolist_for_each_request_consume(rq, rn, p, i) { list_del_init(&rq->sched.link); - - dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2044,10 +2044,17 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->global_seqno, + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset); + + cs = gen8_emit_ggtt_write(cs, + request->global_seqno, intel_hws_seqno_address(request->engine)); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2056,18 +2063,20 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - /* We're using qword write, seqno should be aligned to 8 bytes. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), + request->fence.seqno, + request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); + cs = gen8_emit_ggtt_write_rcs(cs, + request->global_seqno, + intel_hws_seqno_address(request->engine), + PIPE_CONTROL_CS_STALL); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 174795622eb1..ee3719324e2d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -326,6 +326,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->global_seqno; @@ -427,6 +432,13 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); + *cs++ = rq->timeline->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); *cs++ = intel_hws_seqno_address(rq->engine); *cs++ = rq->global_seqno; @@ -441,10 +453,19 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -457,14 +478,21 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->global_seqno; for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_FLUSH_DW; @@ -472,7 +500,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -738,7 +765,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&tl->lock, flags); list_for_each_entry(pos, &tl->requests, link) { - if (!__i915_request_completed(pos, pos->global_seqno)) { + if (!i915_request_completed(pos)) { rq = pos; break; } @@ -880,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine) list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); - if (i915_request_signaled(request)) - continue; + if (!i915_request_signaled(request)) + dma_fence_set_error(&request->fence, -EIO); - dma_fence_set_error(&request->fence, -EIO); + i915_request_mark_complete(request); } intel_write_status_page(engine, @@ -907,14 +934,20 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX_ADDR; *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -927,8 +960,15 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; + GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + *cs++ = MI_FLUSH; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -937,6 +977,7 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -1169,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring) GEM_BUG_ON(ring->vaddr); + ret = i915_timeline_pin(ring->timeline); + if (ret) + return ret; + flags = PIN_GLOBAL; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ @@ -1185,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring) else ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); if (unlikely(ret)) - return ret; + goto unpin_timeline; } ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - return ret; + goto unpin_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, map); - if (IS_ERR(addr)) - goto err; + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto unpin_ring; + } vma->obj->pin_global++; ring->vaddr = addr; return 0; -err: +unpin_ring: i915_vma_unpin(vma); - return PTR_ERR(addr); +unpin_timeline: + i915_timeline_unpin(ring->timeline); + return ret; } void intel_ring_reset(struct intel_ring *ring, u32 tail) @@ -1235,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); + + i915_timeline_unpin(ring->timeline); } static struct i915_vma * diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index c34340f074cf..12ea69b1a1e5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -440,11 +440,6 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) return 0; } -static u32 hwsp_address(const struct i915_timeline *tl) -{ - return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; -} - static struct i915_request * tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) { @@ -463,7 +458,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; - err = emit_ggtt_store_dw(rq, hwsp_address(tl), value); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); if (err) rq = ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 919c89fd6ee5..95e890d7f58b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -30,6 +30,17 @@ struct mock_ring { struct i915_timeline timeline; }; +static void mock_timeline_pin(struct i915_timeline *tl) +{ + tl->pin_count++; +} + +static void mock_timeline_unpin(struct i915_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + tl->pin_count--; +} + static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; @@ -76,6 +87,8 @@ static void advance(struct mock_request *request) { list_del_init(&request->link); mock_seqno_advance(request->base.engine, request->base.global_seqno); + i915_request_mark_complete(&request->base); + GEM_BUG_ON(!i915_request_completed(&request->base)); } static void hw_delay_complete(struct timer_list *t) @@ -108,6 +121,7 @@ static void hw_delay_complete(struct timer_list *t) static void mock_context_unpin(struct intel_context *ce) { + mock_timeline_unpin(ce->ring->timeline); i915_gem_context_put(ce->gem_context); } @@ -129,6 +143,7 @@ mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); + int err = -ENOMEM; if (ce->pin_count++) return ce; @@ -139,13 +154,15 @@ mock_context_pin(struct intel_engine_cs *engine, goto err; } + mock_timeline_pin(ce->ring->timeline); + ce->ops = &mock_context_ops; i915_gem_context_get(ctx); return ce; err: ce->pin_count = 0; - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static int mock_request_alloc(struct i915_request *request) From 9407d3bdb048d52fa0fd45fa624a6c9f00072169 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 18:18:12 +0000 Subject: [PATCH 371/539] drm/i915: Track active timelines Now that we pin timelines around use, we have a clearly defined lifetime and convenient points at which we can track only the active timelines. This allows us to reduce the list iteration to only consider those active timelines and not all. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 4 +-- drivers/gpu/drm/i915/i915_reset.c | 2 +- drivers/gpu/drm/i915/i915_timeline.c | 39 ++++++++++++++++++---------- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6a051381f535..d072f3369ee1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1977,7 +1977,7 @@ struct drm_i915_private { struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ - struct list_head list; + struct list_head active_list; /* Pack multiple timelines' seqnos into the same page */ spinlock_t hwsp_lock; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4e0de22f0166..9c499edb4c13 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3246,7 +3246,7 @@ wait_for_timelines(struct drm_i915_private *i915, return timeout; mutex_lock(>->mutex); - list_for_each_entry(tl, >->list, link) { + list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; rq = i915_gem_active_get_unlocked(&tl->last_request); @@ -3274,7 +3274,7 @@ wait_for_timelines(struct drm_i915_private *i915, /* restart after reacquiring the lock */ mutex_lock(>->mutex); - tl = list_entry(>->list, typeof(*tl), link); + tl = list_entry(>->active_list, typeof(*tl), link); } mutex_unlock(>->mutex); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index bd82f9b1043f..acf3c777e49d 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -856,7 +856,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ mutex_lock(&i915->gt.timelines.mutex); - list_for_each_entry(tl, &i915->gt.timelines.list, link) { + list_for_each_entry(tl, &i915->gt.timelines.active_list, link) { struct i915_request *rq; long timeout; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index e4c11414a824..79838d89bdb9 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -120,7 +120,6 @@ int i915_timeline_init(struct drm_i915_private *i915, const char *name, struct i915_vma *hwsp) { - struct i915_gt_timelines *gt = &i915->gt.timelines; void *vaddr; /* @@ -168,10 +167,6 @@ int i915_timeline_init(struct drm_i915_private *i915, i915_syncmap_init(&timeline->sync); - mutex_lock(>->mutex); - list_add(&timeline->link, >->list); - mutex_unlock(>->mutex); - return 0; } @@ -180,7 +175,7 @@ void i915_timelines_init(struct drm_i915_private *i915) struct i915_gt_timelines *gt = &i915->gt.timelines; mutex_init(>->mutex); - INIT_LIST_HEAD(>->list); + INIT_LIST_HEAD(>->active_list); spin_lock_init(>->hwsp_lock); INIT_LIST_HEAD(>->hwsp_free_list); @@ -189,6 +184,24 @@ void i915_timelines_init(struct drm_i915_private *i915) i915_gem_shrinker_taints_mutex(i915, >->mutex); } +static void timeline_add_to_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_add(&tl->link, >->active_list); + mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct i915_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->i915->gt.timelines; + + mutex_lock(>->mutex); + list_del(&tl->link); + mutex_unlock(>->mutex); +} + /** * i915_timelines_park - called when the driver idles * @i915: the drm_i915_private device @@ -205,7 +218,7 @@ void i915_timelines_park(struct drm_i915_private *i915) struct i915_timeline *timeline; mutex_lock(>->mutex); - list_for_each_entry(timeline, >->list, link) { + list_for_each_entry(timeline, >->active_list, link) { /* * All known fences are completed so we can scrap * the current sync point tracking and start afresh, @@ -219,15 +232,9 @@ void i915_timelines_park(struct drm_i915_private *i915) void i915_timeline_fini(struct i915_timeline *timeline) { - struct i915_gt_timelines *gt = &timeline->i915->gt.timelines; - GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - mutex_lock(>->mutex); - list_del(&timeline->link); - mutex_unlock(>->mutex); - i915_syncmap_free(&timeline->sync); hwsp_free(timeline); @@ -274,6 +281,8 @@ int i915_timeline_pin(struct i915_timeline *tl) i915_ggtt_offset(tl->hwsp_ggtt) + offset_in_page(tl->hwsp_offset); + timeline_add_to_active(tl); + return 0; unpin: @@ -287,6 +296,8 @@ void i915_timeline_unpin(struct i915_timeline *tl) if (--tl->pin_count) return; + timeline_remove_from_active(tl); + /* * Since this timeline is idle, all bariers upon which we were waiting * must also be complete and so we can discard the last used barriers @@ -310,7 +321,7 @@ void i915_timelines_fini(struct drm_i915_private *i915) { struct i915_gt_timelines *gt = &i915->gt.timelines; - GEM_BUG_ON(!list_empty(>->list)); + GEM_BUG_ON(!list_empty(>->active_list)); GEM_BUG_ON(!list_empty(>->hwsp_free_list)); mutex_destroy(>->mutex); From 3d6535cbed4a4b029602ff83efb2adec7cb8d28b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Jan 2019 14:01:14 +0100 Subject: [PATCH 372/539] drm/i915: Enable fastboot by default on Skylake and newer We really want to have fastboot enabled by default to avoid an ugly modeset during boot. Rather then enabling it everywhere, lets start with enabling it on Skylake and newer. Signed-off-by: Hans de Goede Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190124130114.3967-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/i915_params.c | 6 ++++-- drivers/gpu/drm/i915/i915_params.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9f0539bdaa39..b5be0abbba35 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -97,8 +97,10 @@ i915_param_named_unsafe(disable_power_well, int, 0400, i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -i915_param_named(fastboot, bool, 0600, - "Try to skip unnecessary mode sets at boot time (default: false)"); +i915_param_named(fastboot, int, 0600, + "Try to skip unnecessary mode sets at boot time " + "(0=disabled, 1=enabled) " + "Default: -1 (use per-chip default)"); i915_param_named_unsafe(prefault_disable, bool, 0600, "Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 6efcf330bdab..3f14e9881a0d 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,10 +63,10 @@ struct drm_printer; param(int, edp_vswing, 0) \ param(int, reset, 2) \ param(unsigned int, inject_load_failure, 0) \ + param(int, fastboot, -1) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ - param(bool, fastboot, false) \ param(bool, prefault_disable, false) \ param(bool, load_detect_test, false) \ param(bool, force_reset_modeset_test, false) \ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6cf480a0eb23..734ac77c9a8e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11719,6 +11719,15 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...) va_end(args); } +static bool fastboot_enabled(struct drm_i915_private *dev_priv) +{ + if (i915_modparams.fastboot != -1) + return i915_modparams.fastboot; + + /* Enable fastboot by default on Skylake and newer */ + return INTEL_GEN(dev_priv) >= 9; +} + static bool intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *current_config, @@ -11730,7 +11739,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); - if (fixup_inherited && !i915_modparams.fastboot) { + if (fixup_inherited && !fastboot_enabled(dev_priv)) { DRM_DEBUG_KMS("initial modeset and fastboot not set\n"); ret = false; } From 968bf969b47df2481022b9a05eaab02948eec088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:38:46 +0200 Subject: [PATCH 373/539] drm/i915: Fix skl srckey mask bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're incorrectly masking off the R/V channel enable bit from KEYMSK. Fix it up. Cc: Maarten Lankhorst Cc: Matt Roper Fixes: b20815255693 ("drm/i915: Add plane alpha blending support, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125183846.28755-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b02d3d9809e3..cd42e81f8a90 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -493,7 +493,7 @@ skl_program_plane(struct intel_plane *plane, keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; From 06039d98202f31dbf85af76d2659aaaa455ee0cb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:49 +0000 Subject: [PATCH 374/539] drm/i915/selftests: Apply a subtest filter In bringup on simulated HW even rudimentary tests are slow, and so many may fail that we want to be able to filter out the noise to focus on the specific problem. Even just the tests groups provided for igt is not specific enough, and we would like to isolate one particular subtest (and probably subsubtests!). For simplicity, allow the user to provide a command line parameter such as i915.st_filter=i915_timeline_mock_selftests/igt_sync to restrict ourselves to only running on subtest. The exact name to use is given during a normal run, highlighted as an error if it failed, debug otherwise. The test group is optional, and then all subtests are compared for an exact match with the filter (most subtests have unique names). The filter can be negated, e.g. i915.st_filter=!igt_sync and then all tests but those that match will be run. More than one match can be supplied separated by a comma, e.g. i915.st_filter=igt_vma_create,igt_vma_pin1 to only run those specified, or i915.st_filter=!igt_vma_create,!igt_vma_pin1 to run all but those named. Mixing a blacklist and whitelist will only execute those subtests matching the whitelist so long as they are previously excluded in the blacklist. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_selftest.h | 1 + .../gpu/drm/i915/selftests/i915_selftest.c | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index a73472dd12fd..207e21b478f2 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -31,6 +31,7 @@ struct i915_selftest { unsigned long timeout_jiffies; unsigned int timeout_ms; unsigned int random_seed; + char *filter; int mock; int live; }; diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 86c54ea37f48..10ef0e636a24 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -197,6 +197,49 @@ int i915_live_selftests(struct pci_dev *pdev) return 0; } +static bool apply_subtest_filter(const char *caller, const char *name) +{ + char *filter, *sep, *tok; + bool result = true; + + filter = kstrdup(i915_selftest.filter, GFP_KERNEL); + for (sep = filter; (tok = strsep(&sep, ","));) { + bool allow = true; + char *sl; + + if (*tok == '!') { + allow = false; + tok++; + } + + if (*tok == '\0') + continue; + + sl = strchr(tok, '/'); + if (sl) { + *sl++ = '\0'; + if (strcmp(tok, caller)) { + if (allow) + result = false; + continue; + } + tok = sl; + } + + if (strcmp(tok, name)) { + if (allow) + result = false; + continue; + } + + result = allow; + break; + } + kfree(filter); + + return result; +} + int __i915_subtests(const char *caller, const struct i915_subtest *st, unsigned int count, @@ -209,6 +252,9 @@ int __i915_subtests(const char *caller, if (signal_pending(current)) return -EINTR; + if (!apply_subtest_filter(caller, st->name)) + continue; + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); GEM_TRACE("Running %s/%s\n", caller, st->name); @@ -244,6 +290,7 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); +module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); From 8547444137ec6138ce52fc1938980b737a0d4d9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:50 +0000 Subject: [PATCH 375/539] drm/i915: Identify active requests To allow requests to forgo a common execution timeline, one question we need to be able to answer is "is this request running?". To track whether a request has started on HW, we can emit a breadcrumb at the beginning of the request and check its timeline's HWSP to see if the breadcrumb has advanced past the start of this request. (This is in contrast to the global timeline where we need only ask if we are on the global timeline and if the timeline has advanced past the end of the previous request.) There is still confusion from a preempted request, which has already started but relinquished the HW to a high priority request. For the common case, this discrepancy should be negligible. However, for identification of hung requests, knowing which one was running at the time of the hang will be much more important. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++ drivers/gpu/drm/i915/i915_request.c | 10 ++--- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_timeline.c | 1 + drivers/gpu/drm/i915/i915_timeline.h | 2 + drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++-- drivers/gpu/drm/i915/intel_lrc.c | 39 +++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 ++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++- drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +- 11 files changed, 96 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c499edb4c13..d92e7ab0005e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2871,6 +2871,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -2893,6 +2901,13 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) if (i915_request_completed(request)) continue; + if (!i915_request_started(request)) + break; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + break; + active = request; break; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f250109e1f66..8eedf7cac493 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + /* + * After we completed waiting for other engines (using HW semaphores) + * then we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb. + */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4d58770e6a8c..7db15b7b3de8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -333,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq) static u32 timeline_get_seqno(struct i915_timeline *tl) { - return ++tl->seqno; + return tl->seqno += 1 + tl->has_initial_breadcrumb; } static void move_to_timeline(struct i915_request *request, @@ -382,8 +382,8 @@ void __i915_request_submit(struct i915_request *request) intel_engine_enable_signaling(request, false); spin_unlock(&request->lock); - engine->emit_breadcrumb(request, - request->ring->vaddr + request->postfix); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); @@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32); + rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -908,7 +908,7 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_dw); + cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); request->postfix = intel_ring_offset(request, cs); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 96c586d6ff4d..340d6216791c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq) if (i915_request_signaled(rq)) return true; + /* Remember: started but may have since been preempted! */ return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); } diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 79838d89bdb9..5ea3af393ffe 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -135,6 +135,7 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->i915 = i915; timeline->name = name; timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; if (!hwsp) { diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index ab736e2e5707..8caeb66d1cd5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -48,6 +48,8 @@ struct i915_timeline { struct i915_vma *hwsp_ggtt; u32 hwsp_offset; + bool has_initial_breadcrumb; + /** * List of breadcrumbs associated with GPU requests currently * outstanding. diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ead9c4371fe1..8dca76f6315d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -664,7 +664,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (dw < 0) goto out_timeline; - dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; i915_timeline_unpin(&frame->timeline); @@ -725,7 +725,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret < 0) goto err_breadcrumbs; - engine->emit_breadcrumb_dw = ret; + engine->emit_fini_breadcrumb_dw = ret; return 0; @@ -1297,7 +1297,9 @@ static void print_request(struct drm_printer *m, drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, - i915_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fdbb3fe8eac9..5db16dd8e844 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -624,7 +624,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent * ring:HEAD == rq:TAIL as we resubmit the - * request. See gen8_emit_breadcrumb() for + * request. See gen8_emit_fini_breadcrumb() for * where we prepare the padding after the * end of the request. */ @@ -1283,6 +1283,34 @@ execlists_context_pin(struct intel_engine_cs *engine, return __execlists_context_pin(engine, ctx, ce); } +static int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Check if we have been preempted before we even get started. + * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = rq->timeline->hwsp_offset; + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + return 0; +} + static int emit_pdps(struct i915_request *rq) { const struct intel_engine_cs * const engine = rq->engine; @@ -2039,7 +2067,7 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) return cs; } -static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -2061,7 +2089,7 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) +static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, @@ -2176,7 +2204,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; - engine->emit_breadcrumb = gen8_emit_breadcrumb; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -2289,7 +2318,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) /* Override some for render ring. */ engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; ret = logical_ring_init(engine); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ee3719324e2d..668ed67336a2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1607,6 +1607,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) err = PTR_ERR(timeline); goto err; } + GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); i915_timeline_put(timeline); @@ -1960,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!request->hw_context->pin_count); + GEM_BUG_ON(request->timeline->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -2296,9 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->request_alloc = ring_request_alloc; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; + /* + * Using a global execution timeline; the previous final breadcrumb is + * equivalent to our next initial bread so we can elide + * engine->emit_init_breadcrumb(). + */ + engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; if (IS_GEN(dev_priv, 5)) - engine->emit_breadcrumb = gen5_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; @@ -2327,11 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; - engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; - engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; } else if (IS_GEN(dev_priv, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { @@ -2368,9 +2375,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; if (IS_GEN(dev_priv, 6)) - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } else { engine->emit_flush = bsd_ring_flush; if (IS_GEN(dev_priv, 5)) @@ -2394,9 +2401,9 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; if (IS_GEN(dev_priv, 6)) - engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } @@ -2414,7 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; - engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2927b712b973..1f30ffb84936 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -463,8 +463,10 @@ struct intel_engine_cs { unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) - u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); - int emit_breadcrumb_dw; + int (*emit_init_breadcrumb)(struct i915_request *rq); + u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, + u32 *cs); + unsigned int emit_fini_breadcrumb_dw; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 95e890d7f58b..3b226ebc6bc4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.context_pin = mock_context_pin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; - engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; if (i915_timeline_init(i915, From 4d97cbe01980c0d008d125903ef9ff05b6640c2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:51 +0000 Subject: [PATCH 376/539] drm/i915: Rename execlists->queue_priority to queue_priority_hint After noticing that we trigger preemption events for currently executing requests, as well as requests that complete before the preemption and attempting to suppress those preemption events, it is wise to not consider the queue_priority to be authoritative. As we only track the maximum priority seen between dequeue passes, if the maximum priority request is no longer available for dequeuing (it completed or is even executing on another engine), we have no knowledge of the previous queue_priority as it would require us to keep a full history of enqueued requests -- but we already have that history in the priolists! Rename the queue_priority to queue_priority_hint so that we do not confuse it as being exactly the maximum priority in the queue, but merely an indication that we have seen a new maximum priority value and as such we should check whether it should preempt the currently running request. v2: s/preempt_priority_hint/queue_priority_hint/ as preempt implies it being only used for the singular task of preemption and not the wider question of waking up due to a change in the queue. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 11 +++++------ drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++++++-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 340faea6c08a..4eeba588b996 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -127,8 +127,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists, - long queue_priority) +static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; long last_prio, i; @@ -139,7 +138,7 @@ static void assert_priolists(struct intel_engine_execlists * const execlists, GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); @@ -166,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) int idx, i; lockdep_assert_held(&engine->timeline.lock); - assert_priolists(execlists, INT_MAX); + assert_priolists(execlists); /* buckets sorted from highest [in slot 0] to lowest priority */ idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; @@ -353,7 +352,7 @@ static void __i915_schedule(struct i915_request *rq, continue; } - if (prio <= engine->execlists.queue_priority) + if (prio <= engine->execlists.queue_priority_hint) continue; /* @@ -366,7 +365,7 @@ static void __i915_schedule(struct i915_request *rq, continue; /* Defer (tasklet) submission until after all of our updates. */ - engine->execlists.queue_priority = prio; + engine->execlists.queue_priority_hint = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8dca76f6315d..0a610c9691fd 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -480,7 +480,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; } @@ -1178,7 +1178,7 @@ void intel_engines_park(struct drm_i915_private *i915) } /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4295ade0d613..f12ecc8dec6b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -737,7 +737,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; - int prio = execlists->queue_priority; + int prio = execlists->queue_priority_hint; if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, @@ -783,7 +783,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + execlists->queue_priority_hint = + rb ? to_priolist(rb)->priority : INT_MIN; if (submit) port_assign(port, last); if (last) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5db16dd8e844..a1034f7069aa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -591,7 +591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority)) { + if (need_preempt(engine, last, execlists->queue_priority_hint)) { inject_preempt_context(engine); return; } @@ -699,20 +699,20 @@ done: /* * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * - * We choose queue_priority such that if we add a request of greater + * We choose the priority hint such that if we add a request of greater * priority than this, we kick the submission tasklet to decide on * the right order of submitting the requests to hardware. We must * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the queue_priority then as the first "hole" in + * HW. We derive the priority hint then as the first "hole" in * the HW submission ports and if there are no available slots, * the priority of the lowest executing request, i.e. last. * * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the queue_priority is bumped to that + * user, see queue_request(), the priority hint is bumped to that * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority = + execlists->queue_priority_hint = port != execlists->port ? rq_prio(last) : INT_MIN; if (submit) { @@ -861,7 +861,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority = INT_MIN; + execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); @@ -1092,8 +1092,8 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; + if (prio > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = prio; __submit_queue_imm(engine); } } @@ -2777,7 +2777,9 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, last = NULL; count = 0; - drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + execlists->queue_priority_hint); for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1f30ffb84936..ef024c154a1b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -293,14 +293,18 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority: Highest pending priority. + * @queue_priority_hint: Highest pending priority. * * When we add requests into the queue, or adjust the priority of * executing requests, we compute the maximum priority of those * pending requests. We can then use this value to determine if * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. */ - int queue_priority; + int queue_priority_hint; /** * @queue: queue of requests, in priority lists From c9a646228816efeeacb05cc9400464ad8aa90017 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 18:54:52 +0000 Subject: [PATCH 377/539] drm/i915/execlists: Suppress preempting self In order to avoid preempting ourselves, we currently refuse to schedule the tasklet if we reschedule an inflight context. However, this glosses over a few issues such as what happens after a CS completion event and we then preempt the newly executing context with itself, or if something else causes a tasklet_schedule triggering the same evaluation to preempt the active context with itself. However, when we avoid preempting ELSP[0], we still retain the preemption value as it may match a second preemption request within the same time period that we need to resolve after the next CS event. However, since we only store the maximum preemption priority seen, it may not match the subsequent event and so we should double check whether or not we actually do need to trigger a preempt-to-idle by comparing the top priorities from each queue. Later, this gives us a hook for finer control over deciding whether the preempt-to-idle is justified. The sequence of events where we end up preempting for no avail is: 1. Queue requests/contexts A, B 2. Priority boost A; no preemption as it is executing, but keep hint 3. After CS switch, B is less than hint, force preempt-to-idle 4. Resubmit B after idling v2: We can simplify a bunch of tests based on the knowledge that PI will ensure that earlier requests along the same context will have the highest priority. v3: Demonstrate the stale preemption hint with a selftest References: a2bf92e8cc16 ("drm/i915/execlists: Avoid kicking priority on the current context") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_scheduler.c | 20 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/intel_lrc.c | 98 ++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/selftests/intel_lrc.c | 138 ++++++++++++++++++++ 5 files changed, 246 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 4eeba588b996..2d172991024f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -238,6 +238,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) return engine; } +static bool inflight(const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct i915_request *active; + + if (!rq->global_seqno) + return false; + + active = port_request(engine->execlists.port); + return active->hw_context == rq->hw_context; +} + static void __i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -327,6 +339,7 @@ static void __i915_schedule(struct i915_request *rq, INIT_LIST_HEAD(&dep->dfs_link); engine = sched_lock_engine(node, engine); + lockdep_assert_held(&engine->timeline.lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -355,17 +368,16 @@ static void __i915_schedule(struct i915_request *rq, if (prio <= engine->execlists.queue_priority_hint) continue; + engine->execlists.queue_priority_hint = prio; + /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ - if (node_to_request(node)->global_seqno && - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, - node_to_request(node)->global_seqno)) + if (inflight(node_to_request(node), engine)) continue; /* Defer (tasklet) submission until after all of our updates. */ - engine->execlists.queue_priority_hint = prio; tasklet_hi_schedule(&engine->execlists.tasklet); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index f12ecc8dec6b..8bc8aa54aa35 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -629,6 +629,8 @@ static void inject_preempt_context(struct work_struct *work) EXECLISTS_ACTIVE_PREEMPT); tasklet_schedule(&engine->execlists.tasklet); } + + (void)I915_SELFTEST_ONLY(engine->execlists.preempt_hang.count++); } /* diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a1034f7069aa..a9eb0211ce77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -188,13 +188,90 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority; } -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *last, - int prio) +static int queue_prio(const struct intel_engine_execlists *execlists) { - return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last)) && - !i915_request_completed(last)); + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + const int last_prio = rq_prio(rq); + + if (!intel_engine_has_preemption(engine)) + return false; + + if (i915_request_completed(rq)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ESLP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. + */ + if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->link, &engine->timeline.requests) && + rq_prio(list_next_entry(rq, link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, it's priority would not exceed ELSP[0] aka last_prio. + */ + return queue_prio(&engine->execlists) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct intel_engine_execlists *execlists, + const struct i915_request *prev, + const struct i915_request *next) +{ + if (!prev) + return true; + + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go. + * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (port_request(execlists->port) == prev) + return true; + + return rq_prio(prev) >= rq_prio(next); } /* @@ -523,6 +600,8 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } static void complete_preempt_context(struct intel_engine_execlists *execlists) @@ -591,7 +670,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last, execlists->queue_priority_hint)) { + if (need_preempt(engine, last)) { inject_preempt_context(engine); return; } @@ -637,8 +716,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - GEM_BUG_ON(last && - need_preempt(engine, last, rq_prio(rq))); + GEM_BUG_ON(!assert_priority_queue(execlists, last, rq)); /* * Can we combine this request with the current port? @@ -884,6 +962,8 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; + lockdep_assert_held(&engine->timeline.lock); + /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ef024c154a1b..953ccc2617ff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -203,6 +203,7 @@ struct i915_priolist { struct st_preempt_hang { struct completion completion; + unsigned int count; bool inject_hang; }; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 2b2ecd76c2ac..fb35f53c9ce3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -268,6 +268,143 @@ err_wedged: goto err_ctx_lo; } +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct drm_i915_private *i915, + struct preempt_client *c) +{ + c->ctx = kernel_context(i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, i915)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + +static int live_suppress_self_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; /* presume black blox */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + for (depth = 0; depth < 8; depth++) { + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n", + engine->execlists.preempt_hang.count, + depth); + err = -EINVAL; + goto err_client_b; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + static int live_preempt_hang(void *arg) { struct drm_i915_private *i915 = arg; @@ -647,6 +784,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_suppress_self_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), }; From 06ea4c34991570d1e3d175f81029a6ea4bd8f634 Mon Sep 17 00:00:00 2001 From: wentalou Date: Mon, 14 Jan 2019 16:26:13 +0800 Subject: [PATCH 378/539] drm/amdgpu: csa_vaddr should not larger than AMDGPU_GMC_HOLE_START After removing unnecessary VM size calculations, vm_manager.max_pfn would reach 0x10,0000,0000 max_pfn << AMDGPU_GPU_PAGE_SHIFT exceeding AMDGPU_GMC_HOLE_START would cause GPU reset. Signed-off-by: wentalou Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..dd3bd01e3070 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -26,7 +26,8 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + uint64_t addr = min(adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GMC_HOLE_START); addr -= AMDGPU_VA_RESERVED_SIZE; addr = amdgpu_gmc_sign_extend(addr); From cdba61daf55090430a3e1aa495fbbe8f6768af39 Mon Sep 17 00:00:00 2001 From: wentalou Date: Thu, 24 Jan 2019 11:24:59 +0800 Subject: [PATCH 379/539] drm/amdgpu: sriov restrict max_pfn below AMDGPU_GMC_HOLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sriov need to restrict max_pfn below AMDGPU_GMC_HOLE. access the hole results in a range fault interrupt IIRC. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index dd3bd01e3070..7e22be7ca68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -26,8 +26,7 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = min(adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GMC_HOLE_START); + uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; addr -= AMDGPU_VA_RESERVED_SIZE; addr = amdgpu_gmc_sign_extend(addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9c082f9aea1a..600259b4e291 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -965,7 +965,11 @@ static int gmc_v9_0_sw_init(void *handle) * vm size is 256TB (48bit), maximum size of Vega10, * block size 512 (9bit) */ - amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */ + if (amdgpu_sriov_vf(adev)) + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); + else + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; default: break; From 193392ed9f6953713957c14045be1e920519870d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 29 Jan 2019 11:55:42 -0500 Subject: [PATCH 380/539] Revert "drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines" This reverts commit 10117450735c7a7c0858095fb46a860e7037cb9a. Causes a crash. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109487 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 4.19 --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index dc85a3c088af..95f332ee3e7e 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),) cc_stack_align := -mstack-alignment=16 endif -calcs_ccflags := -mhard-float -msse -msse2 $(cc_stack_align) +calcs_ccflags := -mhard-float -msse $(cc_stack_align) CFLAGS_dcn_calcs.o := $(calcs_ccflags) CFLAGS_dcn_calc_auto.o := $(calcs_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 33c7d7588712..d97ca6528f9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -30,7 +30,7 @@ else ifneq ($(call cc-option, -mstack-alignment=16),) cc_stack_align := -mstack-alignment=16 endif -dml_ccflags := -mhard-float -msse -msse2 $(cc_stack_align) +dml_ccflags := -mhard-float -msse $(cc_stack_align) CFLAGS_display_mode_lib.o := $(dml_ccflags) CFLAGS_display_pipe_clocks.o := $(dml_ccflags) From a5c8e0524dbbe1107d81a1604da3d191b66ead6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 25 Jan 2019 13:40:01 +0100 Subject: [PATCH 381/539] drm/amdgpu: cleanup amdgpu_pte_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kptr is not used any more. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0bc6f553dc08..a404ac17e5ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -107,14 +107,6 @@ struct amdgpu_pte_update_params { * DMA addresses to use for mapping, used during VM update by CPU */ dma_addr_t *pages_addr; - - /** - * @kptr: - * - * Kernel pointer of PD/PT BO that needs to be updated, - * used during VM update by CPU - */ - void *kptr; }; /** From 47dd8048a1bf5b2fb96e5abe99b4f1dcd208ea4d Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 28 Jan 2019 11:54:35 -0500 Subject: [PATCH 382/539] drm/amdgpu: Show XGMI node and hive message per device only once Reduce the repeated node and hive information during XGMI initialization Signed-off-by: shaoyunl Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index dac187454b33..0d90672d0e58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -91,10 +91,6 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev "XGMI: Set topology failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); - else - dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n", - adev->gmc.xgmi.physical_node_id, - adev->gmc.xgmi.hive_id); return ret; } @@ -160,6 +156,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) break; } + dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", + adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); + mutex_unlock(&hive->hive_lock); exit: return ret; From 5a3db6f08a8eae15bff597dce290ac238daeb717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 19:06:09 +0200 Subject: [PATCH 383/539] drm: Constify drm_color_lut_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_color_lut_check() doens't modify the passed in blob so let's make it const. Also s/uint32_t/u32/ while at it. v2: Reduce line wraps (Sam) Cc: Matt Roper Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129170609.5718-1-ville.syrjala@linux.intel.com Reviewed-by: Sam Ravnborg Acked-by: Daniel Vetter --- drivers/gpu/drm/drm_color_mgmt.c | 5 ++--- include/drm/drm_color_mgmt.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 968ca7c91ad8..d5d34d0c79c7 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -474,10 +474,9 @@ EXPORT_SYMBOL(drm_plane_create_color_properties); * * Returns 0 on success, -EINVAL on failure. */ -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests) +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests) { - struct drm_color_lut *entry; + const struct drm_color_lut *entry; int i; if (!lut || !tests) diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests { DRM_COLOR_LUT_NON_DECREASING = BIT(1), }; -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests); #endif From addc80f0bba9f015208132b804397d9d08399465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 16:19:12 +0200 Subject: [PATCH 384/539] drm/i915/tv: Fix adjusted_mode dotclock for interlaced modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_tv_mode_to_mode() assumes the pipe will be in progressive fetch mode, and thus when programming the pipe into interlaced mode we have to halve the calculated dotclock to get the correct field duration. This becomes more important when we start to program the pipe into interlaced mode on i965gm as we depend on the timestamps to get accurate frame counter values. Withot halving the clock our guesstimated frame counter would tick at twice the expected speed. Cc: Imre Deak Fixes: 690157f0a9e7 ("drm/i915/tv: Fix >1024 modes on gen3") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129141913.5515-1-ville.syrjala@linux.intel.com Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_tv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 751b88dde18e..3a5682f44e55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1150,6 +1150,8 @@ intel_tv_get_config(struct intel_encoder *encoder, ypos, mode.vdisplay - ysize - ypos); adjusted_mode->crtc_clock = mode.clock; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + adjusted_mode->crtc_clock /= 2; /* pixel counter doesn't work on i965gm TV output */ if (IS_I965GM(dev_priv)) @@ -1214,8 +1216,11 @@ intel_tv_compute_config(struct intel_encoder *encoder, tv_conn_state->bypass_vfilter = true; - if (!tv_mode->progressive) + if (!tv_mode->progressive) { + adjusted_mode->clock /= 2; + adjusted_mode->crtc_clock /= 2; adjusted_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } } else { tv_conn_state->margins.top = conn_state->tv.margins.top; tv_conn_state->margins.bottom = conn_state->tv.margins.bottom; From 68e94f62cfbe1650fb783979150a2b2f0a3eca1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 16:19:13 +0200 Subject: [PATCH 385/539] drm/i915/tv: Bypass the vertical filter if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's switch the pipe into interlaced mode and switch off the TV encoder vertical filter if the pipe vdisplay matches the TV YSIZE exactly. While I didn't measure it I presume this might reduce the power consumption a little bit, and the pixel rate is halved as the pipe will now fetching in interlaced mode rather than in progressive mode (effectively the same difference as between IF-ID vs. PF-ID pfit modes on more modern hardware) so a bit easier on the memory bandwidth. Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129141913.5515-2-ville.syrjala@linux.intel.com Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_tv.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3a5682f44e55..3924c4944e1f 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1159,6 +1159,22 @@ intel_tv_get_config(struct intel_encoder *encoder, I915_MODE_FLAG_USE_SCANLINE_COUNTER; } +static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, + int hdisplay) +{ + return IS_GEN(dev_priv, 3) && hdisplay > 1024; +} + +static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, + const struct drm_connector_state *conn_state, + int vdisplay) +{ + return tv_mode->crtc_vdisplay - + conn_state->tv.margins.top - + conn_state->tv.margins.bottom != + vdisplay; +} + static int intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -1189,7 +1205,8 @@ intel_tv_compute_config(struct intel_encoder *encoder, intel_tv_mode_to_mode(adjusted_mode, tv_mode); drm_mode_set_crtcinfo(adjusted_mode, 0); - if (IS_GEN(dev_priv, 3) && hdisplay > 1024) { + if (intel_tv_source_too_wide(dev_priv, hdisplay) || + !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { int extra, top, bottom; extra = adjusted_mode->crtc_vdisplay - vdisplay; From 3df0bd19193c47c48b7904e5f41c3041a431da33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:28 +0000 Subject: [PATCH 386/539] drm/i915: Remove the intel_engine_notify tracepoint The global seqno is defunct and so we have no meaningful indicator of forward progress for an engine. You need to listen to the request signaling tracepoints instead. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 2 -- drivers/gpu/drm/i915/i915_trace.h | 25 ------------------------- 2 files changed, 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 26ab00862b59..0fcdb14c50f4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1227,8 +1227,6 @@ static void notify_ring(struct intel_engine_cs *engine) wake_up_process(tsk); rcu_read_unlock(); - - trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 33d90eca9cdd..cb5bc65d575d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -750,31 +750,6 @@ trace_i915_request_out(struct i915_request *rq) #endif #endif -TRACE_EVENT(intel_engine_notify, - TP_PROTO(struct intel_engine_cs *engine, bool waiters), - TP_ARGS(engine, waiters), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u16, class) - __field(u16, instance) - __field(u32, seqno) - __field(bool, waiters) - ), - - TP_fast_assign( - __entry->dev = engine->i915->drm.primary->index; - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->seqno = intel_engine_get_seqno(engine); - __entry->waiters = waiters; - ), - - TP_printk("dev=%u, engine=%u:%u, seqno=%u, waiters=%u", - __entry->dev, __entry->class, __entry->instance, - __entry->seqno, __entry->waiters) -); - DEFINE_EVENT(i915_request, i915_request_retire, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) From 52c0fdb25c7c919334b97976d05096b441a3eada Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:29 +0000 Subject: [PATCH 387/539] drm/i915: Replace global breadcrumbs with per-context interrupt tracking A few years ago, see commit 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd"), the issue of handling multiple clients waiting in parallel was brought to our attention. The requirement was that every client should be woken immediately upon its request being signaled, without incurring any cpu overhead. To handle certain fragility of our hw meant that we could not do a simple check inside the irq handler (some generations required almost unbounded delays before we could be sure of seqno coherency) and so request completion checking required delegation. Before commit 688e6c725816, the solution was simple. Every client waiting on a request would be woken on every interrupt and each would do a heavyweight check to see if their request was complete. Commit 688e6c725816 introduced an rbtree so that only the earliest waiter on the global timeline would woken, and would wake the next and so on. (Along with various complications to handle requests being reordered along the global timeline, and also a requirement for kthread to provide a delegate for fence signaling that had no process context.) The global rbtree depends on knowing the execution timeline (and global seqno). Without knowing that order, we must instead check all contexts queued to the HW to see which may have advanced. We trim that list by only checking queued contexts that are being waited on, but still we keep a list of all active contexts and their active signalers that we inspect from inside the irq handler. By moving the waiters onto the fence signal list, we can combine the client wakeup with the dma_fence signaling (a dramatic reduction in complexity, but does require the HW being coherent, the seqno must be visible from the cpu before the interrupt is raised - we keep a timer backup just in case). Having previously fixed all the issues with irq-seqno serialisation (by inserting delays onto the GPU after each request instead of random delays on the CPU after each interrupt), we can rely on the seqno state to perfom direct wakeups from the interrupt handler. This allows us to preserve our single context switch behaviour of the current routine, with the only downside that we lose the RT priority sorting of wakeups. In general, direct wakeup latency of multiple clients is about the same (about 10% better in most cases) with a reduction in total CPU time spent in the waiter (about 20-50% depending on gen). Average herd behaviour is improved, but at the cost of not delegating wakeups on task_prio. v2: Capture fence signaling state for error state and add comments to warm even the most cold of hearts. v3: Check if the request is still active before busywaiting v4: Reduce the amount of pointer misdirection with list_for_each_safe and using a local i915_request variable inside the loops v5: Add a missing pluralisation to a purely informative selftest message. References: 688e6c725816 ("drm/i915: Slaughter the thundering i915_wait_request herd") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 28 +- drivers/gpu/drm/i915/i915_gem_context.c | 3 + drivers/gpu/drm/i915/i915_gem_context.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 83 +- drivers/gpu/drm/i915/i915_gpu_error.h | 9 +- drivers/gpu/drm/i915/i915_irq.c | 82 +- drivers/gpu/drm/i915/i915_request.c | 152 ++-- drivers/gpu/drm/i915/i915_request.h | 72 +- drivers/gpu/drm/i915/i915_reset.c | 16 +- drivers/gpu/drm/i915/i915_scheduler.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 844 +++++------------- drivers/gpu/drm/i915/intel_engine_cs.c | 35 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 96 +- .../drm/i915/selftests/i915_mock_selftests.h | 1 - drivers/gpu/drm/i915/selftests/i915_request.c | 425 +++++++++ drivers/gpu/drm/i915/selftests/igt_spinner.c | 5 - .../drm/i915/selftests/intel_breadcrumbs.c | 470 ---------- .../gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/lib_sw_fence.c | 54 ++ drivers/gpu/drm/i915/selftests/lib_sw_fence.h | 3 + drivers/gpu/drm/i915/selftests/mock_engine.c | 17 +- drivers/gpu/drm/i915/selftests/mock_engine.h | 6 - 23 files changed, 909 insertions(+), 1500 deletions(-) delete mode 100644 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c9c230499420..29d52304c189 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1315,29 +1315,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - seq_printf(m, "%s:\n", engine->name); seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", engine->hangcheck.seqno, seqno[id], intel_engine_last_submit(engine), jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - seq_printf(m, "\twaiters? %s, fake irq active? %s\n", - yesno(intel_engine_has_waiter(engine)), + seq_printf(m, "\tfake irq active? %s\n", yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings))); - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); - seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); @@ -2021,18 +2008,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) return 0; } -static int count_irq_waiters(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int count = 0; - - for_each_engine(engine, i915, id) - count += intel_engine_has_waiter(engine); - - return count; -} - static const char *rps_power_to_str(unsigned int power) { static const char * const strings[] = { @@ -2072,7 +2047,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); - seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93e84751370f..6faf1f6faab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,6 +327,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { ce->gem_context = ctx; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); } static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 3769438228f6..6ba40ff6b91f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -164,6 +164,8 @@ struct i915_gem_context { struct intel_context { struct i915_gem_context *gem_context; struct intel_engine_cs *active; + struct list_head signal_link; + struct list_head signals; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 898e06014295..304a7ef7f7fb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -447,9 +447,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->sched_attr.priority, + erq->context, erq->seqno, + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &erq->flags) ? "!" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &erq->flags) ? "+" : "", + erq->sched_attr.priority, jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } @@ -530,7 +535,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " seqno: 0x%08x\n", ee->seqno); err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ee->waiting)); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", @@ -804,21 +808,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, error->epoch); } - if (IS_ERR(ee->waiters)) { - err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", - m->i915->engine[i]->name); - } else if (ee->num_waiters) { - err_printf(m, "%s --- %d waiters\n", - m->i915->engine[i]->name, - ee->num_waiters); - for (j = 0; j < ee->num_waiters; j++) { - err_printf(m, " seqno 0x%08x for %s [%d]\n", - ee->waiters[j].seqno, - ee->waiters[j].comm, - ee->waiters[j].pid); - } - } - print_error_obj(m, m->i915->engine[i], "ringbuffer", ee->ringbuffer); @@ -1000,8 +989,6 @@ void __i915_gpu_state_free(struct kref *error_ref) i915_error_object_free(ee->wa_ctx); kfree(ee->requests); - if (!IS_ERR_OR_NULL(ee->waiters)) - kfree(ee->waiters); } for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) @@ -1205,59 +1192,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine, I915_READ(RING_SYNC_2(engine->mmio_base)); } -static void error_record_engine_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_error_waiter *waiter; - struct rb_node *rb; - int count; - - ee->num_waiters = 0; - ee->waiters = NULL; - - if (RB_EMPTY_ROOT(&b->waiters)) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - count = 0; - for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) - count++; - spin_unlock_irq(&b->rb_lock); - - waiter = NULL; - if (count) - waiter = kmalloc_array(count, - sizeof(struct drm_i915_error_waiter), - GFP_ATOMIC); - if (!waiter) - return; - - if (!spin_trylock_irq(&b->rb_lock)) { - kfree(waiter); - ee->waiters = ERR_PTR(-EDEADLK); - return; - } - - ee->waiters = waiter; - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - strcpy(waiter->comm, w->tsk->comm); - waiter->pid = w->tsk->pid; - waiter->seqno = w->seqno; - waiter++; - - if (++ee->num_waiters == count) - break; - } - spin_unlock_irq(&b->rb_lock); -} - static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) @@ -1293,7 +1227,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, intel_engine_get_instdone(engine, &ee->instdone); - ee->waiting = intel_engine_has_waiter(engine); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); @@ -1367,6 +1300,7 @@ static void record_request(struct i915_request *request, { struct i915_gem_context *ctx = request->gem_context; + erq->flags = request->fence.flags; erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&ctx->ban_score); @@ -1542,7 +1476,6 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->engine_id = i; error_record_engine_registers(error, engine, ee); - error_record_engine_waiters(engine, ee); error_record_engine_execlists(engine, ee); request = i915_gem_find_active_request(engine); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 231173786eae..74757c424aab 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -82,8 +82,6 @@ struct i915_gpu_state { int engine_id; /* Software tracked state */ bool idle; - bool waiting; - int num_waiters; unsigned long hangcheck_timestamp; struct i915_address_space *vm; int num_requests; @@ -147,6 +145,7 @@ struct i915_gpu_state { struct drm_i915_error_object *default_state; struct drm_i915_error_request { + unsigned long flags; long jiffies; pid_t pid; u32 context; @@ -159,12 +158,6 @@ struct i915_gpu_state { } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - struct { u32 gfx_mode; union { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0fcdb14c50f4..eab085686a2a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1169,66 +1169,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) return; } -static void notify_ring(struct intel_engine_cs *engine) -{ - const u32 seqno = intel_engine_get_seqno(engine); - struct i915_request *rq = NULL; - struct task_struct *tsk = NULL; - struct intel_wait *wait; - - if (unlikely(!engine->breadcrumbs.irq_armed)) - return; - - rcu_read_lock(); - - spin_lock(&engine->breadcrumbs.irq_lock); - wait = engine->breadcrumbs.irq_wait; - if (wait) { - /* - * We use a callback from the dma-fence to submit - * requests after waiting on our own requests. To - * ensure minimum delay in queuing the next request to - * hardware, signal the fence now rather than wait for - * the signaler to be woken up. We still wake up the - * waiter in order to handle the irq-seqno coherency - * issues (we may receive the interrupt before the - * seqno is written, see __i915_request_irq_complete()) - * and to handle coalescing of multiple seqno updates - * and many waiters. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - struct i915_request *waiter = wait->request; - - if (waiter && - !i915_request_signaled(waiter) && - intel_wait_check_request(wait, waiter)) - rq = i915_request_get(waiter); - - tsk = wait->tsk; - } - - engine->breadcrumbs.irq_count++; - } else { - if (engine->breadcrumbs.irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - } - spin_unlock(&engine->breadcrumbs.irq_lock); - - if (rq) { - spin_lock(&rq->lock); - dma_fence_signal_locked(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - spin_unlock(&rq->lock); - - i915_request_put(rq); - } - - if (tsk && tsk->state & TASK_NORMAL) - wake_up_process(tsk); - - rcu_read_unlock(); -} - static void vlv_c0_read(struct drm_i915_private *dev_priv, struct intel_rps_ei *ei) { @@ -1473,20 +1413,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); } static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(dev_priv->engine[BCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1506,7 +1446,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { - notify_ring(engine); + intel_engine_breadcrumbs_irq(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1852,7 +1792,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (HAS_VEBOX(dev_priv)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->engine[VECS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -4276,7 +4216,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) I915_WRITE16(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4384,7 +4324,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4529,10 +4469,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) I915_WRITE(IIR, iir); if (iir & I915_USER_INTERRUPT) - notify_ring(dev_priv->engine[RCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(dev_priv->engine[VCS]); + intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7db15b7b3de8..9ed5baf157a3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - return intel_engine_enable_signaling(to_request(fence), true); + return i915_request_enable_breadcrumb(to_request(fence)); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -203,7 +203,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - intel_engine_cancel_signaling(rq); + i915_request_cancel_breadcrumb(rq); if (rq->waitboost) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); @@ -377,9 +377,12 @@ void __i915_request_submit(struct i915_request *request) /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); request->global_seqno = seqno; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request, false); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !i915_request_enable_breadcrumb(request)) + intel_engine_queue_breadcrumbs(engine); spin_unlock(&request->lock); engine->emit_fini_breadcrumb(request, @@ -389,8 +392,6 @@ void __i915_request_submit(struct i915_request *request) move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); - - wake_up_all(&request->execute); } void i915_request_submit(struct i915_request *request) @@ -433,7 +434,9 @@ void __i915_request_unsubmit(struct i915_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); + i915_request_cancel_breadcrumb(request); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ @@ -633,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - init_waitqueue_head(&rq->execute); i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; - rq->signaling.wait.seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; @@ -1030,13 +1031,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request *rq, - u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, + int state, unsigned long timeout_us) { - struct intel_engine_cs *engine = rq->engine; - unsigned int irq, cpu; - - GEM_BUG_ON(!seqno); + unsigned int cpu; /* * Only wait for the request if we know it is likely to complete. @@ -1044,12 +1042,12 @@ static bool __i915_spin_request(const struct i915_request *rq, * We don't track the timestamps around requests, nor the average * request length, so we do not have a good indicator that this * request will complete within the timeout. What we do know is the - * order in which requests are executed by the engine and so we can - * tell if the request has started. If the request hasn't started yet, - * it is a fair assumption that it will not complete within our - * relatively short timeout. + * order in which requests are executed by the context and so we can + * tell if the request has been started. If the request is not even + * running yet, it is a fair assumption that it will not complete + * within our relatively short timeout. */ - if (!intel_engine_has_started(engine, seqno)) + if (!i915_request_is_running(rq)) return false; /* @@ -1063,20 +1061,10 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { - if (intel_engine_has_completed(engine, seqno)) - return seqno == i915_request_global_seqno(rq); - - /* - * Seqno are meant to be ordered *before* the interrupt. If - * we see an interrupt without a corresponding seqno advance, - * assume we won't see one in the near future but require - * the engine->seqno_barrier() to fixup coherency. - */ - if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) - break; + if (i915_request_completed(rq)) + return true; if (signal_pending_state(state, current)) break; @@ -1090,6 +1078,18 @@ static bool __i915_spin_request(const struct i915_request *rq, return false; } +struct request_wait { + struct dma_fence_cb cb; + struct task_struct *tsk; +}; + +static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct request_wait *wait = container_of(cb, typeof(*wait), cb); + + wake_up_process(wait->tsk); +} + /** * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon @@ -1115,8 +1115,7 @@ long i915_request_wait(struct i915_request *rq, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT_FUNC(exec, default_wake_function); - struct intel_wait wait; + struct request_wait wait; might_sleep(); GEM_BUG_ON(timeout < 0); @@ -1128,85 +1127,42 @@ long i915_request_wait(struct i915_request *rq, return -ETIME; trace_i915_request_wait_begin(rq, flags); - add_wait_queue(&rq->execute, &exec); - intel_wait_init(&wait); + + /* Optimistic short spin before touching IRQs */ + if (__i915_spin_request(rq, state, 5)) + goto out; + if (flags & I915_WAIT_PRIORITY) i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); -restart: - do { - set_current_state(state); - if (intel_wait_update_request(&wait, rq)) - break; - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } - - if (!timeout) { - timeout = -ETIME; - goto complete; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, wait.seqno, state, 5)) - goto complete; - - set_current_state(state); - if (intel_engine_add_wait(rq->engine, &wait)) - /* - * In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; + wait.tsk = current; + if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) + goto out; for (;;) { - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - - if (intel_wait_complete(&wait) && - intel_wait_check_request(&wait, rq)) - break; - set_current_state(state); -wakeup: if (i915_request_completed(rq)) break; - /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(rq, wait.seqno, state, 2)) + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; break; - - if (!intel_wait_check_request(&wait, rq)) { - intel_engine_remove_wait(rq->engine, &wait); - goto restart; } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); } - - intel_engine_remove_wait(rq->engine, &wait); -complete: __set_current_state(TASK_RUNNING); - remove_wait_queue(&rq->execute, &exec); - trace_i915_request_wait_end(rq); + dma_fence_remove_callback(&rq->fence, &wait.cb); + +out: + trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 340d6216791c..3cffb96203b9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -38,23 +38,34 @@ struct drm_i915_gem_object; struct i915_request; struct i915_timeline; -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - struct i915_request *request; - u32 seqno; -}; - -struct intel_signal_node { - struct intel_wait wait; - struct list_head link; -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; }; +enum { + /* + * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. + * + * Set by __i915_request_submit() on handing over to HW, and cleared + * by __i915_request_unsubmit() if we preempt this request. + * + * Finally cleared for consistency on retiring the request, when + * we know the HW is no longer running this request. + * + * See i915_request_is_active() + */ + I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + + /* + * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list + * + * Internal bookkeeping used by the breadcrumb code to track when + * a request is on the various signal_list. + */ + I915_FENCE_FLAG_SIGNAL, +}; + /** * Request queue structure. * @@ -97,7 +108,7 @@ struct i915_request { struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; - struct intel_signal_node signaling; + struct list_head signal_link; /* * The rcu epoch of when this request was allocated. Used to judiciously @@ -116,7 +127,6 @@ struct i915_request { */ struct i915_sw_fence submit; wait_queue_entry_t submitq; - wait_queue_head_t execute; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -255,7 +265,7 @@ i915_request_put(struct i915_request *rq) * that it has passed the global seqno and the global seqno is unchanged * after the read, it is indeed complete). */ -static u32 +static inline u32 i915_request_global_seqno(const struct i915_request *request) { return READ_ONCE(request->global_seqno); @@ -277,6 +287,10 @@ void i915_request_skip(struct i915_request *request, int error); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); +/* Note: part of the intel_breadcrumbs family */ +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) @@ -293,6 +307,11 @@ static inline bool i915_request_signaled(const struct i915_request *rq) return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags); } +static inline bool i915_request_is_active(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ @@ -330,6 +349,11 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) return seqno; } +static inline bool __i915_request_has_started(const struct i915_request *rq) +{ + return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -345,7 +369,23 @@ static inline bool i915_request_started(const struct i915_request *rq) return true; /* Remember: started but may have since been preempted! */ - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); + return __i915_request_has_started(rq); +} + +/** + * i915_request_is_running - check if the request may actually be executing + * @rq: the request + * + * Returns true if the request is currently submitted to hardware, has passed + * its start point (i.e. the context is setup and not busywaiting). Note that + * it may no longer be running by the time the function returns! + */ +static inline bool i915_request_is_running(const struct i915_request *rq) +{ + if (!i915_request_is_active(rq)) + return false; + + return __i915_request_has_started(rq); } static inline bool i915_request_completed(const struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index acf3c777e49d..4462007a681c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -29,7 +29,7 @@ static void engine_skip_context(struct i915_request *rq) spin_lock(&timeline->lock); - if (rq->global_seqno) { + if (i915_request_is_active(rq)) { list_for_each_entry_continue(rq, &engine->timeline.requests, link) if (rq->gem_context == hung_ctx) @@ -751,18 +751,20 @@ static void reset_restart(struct drm_i915_private *i915) static void nop_submit_request(struct i915_request *request) { + struct intel_engine_cs *engine = request->engine; unsigned long flags; GEM_TRACE("%s fence %llx:%lld -> -EIO\n", - request->engine->name, - request->fence.context, request->fence.seqno); + engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - intel_engine_write_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline.lock, flags); + intel_engine_write_global_seqno(engine, request->global_seqno); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + intel_engine_queue_breadcrumbs(engine); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -817,7 +819,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { reset_finish_engine(engine); - intel_engine_wakeup(engine); + intel_engine_signal_breadcrumbs(engine); } smp_mb__before_atomic(); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2d172991024f..d01683167c77 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -243,7 +243,7 @@ static bool inflight(const struct i915_request *rq, { const struct i915_request *active; - if (!rq->global_seqno) + if (!i915_request_is_active(rq)) return false; active = port_request(engine->execlists.port); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b58915b8708b..b0795b0ad227 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -29,46 +29,147 @@ #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq) -static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) +static void irq_enable(struct intel_engine_cs *engine) { - struct intel_wait *wait; - unsigned int result = 0; + if (!engine->irq_enable) + return; + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->i915->irq_lock); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ lockdep_assert_held(&b->irq_lock); - wait = b->irq_wait; - if (wait) { - /* - * N.B. Since task_asleep() and ttwu are not atomic, the - * waiter may actually go to sleep after the check, causing - * us to suppress a valid wakeup. We prefer to reduce the - * number of false positive missed_breadcrumb() warnings - * at the expense of a few false negatives, as it it easy - * to trigger a false positive under heavy load. Enough - * signal should remain from genuine missed_breadcrumb() - * for us to detect in CI. - */ - bool was_asleep = task_asleep(wait->tsk); + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) + irq_disable(container_of(b, + struct intel_engine_cs, + breadcrumbs)); - result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk) && was_asleep) - result |= ENGINE_WAKEUP_ASLEEP; + b->irq_armed = false; +} + +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_armed) + return; + + spin_lock_irq(&b->irq_lock); + if (b->irq_armed) + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irq(&b->irq_lock); +} + +static inline bool __request_completed(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce, *cn; + struct list_head *pos, *next; + LIST_HEAD(signal); + + spin_lock(&b->irq_lock); + + b->irq_fired = true; + if (b->irq_armed && list_empty(&b->signalers)) + __intel_breadcrumbs_disarm_irq(b); + + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { + GEM_BUG_ON(list_empty(&ce->signals)); + + list_for_each_safe(pos, next, &ce->signals) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + if (!__request_completed(rq)) + break; + + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* + * We may race with direct invocation of + * dma_fence_signal(), e.g. i915_request_retire(), + * in which case we can skip processing it ourselves. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) + continue; + + /* + * Queue for execution after dropping the signaling + * spinlock as the callback chain may end up adding + * more signalers to the same context or engine. + */ + i915_request_get(rq); + list_add_tail(&rq->signal_link, &signal); + } + + /* + * We process the list deletion in bulk, only using a list_add + * (not list_move) above but keeping the status of + * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. + */ + if (!list_is_first(pos, &ce->signals)) { + /* Advance the list to the first incomplete request */ + __list_del_many(&ce->signals, pos); + if (&ce->signals == pos) /* now empty */ + list_del_init(&ce->signal_link); + } } + spin_unlock(&b->irq_lock); + + list_for_each_safe(pos, next, &signal) { + struct i915_request *rq = + list_entry(pos, typeof(*rq), signal_link); + + dma_fence_signal(&rq->fence); + i915_request_put(rq); + } + + return !list_empty(&signal); +} + +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) +{ + bool result; + + local_irq_disable(); + result = intel_engine_breadcrumbs_irq(engine); + local_irq_enable(); + return result; } -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static void signal_irq_work(struct irq_work *work) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - unsigned int result; + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), breadcrumbs.irq_work); - spin_lock_irqsave(&b->irq_lock, flags); - result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - return result; + intel_engine_breadcrumbs_irq(engine); } static unsigned long wait_timeout(void) @@ -94,19 +195,15 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) struct intel_engine_cs *engine = from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned int irq_count; if (!b->irq_armed) return; - irq_count = READ_ONCE(b->irq_count); - if (b->hangcheck_interrupts != irq_count) { - b->hangcheck_interrupts = irq_count; - mod_timer(&b->hangcheck, wait_timeout()); - return; - } + if (b->irq_fired) + goto rearm; - /* We keep the hangcheck timer alive until we disarm the irq, even + /* + * We keep the hangcheck timer alive until we disarm the irq, even * if there are no waiters at present. * * If the waiter was currently running, assume it hasn't had a chance @@ -118,10 +215,13 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) * but we still have a waiter. Assuming all batches complete within * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + synchronize_hardirq(engine->i915->drm.irq); + if (intel_engine_signal_breadcrumbs(engine)) { missed_breadcrumb(engine); mod_timer(&b->fake_irq, jiffies + 1); } else { +rearm: + b->irq_fired = false; mod_timer(&b->hangcheck, wait_timeout()); } } @@ -140,11 +240,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) * oldest waiter to do the coherent seqno check. */ - spin_lock_irq(&b->irq_lock); - if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irq(&b->irq_lock); - if (!b->irq_armed) + if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed) return; /* If the user has disabled the fake-irq, restore the hangchecking */ @@ -156,43 +252,6 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) mod_timer(&b->fake_irq, jiffies + 1); } -static void irq_enable(struct intel_engine_cs *engine) -{ - if (!engine->irq_enable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -static void irq_disable(struct intel_engine_cs *engine) -{ - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->i915->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->i915->irq_lock); -} - -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->irq_lock); - GEM_BUG_ON(b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - - GEM_BUG_ON(!b->irq_enabled); - if (!--b->irq_enabled) - irq_disable(engine); - - b->irq_armed = false; -} - void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -215,40 +274,6 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n; - - if (!b->irq_armed) - return; - - /* - * We only disarm the irq when we are idle (all requests completed), - * so if the bottom-half remains asleep, it missed the request - * completion. - */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) - missed_breadcrumb(engine); - - spin_lock_irq(&b->rb_lock); - - spin_lock(&b->irq_lock); - b->irq_wait = NULL; - if (b->irq_armed) - __intel_engine_disarm_breadcrumbs(engine); - spin_unlock(&b->irq_lock); - - rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { - GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno)); - RB_CLEAR_NODE(&wait->node); - wake_up_process(wait->tsk); - } - b->waiters = RB_ROOT; - - spin_unlock_irq(&b->rb_lock); -} - static bool use_fake_irq(const struct intel_breadcrumbs *b) { const struct intel_engine_cs *engine = @@ -264,7 +289,7 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) * engine->seqno_barrier(), a timing error that should be transient * and unlikely to reoccur. */ - return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; + return !b->irq_fired; } static void enable_fake_irq(struct intel_breadcrumbs *b) @@ -276,7 +301,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b) mod_timer(&b->hangcheck, wait_timeout()); } -static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) +static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); @@ -315,488 +340,17 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) return enabled; } -static inline struct intel_wait *to_wait(struct rb_node *node) -{ - return rb_entry(node, struct intel_wait, node); -} - -static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, - struct intel_wait *wait) -{ - lockdep_assert_held(&b->rb_lock); - GEM_BUG_ON(b->irq_wait == wait); - - /* - * This request is completed, so remove it from the tree, mark it as - * complete, and *then* wake up the associated task. N.B. when the - * task wakes up, it will find the empty rb_node, discern that it - * has already been removed from the tree and skip the serialisation - * of the b->rb_lock and b->irq_lock. This means that the destruction - * of the intel_wait is not serialised with the interrupt handler - * by the waiter - it must instead be serialised by the caller. - */ - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - - if (wait->tsk->state != TASK_RUNNING) - wake_up_process(wait->tsk); /* implicit smp_wmb() */ -} - -static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, - struct rb_node *next) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - spin_lock(&b->irq_lock); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(!b->irq_wait); - b->irq_wait = to_wait(next); - spin_unlock(&b->irq_lock); - - /* We always wake up the next waiter that takes over as the bottom-half - * as we may delegate not only the irq-seqno barrier to the next waiter - * but also the task of waking up concurrent waiters. - */ - if (next) - wake_up_process(to_wait(next)->tsk); -} - -static bool __intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node **p, *parent, *completed; - bool first, armed; - u32 seqno; - - GEM_BUG_ON(!wait->seqno); - - /* Insert the request into the retirement ordered list - * of waiters by walking the rbtree. If we are the oldest - * seqno in the tree (the first to be retired), then - * set ourselves as the bottom-half. - * - * As we descend the tree, prune completed branches since we hold the - * spinlock we know that the first_waiter must be delayed and can - * reduce some of the sequential wake up latency if we take action - * ourselves and wake up the completed tasks in parallel. Also, by - * removing stale elements in the tree, we may be able to reduce the - * ping-pong between the old bottom-half and ourselves as first-waiter. - */ - armed = false; - first = true; - parent = NULL; - completed = NULL; - seqno = intel_engine_get_seqno(engine); - - /* If the request completed before we managed to grab the spinlock, - * return now before adding ourselves to the rbtree. We let the - * current bottom-half handle any pending wakeups and instead - * try and get out of the way quickly. - */ - if (i915_seqno_passed(seqno, wait->seqno)) { - RB_CLEAR_NODE(&wait->node); - return first; - } - - p = &b->waiters.rb_node; - while (*p) { - parent = *p; - if (wait->seqno == to_wait(parent)->seqno) { - /* We have multiple waiters on the same seqno, select - * the highest priority task (that with the smallest - * task->prio) to serve as the bottom-half for this - * group. - */ - if (wait->tsk->prio > to_wait(parent)->tsk->prio) { - p = &parent->rb_right; - first = false; - } else { - p = &parent->rb_left; - } - } else if (i915_seqno_passed(wait->seqno, - to_wait(parent)->seqno)) { - p = &parent->rb_right; - if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) - completed = parent; - else - first = false; - } else { - p = &parent->rb_left; - } - } - rb_link_node(&wait->node, parent, p); - rb_insert_color(&wait->node, &b->waiters); - - if (first) { - spin_lock(&b->irq_lock); - b->irq_wait = wait; - /* After assigning ourselves as the new bottom-half, we must - * perform a cursory check to prevent a missed interrupt. - * Either we miss the interrupt whilst programming the hardware, - * or if there was a previous waiter (for a later seqno) they - * may be woken instead of us (due to the inherent race - * in the unlocked read of b->irq_seqno_bh in the irq handler) - * and so we miss the wake up. - */ - armed = __intel_breadcrumbs_enable_irq(b); - spin_unlock(&b->irq_lock); - } - - if (completed) { - /* Advance the bottom-half (b->irq_wait) before we wake up - * the waiters who may scribble over their intel_wait - * just as the interrupt handler is dereferencing it via - * b->irq_wait. - */ - if (!first) { - struct rb_node *next = rb_next(completed); - GEM_BUG_ON(next == &wait->node); - __intel_breadcrumbs_next(engine, next); - } - - do { - struct intel_wait *crumb = to_wait(completed); - completed = rb_prev(completed); - __intel_breadcrumbs_finish(b, crumb); - } while (completed); - } - - GEM_BUG_ON(!b->irq_wait); - GEM_BUG_ON(!b->irq_armed); - GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); - - return armed; -} - -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - bool armed; - - spin_lock_irq(&b->rb_lock); - armed = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); - if (armed) - return armed; - - /* Make the caller recheck if its request has already started. */ - return intel_engine_has_started(engine, wait->seqno); -} - -static inline bool chain_wakeup(struct rb_node *rb, int priority) -{ - return rb && to_wait(rb)->tsk->prio <= priority; -} - -static inline int wakeup_priority(struct intel_breadcrumbs *b, - struct task_struct *tsk) -{ - if (tsk == b->signaler) - return INT_MIN; - else - return tsk->prio; -} - -static void __intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - lockdep_assert_held(&b->rb_lock); - - if (RB_EMPTY_NODE(&wait->node)) - goto out; - - if (b->irq_wait == wait) { - const int priority = wakeup_priority(b, wait->tsk); - struct rb_node *next; - - /* We are the current bottom-half. Find the next candidate, - * the first waiter in the queue on the remaining oldest - * request. As multiple seqnos may complete in the time it - * takes us to wake up and find the next waiter, we have to - * wake up that waiter for it to perform its own coherent - * completion check. - */ - next = rb_next(&wait->node); - if (chain_wakeup(next, priority)) { - /* If the next waiter is already complete, - * wake it up and continue onto the next waiter. So - * if have a small herd, they will wake up in parallel - * rather than sequentially, which should reduce - * the overall latency in waking all the completed - * clients. - * - * However, waking up a chain adds extra latency to - * the first_waiter. This is undesirable if that - * waiter is a high priority task. - */ - u32 seqno = intel_engine_get_seqno(engine); - - while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { - struct rb_node *n = rb_next(next); - - __intel_breadcrumbs_finish(b, to_wait(next)); - next = n; - if (!chain_wakeup(next, priority)) - break; - } - } - - __intel_breadcrumbs_next(engine, next); - } else { - GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); - } - - GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); - rb_erase(&wait->node, &b->waiters); - RB_CLEAR_NODE(&wait->node); - -out: - GEM_BUG_ON(b->irq_wait == wait); - GEM_BUG_ON(rb_first(&b->waiters) != - (b->irq_wait ? &b->irq_wait->node : NULL)); -} - -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) { - GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait); - return; - } - - spin_lock_irq(&b->rb_lock); - __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->rb_lock); -} - -static void signaler_set_rtpriority(void) -{ - struct sched_param param = { .sched_priority = 1 }; - - sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -} - -static int intel_breadcrumbs_signaler(void *arg) -{ - struct intel_engine_cs *engine = arg; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct i915_request *rq, *n; - - /* Install ourselves with high priority to reduce signalling latency */ - signaler_set_rtpriority(); - - do { - bool do_schedule = true; - LIST_HEAD(list); - u32 seqno; - - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&b->signals)) - goto sleep; - - /* - * We are either woken up by the interrupt bottom-half, - * or by a client adding a new signaller. In both cases, - * the GPU seqno may have advanced beyond our oldest signal. - * If it has, propagate the signal, remove the waiter and - * check again with the next oldest signal. Otherwise we - * need to wait for a new interrupt from the GPU or for - * a new client. - */ - seqno = intel_engine_get_seqno(engine); - - spin_lock_irq(&b->rb_lock); - list_for_each_entry_safe(rq, n, &b->signals, signaling.link) { - u32 this = rq->signaling.wait.seqno; - - GEM_BUG_ON(!rq->signaling.wait.seqno); - - if (!i915_seqno_passed(seqno, this)) - break; - - if (likely(this == i915_request_global_seqno(rq))) { - __intel_engine_remove_wait(engine, - &rq->signaling.wait); - - rq->signaling.wait.seqno = 0; - __list_del_entry(&rq->signaling.link); - - if (!i915_request_signaled(rq)) { - list_add_tail(&rq->signaling.link, - &list); - i915_request_get(rq); - } - } - } - spin_unlock_irq(&b->rb_lock); - - if (!list_empty(&list)) { - local_bh_disable(); - list_for_each_entry_safe(rq, n, &list, signaling.link) { - dma_fence_signal(&rq->fence); - GEM_BUG_ON(!i915_request_completed(rq)); - i915_request_put(rq); - } - local_bh_enable(); /* kick start the tasklets */ - - /* - * If the engine is saturated we may be continually - * processing completed requests. This angers the - * NMI watchdog if we never let anything else - * have access to the CPU. Let's pretend to be nice - * and relinquish the CPU if we burn through the - * entire RT timeslice! - */ - do_schedule = need_resched(); - } - - if (unlikely(do_schedule)) { -sleep: - if (kthread_should_park()) - kthread_parkme(); - - if (unlikely(kthread_should_stop())) - break; - - schedule(); - } - } while (1); - __set_current_state(TASK_RUNNING); - - return 0; -} - -static void insert_signal(struct intel_breadcrumbs *b, - struct i915_request *request, - const u32 seqno) -{ - struct i915_request *iter; - - lockdep_assert_held(&b->rb_lock); - - /* - * A reasonable assumption is that we are called to add signals - * in sequence, as the requests are submitted for execution and - * assigned a global_seqno. This will be the case for the majority - * of internally generated signals (inter-engine signaling). - * - * Out of order waiters triggering random signaling enabling will - * be more problematic, but hopefully rare enough and the list - * small enough that the O(N) insertion sort is not an issue. - */ - - list_for_each_entry_reverse(iter, &b->signals, signaling.link) - if (i915_seqno_passed(seqno, iter->signaling.wait.seqno)) - break; - - list_add(&request->signaling.link, &iter->signaling.link); -} - -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait = &request->signaling.wait; - u32 seqno; - - /* - * Note that we may be called from an interrupt handler on another - * device (e.g. nouveau signaling a fence completion causing us - * to submit a request, and so enable signaling). As such, - * we need to make sure that all other users of b->rb_lock protect - * against interrupts, i.e. use spin_lock_irqsave. - */ - - /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - seqno = i915_request_global_seqno(request); - if (!seqno) /* will be enabled later upon execution */ - return true; - - GEM_BUG_ON(wait->seqno); - wait->tsk = b->signaler; - wait->request = request; - wait->seqno = seqno; - - /* - * Add ourselves into the list of waiters, but registering our - * bottom-half as the signaller thread. As per usual, only the oldest - * waiter (not just signaller) is tasked as the bottom-half waking - * up all completed waiters after the user interrupt. - * - * If we are the oldest waiter, enable the irq (after which we - * must double check that the seqno did not complete). - */ - spin_lock(&b->rb_lock); - insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, wait); - spin_unlock(&b->rb_lock); - - if (wakeup) { - wake_up_process(b->signaler); - return !intel_wait_complete(wait); - } - - return true; -} - -void intel_engine_cancel_signaling(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&request->lock); - - if (!READ_ONCE(request->signaling.wait.seqno)) - return; - - spin_lock(&b->rb_lock); - __intel_engine_remove_wait(engine, &request->signaling.wait); - if (fetch_and_zero(&request->signaling.wait.seqno)) - __list_del_entry(&request->signaling.link); - spin_unlock(&b->rb_lock); -} - -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct task_struct *tsk; - - spin_lock_init(&b->rb_lock); spin_lock_init(&b->irq_lock); + INIT_LIST_HEAD(&b->signalers); + + init_irq_work(&b->irq_work, signal_irq_work); timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); - - INIT_LIST_HEAD(&b->signals); - - /* Spawn a thread to provide a common bottom-half for all signals. - * As this is an asynchronous interface we cannot steal the current - * task for handling the bottom-half to the user interrupt, therefore - * we create a thread to do the coherent seqno dance after the - * interrupt and then signal the waitqueue (via the dma-buf/fence). - */ - tsk = kthread_run(intel_breadcrumbs_signaler, engine, - "i915/signal:%d", engine->id); - if (IS_ERR(tsk)) - return PTR_ERR(tsk); - - b->signaler = tsk; - - return 0; } static void cancel_fake_irq(struct intel_engine_cs *engine) @@ -832,19 +386,103 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->irq_wait)); - WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(!list_empty(&b->signals)); - - if (!IS_ERR_OR_NULL(b->signaler)) - kthread_stop(b->signaler); - cancel_fake_irq(engine); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_breadcrumbs.c" -#endif +bool i915_request_enable_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) && + !__request_completed(rq)) { + struct intel_context *ce = rq->hw_context; + struct list_head *pos; + + __intel_breadcrumbs_arm_irq(b); + + /* + * We keep the seqno in retirement order, so we can break + * inside intel_engine_breadcrumbs_irq as soon as we've passed + * the last completed request (or seen a request that hasn't + * event started). We could iterate the timeline->requests list, + * but keeping a separate signalers_list has the advantage of + * hopefully being much smaller than the full list and so + * provides faster iteration and detection when there are no + * more interrupts required for this context. + * + * We typically expect to add new signalers in order, so we + * start looking for our insertion point from the tail of + * the list. + */ + list_for_each_prev(pos, &ce->signals) { + struct i915_request *it = + list_entry(pos, typeof(*it), signal_link); + + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) + break; + } + list_add(&rq->signal_link, pos); + if (pos == &ce->signals) /* catch transitions from empty list */ + list_move_tail(&ce->signal_link, &b->signalers); + + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); + + return !__request_completed(rq); +} + +void i915_request_cancel_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; + + if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; + + spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + struct intel_context *ce = rq->hw_context; + + list_del(&rq->signal_link); + if (list_empty(&ce->signals)) + list_del_init(&ce->signal_link); + + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + } + spin_unlock(&b->irq_lock); +} + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_context *ce; + struct i915_request *rq; + + if (list_empty(&b->signalers)) + return; + + drm_printf(p, "Signals:\n"); + + spin_lock_irq(&b->irq_lock); + list_for_each_entry(ce, &b->signalers, signal_link) { + list_for_each_entry(rq, &ce->signals, signal_link) { + drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + jiffies_to_msecs(jiffies - rq->emitted_jiffies)); + } + } + spin_unlock_irq(&b->irq_lock); + + if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + drm_printf(p, "Fake irq active\n"); +} diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0a610c9691fd..71c01eb13af1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -458,12 +458,6 @@ cleanup: void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - intel_engine_wakeup(engine); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } @@ -607,6 +601,7 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); + intel_engine_init_breadcrumbs(engine); intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); @@ -717,20 +712,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine) } } - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto err_unpin_preempt; - ret = measure_breadcrumb_dw(engine); if (ret < 0) - goto err_breadcrumbs; + goto err_unpin_preempt; engine->emit_fini_breadcrumb_dw = ret; return 0; -err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (i915->preempt_context) __intel_context_unpin(i915->preempt_context, engine); @@ -1294,12 +1283,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? "+" : "", rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1492,12 +1483,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { - struct intel_breadcrumbs * const b = &engine->breadcrumbs; struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq; intel_wakeref_t wakeref; - unsigned long flags; - struct rb_node *rb; if (header) { va_list ap; @@ -1565,21 +1553,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_execlists_show_requests(engine, m, print_request, 8); - spin_lock_irqsave(&b->rb_lock, flags); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - drm_printf(m, "\t%s [%d:%c] waiting for %x\n", - w->tsk->comm, w->tsk->pid, - task_state_to_char(w->tsk), - w->seqno); - } - spin_unlock_irqrestore(&b->rb_lock, flags); - drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); + + intel_engine_print_breadcrumbs(engine, m); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 668ed67336a2..b889b27f8aeb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -743,7 +743,7 @@ static int init_ring_common(struct intel_engine_cs *engine) } /* Papering over lost _interrupts_ immediately following the restart */ - intel_engine_wakeup(engine); + intel_engine_queue_breadcrumbs(engine); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 953ccc2617ff..71f8ceb937ff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "i915_gem_batch_pool.h" @@ -381,22 +382,19 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t irq_lock; /* protects irq_*; irqsafe */ - struct intel_wait *irq_wait; /* oldest waiter by retirement */ + spinlock_t irq_lock; + struct list_head signalers; - spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ - struct rb_root waiters; /* sorted by retirement, priority */ - struct list_head signals; /* sorted by retirement */ - struct task_struct *signaler; /* used for fence signalling */ + struct irq_work irq_work; /* for use from inside irq_lock */ struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; unsigned int irq_enabled; - unsigned int irq_count; - bool irq_armed : 1; + bool irq_armed; + bool irq_fired; } breadcrumbs; struct { @@ -885,83 +883,29 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine, void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); - -static inline void intel_wait_init(struct intel_wait *wait) -{ - wait->tsk = current; - wait->request = NULL; -} - -static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - -static inline bool intel_wait_has_seqno(const struct intel_wait *wait) -{ - return wait->seqno; -} - -static inline bool -intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) -{ - wait->seqno = seqno; - return intel_wait_has_seqno(wait); -} - -static inline bool -intel_wait_update_request(struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool -intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) -{ - return wait->seqno == seqno; -} - -static inline bool -intel_wait_check_request(const struct intel_wait *wait, - const struct i915_request *rq) -{ - return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); -} - -static inline bool intel_wait_complete(const struct intel_wait *wait) -{ - return RB_EMPTY_NODE(&wait->node); -} - -bool intel_engine_add_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait); -bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); -void intel_engine_cancel_signaling(struct i915_request *request); - -static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) -{ - return READ_ONCE(engine->breadcrumbs.irq_wait); -} - -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -#define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); -void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +static inline void +intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) +{ + irq_work_queue(&engine->breadcrumbs.irq_work); +} + +bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine); + void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 4a83a1c6c406..88e5ab586337 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,7 +15,6 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 4d4b86b5fa11..6733dc5b6b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -25,9 +25,12 @@ #include #include "../i915_selftest.h" +#include "i915_random.h" #include "igt_live_test.h" +#include "lib_sw_fence.h" #include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -247,6 +250,254 @@ err_context_0: return err; } +struct smoketest { + struct intel_engine_cs *engine; + struct i915_gem_context **contexts; + atomic_long_t num_waits, num_fences; + int ncontexts, max_batch; + struct i915_request *(*request_alloc)(struct i915_gem_context *, + struct intel_engine_cs *); +}; + +static struct i915_request * +__mock_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return mock_request(engine, ctx, 0); +} + +static struct i915_request * +__live_request_alloc(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return i915_request_alloc(engine, ctx); +} + +static int __igt_breadcrumbs_smoketest(void *arg) +{ + struct smoketest *t = arg; + struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; + const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; + const unsigned int total = 4 * t->ncontexts + 1; + unsigned int num_waits = 0, num_fences = 0; + struct i915_request **requests; + I915_RND_STATE(prng); + unsigned int *order; + int err = 0; + + /* + * A very simple test to catch the most egregious of list handling bugs. + * + * At its heart, we simply create oodles of requests running across + * multiple kthreads and enable signaling on them, for the sole purpose + * of stressing our breadcrumb handling. The only inspection we do is + * that the fences were marked as signaled. + */ + + requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + if (!requests) + return -ENOMEM; + + order = i915_random_order(total, &prng); + if (!order) { + err = -ENOMEM; + goto out_requests; + } + + while (!kthread_should_stop()) { + struct i915_sw_fence *submit, *wait; + unsigned int n, count; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + err = -ENOMEM; + break; + } + + wait = heap_fence_create(GFP_KERNEL); + if (!wait) { + i915_sw_fence_commit(submit); + heap_fence_put(submit); + err = ENOMEM; + break; + } + + i915_random_reorder(order, total, &prng); + count = 1 + i915_prandom_u32_max_state(max_batch, &prng); + + for (n = 0; n < count; n++) { + struct i915_gem_context *ctx = + t->contexts[order[n] % t->ncontexts]; + struct i915_request *rq; + + mutex_lock(BKL); + + rq = t->request_alloc(ctx, t->engine); + if (IS_ERR(rq)) { + mutex_unlock(BKL); + err = PTR_ERR(rq); + count = n; + break; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + + requests[n] = i915_request_get(rq); + i915_request_add(rq); + + mutex_unlock(BKL); + + if (err >= 0) + err = i915_sw_fence_await_dma_fence(wait, + &rq->fence, + 0, + GFP_KERNEL); + + if (err < 0) { + i915_request_put(rq); + count = n; + break; + } + } + + i915_sw_fence_commit(submit); + i915_sw_fence_commit(wait); + + if (!wait_event_timeout(wait->wait, + i915_sw_fence_done(wait), + HZ / 2)) { + struct i915_request *rq = requests[count - 1]; + + pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n", + count, + rq->fence.context, rq->fence.seqno, + t->engine->name); + i915_gem_set_wedged(t->engine->i915); + GEM_BUG_ON(!i915_request_completed(rq)); + i915_sw_fence_wait(wait); + err = -EIO; + } + + for (n = 0; n < count; n++) { + struct i915_request *rq = requests[n]; + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags)) { + pr_err("%llu:%llu was not signaled!\n", + rq->fence.context, rq->fence.seqno); + err = -EINVAL; + } + + i915_request_put(rq); + } + + heap_fence_put(wait); + heap_fence_put(submit); + + if (err < 0) + break; + + num_fences += count; + num_waits++; + + cond_resched(); + } + + atomic_long_add(num_fences, &t->num_fences); + atomic_long_add(num_waits, &t->num_waits); + + kfree(order); +out_requests: + kfree(requests); + return err; +} + +static int mock_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t = { + .engine = i915->engine[RCS], + .ncontexts = 1024, + .max_batch = 1024, + .request_alloc = __mock_request_alloc + }; + unsigned int ncpus = num_online_cpus(); + struct task_struct **threads; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + */ + + threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; + + t.contexts = + kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + if (!t.contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&t.engine->i915->drm.struct_mutex); + for (n = 0; n < t.ncontexts; n++) { + t.contexts[n] = mock_context(t.engine->i915, "mock"); + if (!t.contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + + for (n = 0; n < ncpus; n++) { + threads[n] = kthread_run(__igt_breadcrumbs_smoketest, + &t, "igt/%d", n); + if (IS_ERR(threads[n])) { + ret = PTR_ERR(threads[n]); + ncpus = n; + break; + } + + get_task_struct(threads[n]); + } + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + + for (n = 0; n < ncpus; n++) { + int err; + + err = kthread_stop(threads[n]); + if (err < 0 && !ret) + ret = err; + + put_task_struct(threads[n]); + } + pr_info("Completed %lu waits for %lu fence across %d cpus\n", + atomic_long_read(&t.num_waits), + atomic_long_read(&t.num_fences), + ncpus); + + mutex_lock(&t.engine->i915->drm.struct_mutex); +out_contexts: + for (n = 0; n < t.ncontexts; n++) { + if (!t.contexts[n]) + break; + mock_context_close(t.contexts[n]); + } + mutex_unlock(&t.engine->i915->drm.struct_mutex); + kfree(t.contexts); +out_threads: + kfree(threads); + + return ret; +} + int i915_request_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -254,6 +505,7 @@ int i915_request_mock_selftests(void) SUBTEST(igt_wait_request), SUBTEST(igt_fence_wait), SUBTEST(igt_request_rewind), + SUBTEST(mock_breadcrumbs_smoketest), }; struct drm_i915_private *i915; intel_wakeref_t wakeref; @@ -812,6 +1064,178 @@ out_unlock: return err; } +static int +max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct i915_request *rq; + int ret; + + /* + * Before execlists, all contexts share the same ringbuffer. With + * execlists, each context/engine has a separate ringbuffer and + * for the purposes of this test, inexhaustible. + * + * For the global ringbuffer though, we have to be very careful + * that we do not wrap while preventing the execution of requests + * with a unsignaled fence. + */ + if (HAS_EXECLISTS(ctx->i915)) + return INT_MAX; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + } else { + int sz; + + ret = rq->ring->size - rq->reserved_space; + i915_request_add(rq); + + sz = rq->ring->emit - rq->head; + if (sz < 0) + sz += rq->ring->size; + ret /= sz; + ret /= 2; /* leave half spare, in case of emergency! */ + } + + return ret; +} + +static int live_breadcrumbs_smoketest(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct smoketest t[I915_NUM_ENGINES]; + unsigned int ncpus = num_online_cpus(); + unsigned long num_waits, num_fences; + struct intel_engine_cs *engine; + struct task_struct **threads; + struct igt_live_test live; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct drm_file *file; + unsigned int n; + int ret = 0; + + /* + * Smoketest our breadcrumb/signal handling for requests across multiple + * threads. A very simple test to only catch the most egregious of bugs. + * See __igt_breadcrumbs_smoketest(); + * + * On real hardware this time. + */ + + wakeref = intel_runtime_pm_get(i915); + + file = mock_file(i915); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_rpm; + } + + threads = kcalloc(ncpus * I915_NUM_ENGINES, + sizeof(*threads), + GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_file; + } + + memset(&t[0], 0, sizeof(t[0])); + t[0].request_alloc = __live_request_alloc; + t[0].ncontexts = 64; + t[0].contexts = kmalloc_array(t[0].ncontexts, + sizeof(*t[0].contexts), + GFP_KERNEL); + if (!t[0].contexts) { + ret = -ENOMEM; + goto out_threads; + } + + mutex_lock(&i915->drm.struct_mutex); + for (n = 0; n < t[0].ncontexts; n++) { + t[0].contexts[n] = live_context(i915, file); + if (!t[0].contexts[n]) { + ret = -ENOMEM; + goto out_contexts; + } + } + + ret = igt_live_test_begin(&live, i915, __func__, ""); + if (ret) + goto out_contexts; + + for_each_engine(engine, i915, id) { + t[id] = t[0]; + t[id].engine = engine; + t[id].max_batch = max_batches(t[0].contexts[0], engine); + if (t[id].max_batch < 0) { + ret = t[id].max_batch; + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + /* One ring interleaved between requests from all cpus */ + t[id].max_batch /= num_online_cpus() + 1; + pr_debug("Limiting batches to %d requests on %s\n", + t[id].max_batch, engine->name); + + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk; + + tsk = kthread_run(__igt_breadcrumbs_smoketest, + &t[id], "igt/%d.%d", id, n); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + mutex_unlock(&i915->drm.struct_mutex); + goto out_flush; + } + + get_task_struct(tsk); + threads[id * ncpus + n] = tsk; + } + } + mutex_unlock(&i915->drm.struct_mutex); + + msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); + +out_flush: + num_waits = 0; + num_fences = 0; + for_each_engine(engine, i915, id) { + for (n = 0; n < ncpus; n++) { + struct task_struct *tsk = threads[id * ncpus + n]; + int err; + + if (!tsk) + continue; + + err = kthread_stop(tsk); + if (err < 0 && !ret) + ret = err; + + put_task_struct(tsk); + } + + num_waits += atomic_long_read(&t[id].num_waits); + num_fences += atomic_long_read(&t[id].num_fences); + } + pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", + num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus); + + mutex_lock(&i915->drm.struct_mutex); + ret = igt_live_test_end(&live) ?: ret; +out_contexts: + mutex_unlock(&i915->drm.struct_mutex); + kfree(t[0].contexts); +out_threads: + kfree(threads); +out_file: + mock_file_free(i915, file); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + int i915_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -819,6 +1243,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), SUBTEST(live_empty_request), + SUBTEST(live_breadcrumbs_smoketest), }; if (i915_terminally_wedged(&i915->gpu_error)) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 0e70df0230b8..9ebd9225684e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { - if (!wait_event_timeout(rq->execute, - READ_ONCE(rq->global_seqno), - msecs_to_jiffies(10))) - return false; - return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 10) && diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c deleted file mode 100644 index f03b407fdbe2..000000000000 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_engine.h" - -static int check_rbtree(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - int n; - - if (&b->irq_wait->node != rb_first(&b->waiters)) { - pr_err("First waiter does not match first element of wait-tree\n"); - return -EINVAL; - } - - n = find_first_bit(bitmap, count); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); - int idx = w - waiters; - - if (!test_bit(idx, bitmap)) { - pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", - idx, w->seqno); - return -EINVAL; - } - - if (n != idx) { - pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", - idx, w->seqno, n); - return -EINVAL; - } - - n = find_next_bit(bitmap, count, n + 1); - } - - return 0; -} - -static int check_completion(struct intel_engine_cs *engine, - const unsigned long *bitmap, - const struct intel_wait *waiters, - const int count) -{ - int n; - - for (n = 0; n < count; n++) { - if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) - continue; - - pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", - n, waiters[n].seqno, - intel_wait_complete(&waiters[n]) ? "complete" : "active", - test_bit(n, bitmap) ? "active" : "complete"); - return -EINVAL; - } - - return 0; -} - -static int check_rbtree_empty(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (b->irq_wait) { - pr_err("Empty breadcrumbs still has a waiter\n"); - return -EINVAL; - } - - if (!RB_EMPTY_ROOT(&b->waiters)) { - pr_err("Empty breadcrumbs, but wait-tree not empty\n"); - return -EINVAL; - } - - return 0; -} - -static int igt_random_insert_remove(void *arg) -{ - const u32 seqno_bias = 0x1000; - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned int *order; - unsigned long *bitmap; - int err = -ENOMEM; - int n; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - order = i915_random_order(count, &prng); - if (!order) - goto out_bitmap; - - for (n = 0; n < count; n++) - intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - - /* Add and remove waiters into the rbtree in random order. At each - * step, we verify that the rbtree is correctly ordered. - */ - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_add_wait(engine, &waiters[i]); - __set_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - i915_random_reorder(order, count, &prng); - for (n = 0; n < count; n++) { - int i = order[n]; - - intel_engine_remove_wait(engine, &waiters[i]); - __clear_bit(i, bitmap); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_order; - } - - err = check_rbtree_empty(engine); -out_order: - kfree(order); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -static int igt_insert_complete(void *arg) -{ - const u32 seqno_bias = 0x1000; - struct intel_engine_cs *engine = arg; - struct intel_wait *waiters; - const int count = 4096; - unsigned long *bitmap; - int err = -ENOMEM; - int n, m; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), - GFP_KERNEL); - if (!bitmap) - goto out_waiters; - - for (n = 0; n < count; n++) { - intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); - intel_engine_add_wait(engine, &waiters[n]); - __set_bit(n, bitmap); - } - err = check_rbtree(engine, bitmap, waiters, count); - if (err) - goto out_bitmap; - - /* On each step, we advance the seqno so that several waiters are then - * complete (we increase the seqno by increasingly larger values to - * retire more and more waiters at once). All retired waiters should - * be woken and removed from the rbtree, and so that we check. - */ - for (n = 0; n < count; n = m) { - int seqno = 2 * n; - - GEM_BUG_ON(find_first_bit(bitmap, count) != n); - - if (intel_wait_complete(&waiters[n])) { - pr_err("waiter[%d, seqno=%d] completed too early\n", - n, waiters[n].seqno); - err = -EINVAL; - goto out_bitmap; - } - - /* complete the following waiters */ - mock_seqno_advance(engine, seqno + seqno_bias); - for (m = n; m <= seqno; m++) { - if (m == count) - break; - - GEM_BUG_ON(!test_bit(m, bitmap)); - __clear_bit(m, bitmap); - } - - intel_engine_remove_wait(engine, &waiters[n]); - RB_CLEAR_NODE(&waiters[n].node); - - err = check_rbtree(engine, bitmap, waiters, count); - if (err) { - pr_err("rbtree corrupt after seqno advance to %d\n", - seqno + seqno_bias); - goto out_bitmap; - } - - err = check_completion(engine, bitmap, waiters, count); - if (err) { - pr_err("completions after seqno advance to %d failed\n", - seqno + seqno_bias); - goto out_bitmap; - } - } - - err = check_rbtree_empty(engine); -out_bitmap: - kfree(bitmap); -out_waiters: - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -struct igt_wakeup { - struct task_struct *tsk; - atomic_t *ready, *set, *done; - struct intel_engine_cs *engine; - unsigned long flags; -#define STOP 0 -#define IDLE 1 - wait_queue_head_t *wq; - u32 seqno; -}; - -static bool wait_for_ready(struct igt_wakeup *w) -{ - DEFINE_WAIT(ready); - - set_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->done)) - wake_up_var(w->done); - - if (test_bit(STOP, &w->flags)) - goto out; - - for (;;) { - prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); - if (atomic_read(w->ready) == 0) - break; - - schedule(); - } - finish_wait(w->wq, &ready); - -out: - clear_bit(IDLE, &w->flags); - if (atomic_dec_and_test(w->set)) - wake_up_var(w->set); - - return !test_bit(STOP, &w->flags); -} - -static int igt_wakeup_thread(void *arg) -{ - struct igt_wakeup *w = arg; - struct intel_wait wait; - - while (wait_for_ready(w)) { - GEM_BUG_ON(kthread_should_stop()); - - intel_wait_init_for_seqno(&wait, w->seqno); - intel_engine_add_wait(w->engine, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (i915_seqno_passed(intel_engine_get_seqno(w->engine), - w->seqno)) - break; - - if (test_bit(STOP, &w->flags)) /* emergency escape */ - break; - - schedule(); - } - intel_engine_remove_wait(w->engine, &wait); - __set_current_state(TASK_RUNNING); - } - - return 0; -} - -static void igt_wake_all_sync(atomic_t *ready, - atomic_t *set, - atomic_t *done, - wait_queue_head_t *wq, - int count) -{ - atomic_set(set, count); - atomic_set(ready, 0); - wake_up_all(wq); - - wait_var_event(set, !atomic_read(set)); - atomic_set(ready, count); - atomic_set(done, count); -} - -static int igt_wakeup(void *arg) -{ - I915_RND_STATE(prng); - struct intel_engine_cs *engine = arg; - struct igt_wakeup *waiters; - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - const int count = 4096; - const u32 max_seqno = count / 4; - atomic_t ready, set, done; - int err = -ENOMEM; - int n, step; - - mock_engine_reset(engine); - - waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); - if (!waiters) - goto out_engines; - - /* Create a large number of threads, each waiting on a random seqno. - * Multiple waiters will be waiting for the same seqno. - */ - atomic_set(&ready, count); - for (n = 0; n < count; n++) { - waiters[n].wq = &wq; - waiters[n].ready = &ready; - waiters[n].set = &set; - waiters[n].done = &done; - waiters[n].engine = engine; - waiters[n].flags = BIT(IDLE); - - waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], - "i915/igt:%d", n); - if (IS_ERR(waiters[n].tsk)) - goto out_waiters; - - get_task_struct(waiters[n].tsk); - } - - for (step = 1; step <= max_seqno; step <<= 1) { - u32 seqno; - - /* The waiter threads start paused as we assign them a random - * seqno and reset the engine. Once the engine is reset, - * we signal that the threads may begin their wait upon their - * seqno. - */ - for (n = 0; n < count; n++) { - GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); - waiters[n].seqno = - 1 + prandom_u32_state(&prng) % max_seqno; - } - mock_seqno_advance(engine, 0); - igt_wake_all_sync(&ready, &set, &done, &wq, count); - - /* Simulate the GPU doing chunks of work, with one or more - * seqno appearing to finish at the same time. A random number - * of threads will be waiting upon the update and hopefully be - * woken. - */ - for (seqno = 1; seqno <= max_seqno + step; seqno += step) { - usleep_range(50, 500); - mock_seqno_advance(engine, seqno); - } - GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); - - /* With the seqno now beyond any of the waiting threads, they - * should all be woken, see that they are complete and signal - * that they are ready for the next test. We wait until all - * threads are complete and waiting for us (i.e. not a seqno). - */ - if (!wait_var_event_timeout(&done, - !atomic_read(&done), 10 * HZ)) { - pr_err("Timed out waiting for %d remaining waiters\n", - atomic_read(&done)); - err = -ETIMEDOUT; - break; - } - - err = check_rbtree_empty(engine); - if (err) - break; - } - -out_waiters: - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - set_bit(STOP, &waiters[n].flags); - } - mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ - igt_wake_all_sync(&ready, &set, &done, &wq, n); - - for (n = 0; n < count; n++) { - if (IS_ERR(waiters[n].tsk)) - break; - - kthread_stop(waiters[n].tsk); - put_task_struct(waiters[n].tsk); - } - - kvfree(waiters); -out_engines: - mock_engine_flush(engine); - return err; -} - -int intel_breadcrumbs_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_random_insert_remove), - SUBTEST(igt_insert_complete), - SUBTEST(igt_wakeup), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915->engine[RCS]); - drm_dev_put(&i915->drm); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 2c38ea5892d9..7b6f3bea9ef8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -1127,7 +1127,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); - if (wait_for(waitqueue_active(&rq->execute), 10)) { + if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c index b26f07b55d86..2bfa72c1654b 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -76,3 +76,57 @@ void timed_fence_fini(struct timed_fence *tf) destroy_timer_on_stack(&tf->timer); i915_sw_fence_fini(&tf->fence); } + +struct heap_fence { + struct i915_sw_fence fence; + union { + struct kref ref; + struct rcu_head rcu; + }; +}; + +static int __i915_sw_fence_call +heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + heap_fence_put(&h->fence); + } + + return NOTIFY_DONE; +} + +struct i915_sw_fence *heap_fence_create(gfp_t gfp) +{ + struct heap_fence *h; + + h = kmalloc(sizeof(*h), gfp); + if (!h) + return NULL; + + i915_sw_fence_init(&h->fence, heap_fence_notify); + refcount_set(&h->ref.refcount, 2); + + return &h->fence; +} + +static void heap_fence_release(struct kref *ref) +{ + struct heap_fence *h = container_of(ref, typeof(*h), ref); + + i915_sw_fence_fini(&h->fence); + + kfree_rcu(h, rcu); +} + +void heap_fence_put(struct i915_sw_fence *fence) +{ + struct heap_fence *h = container_of(fence, typeof(*h), fence); + + kref_put(&h->ref, heap_fence_release); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h index 474aafb92ae1..1f9927e10f3a 100644 --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -39,4 +39,7 @@ struct timed_fence { void timed_fence_init(struct timed_fence *tf, unsigned long expires); void timed_fence_fini(struct timed_fence *tf); +struct i915_sw_fence *heap_fence_create(gfp_t gfp); +void heap_fence_put(struct i915_sw_fence *fence); + #endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 3b226ebc6bc4..08f0cab02e0f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -86,17 +86,21 @@ static struct mock_request *first_request(struct mock_engine *engine) static void advance(struct mock_request *request) { list_del_init(&request->link); - mock_seqno_advance(request->base.engine, request->base.global_seqno); + intel_engine_write_global_seqno(request->base.engine, + request->base.global_seqno); i915_request_mark_complete(&request->base); GEM_BUG_ON(!i915_request_completed(&request->base)); + + intel_engine_queue_breadcrumbs(request->base.engine); } static void hw_delay_complete(struct timer_list *t) { struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; + unsigned long flags; - spin_lock(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); /* Timer fired, first request is complete */ request = first_request(engine); @@ -116,7 +120,7 @@ static void hw_delay_complete(struct timer_list *t) advance(request); } - spin_unlock(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } static void mock_context_unpin(struct intel_context *ce) @@ -191,11 +195,12 @@ static void mock_submit_request(struct i915_request *request) struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); + unsigned long flags; i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); - spin_lock_irq(&engine->hw_lock); + spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); if (mock->link.prev == &engine->hw_queue) { if (mock->delay) @@ -203,7 +208,7 @@ static void mock_submit_request(struct i915_request *request) else advance(mock); } - spin_unlock_irq(&engine->hw_lock); + spin_unlock_irqrestore(&engine->hw_lock, flags); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -273,7 +278,7 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); + intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 133d0c21790d..b9cc3a245f16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -46,10 +46,4 @@ void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); void mock_engine_free(struct intel_engine_cs *engine); -static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - intel_engine_wakeup(engine); -} - #endif /* !__MOCK_ENGINE_H__ */ From 789659f4307abc1c2356db09eeefefd3f445c12d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Jan 2019 20:52:30 +0000 Subject: [PATCH 388/539] drm/i915: Drop fake breadcrumb irq Missed breadcrumb detection is defunct due to the tight coupling with dma_fence signaling and the myriad ways we may signal fences from everywhere but from an interrupt, i.e. we frequently signal a fence before we even see its interrupt. This means that even if we miss an interrupt for a fence, it still is signaled before our breadcrumb hangcheck fires, so simplify the breadcrumb hangchecking by moving it into the GPU hangcheck and forgo fake interrupts. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190129205230.19056-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 93 ----------- drivers/gpu/drm/i915/i915_gpu_error.c | 2 - drivers/gpu/drm/i915/i915_gpu_error.h | 5 - drivers/gpu/drm/i915/intel_breadcrumbs.c | 147 +----------------- drivers/gpu/drm/i915/intel_hangcheck.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 5 - .../gpu/drm/i915/selftests/igt_live_test.c | 7 - 7 files changed, 5 insertions(+), 256 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 29d52304c189..bf3073e63af8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1321,9 +1321,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_engine_last_submit(engine), jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - seq_printf(m, "\tfake irq active? %s\n", - yesno(test_bit(engine->id, - &dev_priv->gpu_error.missed_irq_rings))); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3899,94 +3896,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); -static int -fault_irq_set(struct drm_i915_private *i915, - unsigned long *irq, - unsigned long val) -{ - int err; - - err = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unlock; - - *irq = val; - mutex_unlock(&i915->drm.struct_mutex); - - /* Flush idle worker to disarm irq */ - drain_delayed_work(&i915->gt.idle_work); - - return 0; - -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int -i915_ring_missed_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.missed_irq_rings; - return 0; -} - -static int -i915_ring_missed_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, - i915_ring_missed_irq_get, i915_ring_missed_irq_set, - "0x%08llx\n"); - -static int -i915_ring_test_irq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = dev_priv->gpu_error.test_irq_rings; - - return 0; -} - -static int -i915_ring_test_irq_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - - /* GuC keeps the user interrupt permanently enabled for submission */ - if (USES_GUC_SUBMISSION(i915)) - return -ENODEV; - - /* - * From icl, we can no longer individually mask interrupt generation - * from each engine. - */ - if (INTEL_GEN(i915) >= 11) - return -ENODEV; - - val &= INTEL_INFO(i915)->ring_mask; - DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - - return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, - i915_ring_test_irq_get, i915_ring_test_irq_set, - "0x%08llx\n"); - #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -4750,8 +4659,6 @@ static const struct i915_debugfs_files { } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, - {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, - {"i915_ring_test_irq", &i915_ring_test_irq_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 304a7ef7f7fb..6e2e5ed2bd0a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -723,8 +723,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); - err_printf(m, "Missed interrupts: 0x%08lx\n", - m->i915->gpu_error.missed_irq_rings); for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 74757c424aab..53b1f22dd365 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -204,8 +204,6 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - unsigned long missed_irq_rings; - /** * State variable controlling the reset flow and count * @@ -274,9 +272,6 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; - struct i915_gpu_restart *restart; }; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b0795b0ad227..cacaa1d04d17 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -91,7 +91,6 @@ bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) spin_lock(&b->irq_lock); - b->irq_fired = true; if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -172,86 +171,6 @@ static void signal_irq_work(struct irq_work *work) intel_engine_breadcrumbs_irq(engine); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - -static noinline void missed_breadcrumb(struct intel_engine_cs *engine) -{ - if (GEM_SHOW_DEBUG()) { - struct drm_printer p = drm_debug_printer(__func__); - - intel_engine_dump(engine, &p, - "%s missed breadcrumb at %pS\n", - engine->name, __builtin_return_address(0)); - } - - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); -} - -static void intel_breadcrumbs_hangcheck(struct timer_list *t) -{ - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.hangcheck); - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - if (!b->irq_armed) - return; - - if (b->irq_fired) - goto rearm; - - /* - * We keep the hangcheck timer alive until we disarm the irq, even - * if there are no waiters at present. - * - * If the waiter was currently running, assume it hasn't had a chance - * to process the pending interrupt (e.g, low priority task on a loaded - * system) and wait until it sleeps before declaring a missed interrupt. - * - * If the waiter was asleep (and not even pending a wakeup), then we - * must have missed an interrupt as the GPU has stopped advancing - * but we still have a waiter. Assuming all batches complete within - * DRM_I915_HANGCHECK_JIFFIES [1.5s]! - */ - synchronize_hardirq(engine->i915->drm.irq); - if (intel_engine_signal_breadcrumbs(engine)) { - missed_breadcrumb(engine); - mod_timer(&b->fake_irq, jiffies + 1); - } else { -rearm: - b->irq_fired = false; - mod_timer(&b->hangcheck, wait_timeout()); - } -} - -static void intel_breadcrumbs_fake_irq(struct timer_list *t) -{ - struct intel_engine_cs *engine = - from_timer(engine, t, breadcrumbs.fake_irq); - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* - * The timer persists in case we cannot enable interrupts, - * or if we have previously seen seqno/interrupt incoherency - * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). - * Here the worker will wake up every jiffie in order to kick the - * oldest waiter to do the coherent seqno check. - */ - - if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed) - return; - - /* If the user has disabled the fake-irq, restore the hangchecking */ - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { - mod_timer(&b->hangcheck, wait_timeout()); - return; - } - - mod_timer(&b->fake_irq, jiffies + 1); -} - void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -274,43 +193,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) spin_unlock_irq(&b->irq_lock); } -static bool use_fake_irq(const struct intel_breadcrumbs *b) -{ - const struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - return false; - - /* - * Only start with the heavy weight fake irq timer if we have not - * seen any interrupts since enabling it the first time. If the - * interrupts are still arriving, it means we made a mistake in our - * engine->seqno_barrier(), a timing error that should be transient - * and unlikely to reoccur. - */ - return !b->irq_fired; -} - -static void enable_fake_irq(struct intel_breadcrumbs *b) -{ - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); - struct drm_i915_private *i915 = engine->i915; - bool enabled; lockdep_assert_held(&b->irq_lock); if (b->irq_armed) - return false; + return; /* * The breadcrumb irq will be disarmed on the interrupt after the @@ -328,16 +218,8 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * the driver is idle) we disarm the breadcrumbs. */ - /* No interrupts? Kick the waiter every jiffie! */ - enabled = false; - if (!b->irq_enabled++ && - !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + if (!b->irq_enabled++) irq_enable(engine); - enabled = true; - } - - enable_fake_irq(b); - return enabled; } void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) @@ -348,18 +230,6 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) INIT_LIST_HEAD(&b->signalers); init_irq_work(&b->irq_work, signal_irq_work); - - timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); - timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); -} - -static void cancel_fake_irq(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ - del_timer_sync(&b->hangcheck); - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) @@ -369,13 +239,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) spin_lock_irqsave(&b->irq_lock, flags); - /* - * Leave the fake_irq timer enabled (if it is running), but clear the - * bit so that it turns itself off on its next wake up and goes back - * to the long hangcheck interval if still required. - */ - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - if (b->irq_enabled) irq_enable(engine); else @@ -386,7 +249,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { - cancel_fake_irq(engine); } bool i915_request_enable_breadcrumb(struct i915_request *rq) @@ -482,7 +344,4 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, } } spin_unlock_irq(&b->irq_lock); - - if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) - drm_printf(p, "Fake irq active\n"); } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 5662d6fed523..a219c796e56d 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -275,6 +275,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) for_each_engine(engine, dev_priv, id) { struct hangcheck hc; + intel_engine_signal_breadcrumbs(engine); + hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 71f8ceb937ff..34d0a148e664 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -387,14 +387,9 @@ struct intel_engine_cs { struct irq_work irq_work; /* for use from inside irq_lock */ - struct timer_list fake_irq; /* used after a missed interrupt */ - struct timer_list hangcheck; /* detect missed interrupts */ - - unsigned int hangcheck_interrupts; unsigned int irq_enabled; bool irq_armed; - bool irq_fired; } breadcrumbs; struct { diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 5deb485fb942..3e902761cd16 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -35,7 +35,6 @@ int igt_live_test_begin(struct igt_live_test *t, return err; } - i915->gpu_error.missed_irq_rings = 0; t->reset_global = i915_reset_count(&i915->gpu_error); for_each_engine(engine, i915, id) @@ -75,11 +74,5 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - return 0; } From 584fca111d0cf32f6002472d9cb13c4555587438 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:41 -0800 Subject: [PATCH 389/539] drm/i915/icl: use tc_port in MG_PLL macros Fix the TODO leftover in the code by changing the argument in MG_PLL macros. The MG_PLL ids used to access the register values can be converted from tc_port rather than port. All these registers can use the TC port to calculate the right offsets because they are only available for TC ports. The range (PORT_C onwards) may not be stable and change from platform to platform. So by using the TC id directly we avoid having to check for the platform in the "leaf functions" and thus passing dev_priv around. The helper functions were also renamed to use "tc" as prefix to make them more generic. v2: Improve commit message and fix checkpatch warning (from Paulo) Signed-off-by: Lucas De Marchi Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 52 +++++++++--------- drivers/gpu/drm/i915/intel_ddi.c | 7 ++- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_dpll_mgr.c | 79 +++++++++++++-------------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 2 +- 5 files changed, 71 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1eca166d95bb..03adcf3838de 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9554,7 +9554,7 @@ enum skl_power_gate { #define _MG_PLL3_ENABLE 0x46038 #define _MG_PLL4_ENABLE 0x4603C /* Bits are the same as DPLL0_ENABLE */ -#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ +#define MG_PLL_ENABLE(tc_port) _MMIO_PORT((tc_port), _MG_PLL1_ENABLE, \ _MG_PLL2_ENABLE) #define _MG_REFCLKIN_CTL_PORT1 0x16892C @@ -9563,9 +9563,9 @@ enum skl_power_gate { #define _MG_REFCLKIN_CTL_PORT4 0x16B92C #define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) #define MG_REFCLKIN_CTL_OD_2_MUX_MASK (0x7 << 8) -#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_REFCLKIN_CTL_PORT1, \ - _MG_REFCLKIN_CTL_PORT2) +#define MG_REFCLKIN_CTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) #define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 #define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 @@ -9575,9 +9575,9 @@ enum skl_power_gate { #define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO_MASK (0xff << 16) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) #define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK (0xff << 8) -#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_CORECLKCTL1_PORT1, \ - _MG_CLKTOP2_CORECLKCTL1_PORT2) +#define MG_CLKTOP2_CORECLKCTL1(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) #define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 #define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 @@ -9595,9 +9595,9 @@ enum skl_power_gate { #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_SHIFT 8 #define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK (0xf << 8) -#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ - _MG_CLKTOP2_HSCLKCTL_PORT1, \ - _MG_CLKTOP2_HSCLKCTL_PORT2) +#define MG_CLKTOP2_HSCLKCTL(tc_port) _MMIO_PORT((tc_port), \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) #define _MG_PLL_DIV0_PORT1 0x168A00 #define _MG_PLL_DIV0_PORT2 0x169A00 @@ -9609,8 +9609,8 @@ enum skl_power_gate { #define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) #define MG_PLL_DIV0_FBDIV_INT_MASK (0xff << 0) #define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) -#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ - _MG_PLL_DIV0_PORT2) +#define MG_PLL_DIV0(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) #define _MG_PLL_DIV1_PORT1 0x168A04 #define _MG_PLL_DIV1_PORT2 0x169A04 @@ -9624,8 +9624,8 @@ enum skl_power_gate { #define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) #define MG_PLL_DIV1_FBPREDIV_MASK (0xf << 0) #define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) -#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ - _MG_PLL_DIV1_PORT2) +#define MG_PLL_DIV1(tc_port) _MMIO_PORT((tc_port), _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) #define _MG_PLL_LF_PORT1 0x168A08 #define _MG_PLL_LF_PORT2 0x169A08 @@ -9637,8 +9637,8 @@ enum skl_power_gate { #define MG_PLL_LF_GAINCTRL(x) ((x) << 16) #define MG_PLL_LF_INT_COEFF(x) ((x) << 8) #define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) -#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ - _MG_PLL_LF_PORT2) +#define MG_PLL_LF(tc_port) _MMIO_PORT((tc_port), _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) #define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C #define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C @@ -9650,9 +9650,9 @@ enum skl_power_gate { #define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) #define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) #define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) -#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_FRAC_LOCK_PORT1, \ - _MG_PLL_FRAC_LOCK_PORT2) +#define MG_PLL_FRAC_LOCK(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) #define _MG_PLL_SSC_PORT1 0x168A10 #define _MG_PLL_SSC_PORT2 0x169A10 @@ -9664,8 +9664,8 @@ enum skl_power_gate { #define MG_PLL_SSC_STEPNUM(x) ((x) << 10) #define MG_PLL_SSC_FLLEN (1 << 9) #define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) -#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ - _MG_PLL_SSC_PORT2) +#define MG_PLL_SSC(tc_port) _MMIO_PORT((tc_port), _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) #define _MG_PLL_BIAS_PORT1 0x168A14 #define _MG_PLL_BIAS_PORT2 0x169A14 @@ -9684,8 +9684,8 @@ enum skl_power_gate { #define MG_PLL_BIAS_VREF_RDAC_MASK (0x7 << 5) #define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) #define MG_PLL_BIAS_IREFTRIM_MASK (0x1f << 0) -#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ - _MG_PLL_BIAS_PORT2) +#define MG_PLL_BIAS(tc_port) _MMIO_PORT((tc_port), _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) #define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 #define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 @@ -9696,9 +9696,9 @@ enum skl_power_gate { #define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) #define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) #define MG_PLL_TDC_TDCSEL(x) ((x) << 0) -#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ - _MG_PLL_TDC_COLDST_BIAS_PORT1, \ - _MG_PLL_TDC_COLDST_BIAS_PORT2) +#define MG_PLL_TDC_COLDST_BIAS(tc_port) _MMIO_PORT((tc_port), \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index acd94354afc8..ae2413ec2196 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1391,16 +1391,17 @@ static int icl_calc_tbt_pll_link(struct drm_i915_private *dev_priv, static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, enum port port) { + enum tc_port tc_port = intel_port_to_tc(dev_priv, port); u32 mg_pll_div0, mg_clktop_hsclkctl; u32 m1, m2_int, m2_frac, div1, div2, refclk; u64 tmp; refclk = dev_priv->cdclk.hw.ref; - mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + mg_clktop_hsclkctl = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); - m1 = I915_READ(MG_PLL_DIV1(port)) & MG_PLL_DIV1_FBPREDIV_MASK; + m1 = I915_READ(MG_PLL_DIV1(tc_port)) & MG_PLL_DIV1_FBPREDIV_MASK; m2_int = mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; m2_frac = (mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? (mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 734ac77c9a8e..efd1683f61bc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9445,7 +9445,7 @@ static void icelake_get_ddi_pll(struct drm_i915_private *dev_priv, if (WARN_ON(!intel_dpll_is_combophy(id))) return; } else if (intel_port_is_tc(dev_priv, port)) { - id = icl_port_to_mg_pll_id(port); + id = icl_tc_port_to_pll_id(intel_port_to_tc(dev_priv, port)); } else { WARN(1, "Invalid port %x\n", port); return; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 606f54dde086..211b3ffa5bed 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2639,14 +2639,14 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, return link_clock; } -static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +static enum tc_port icl_pll_id_to_tc_port(enum intel_dpll_id id) { - return id - DPLL_ID_ICL_MGPLL1 + PORT_C; + return id - DPLL_ID_ICL_MGPLL1; } -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) { - return port - PORT_C + DPLL_ID_ICL_MGPLL1; + return tc_port + DPLL_ID_ICL_MGPLL1; } bool intel_dpll_is_combophy(enum intel_dpll_id id) @@ -2925,7 +2925,10 @@ icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, ret = icl_calc_dpll_state(crtc_state, encoder, clock, &pll_state); } else { - min = icl_port_to_mg_pll_id(port); + enum tc_port tc_port; + + tc_port = intel_port_to_tc(dev_priv, port); + min = icl_tc_port_to_pll_id(tc_port); max = min; ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, &pll_state); @@ -2959,12 +2962,8 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) return CNL_DPLL_ENABLE(id); else if (id == DPLL_ID_ICL_TBTPLL) return TBT_PLL_ENABLE; - else - /* - * TODO: Make MG_PLL macros use - * tc port id instead of port id - */ - return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + + return MG_PLL_ENABLE(icl_pll_id_to_tc_port(id)); } static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, @@ -2974,7 +2973,6 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, const enum intel_dpll_id id = pll->info->id; intel_wakeref_t wakeref; bool ret = false; - enum port port; u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, @@ -2991,32 +2989,33 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); } else { - port = icl_mg_pll_id_to_port(id); - hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(tc_port)); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_coreclkctl1 = - I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; hw_state->mg_clktop2_hsclkctl = - I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; - hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); - hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); - hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); - hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); - hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(tc_port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(tc_port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(tc_port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(tc_port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(tc_port)); - hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(tc_port)); hw_state->mg_pll_tdc_coldst_bias = - I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); if (dev_priv->cdclk.hw.ref == 38400) { hw_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; @@ -3051,7 +3050,7 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; - enum port port = icl_mg_pll_id_to_port(pll->info->id); + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); u32 val; /* @@ -3060,41 +3059,41 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, * during the calc/readout phase if the mask depends on some other HW * state like refclk, see icl_calc_mg_pll_state(). */ - val = I915_READ(MG_REFCLKIN_CTL(port)); + val = I915_READ(MG_REFCLKIN_CTL(tc_port)); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - I915_WRITE(MG_REFCLKIN_CTL(port), val); + I915_WRITE(MG_REFCLKIN_CTL(tc_port), val); - val = I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + val = I915_READ(MG_CLKTOP2_CORECLKCTL1(tc_port)); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), val); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(tc_port), val); - val = I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + val = I915_READ(MG_CLKTOP2_HSCLKCTL(tc_port)); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), val); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(tc_port), val); - I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); - I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); - I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); - I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); - I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_DIV0(tc_port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(tc_port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(tc_port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(tc_port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(tc_port), hw_state->mg_pll_ssc); - val = I915_READ(MG_PLL_BIAS(port)); + val = I915_READ(MG_PLL_BIAS(tc_port)); val &= ~hw_state->mg_pll_bias_mask; val |= hw_state->mg_pll_bias; - I915_WRITE(MG_PLL_BIAS(port), val); + I915_WRITE(MG_PLL_BIAS(tc_port), val); - val = I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + val = I915_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); val &= ~hw_state->mg_pll_tdc_coldst_bias_mask; val |= hw_state->mg_pll_tdc_coldst_bias; - I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), val); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(tc_port), val); - POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } static void icl_pll_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index e96e79413b54..40e8391a92f2 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -344,7 +344,7 @@ void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, u32 pll_id); int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv); -enum intel_dpll_id icl_port_to_mg_pll_id(enum port port); +enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port); bool intel_dpll_is_combophy(enum intel_dpll_id id); #endif /* _INTEL_DPLL_MGR_H_ */ From 7a61a6dec3dfb9f2e8c39a337580a3c3036c5cdf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:42 -0800 Subject: [PATCH 390/539] drm/i915: always return something on DDI clock selection Even if we don't have the correct clock and get a warning, we should not skip the return. v2: improve commit message (from Joonas) Fixes: 1fa11ee2d9d0 ("drm/i915/icl: start adding the TBT pll") Cc: Paulo Zanoni Cc: # v4.19+ Signed-off-by: Lucas De Marchi Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ae2413ec2196..51d4be072268 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1021,7 +1021,7 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, return DDI_CLK_SEL_TBT_810; default: MISSING_CASE(clock); - break; + return DDI_CLK_SEL_NONE; } case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL2: From 20fd2ab7be40658ded75f8a49cf649845c1e3572 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:43 -0800 Subject: [PATCH 391/539] drm/i915/icl: remove dpll from clk_sel We should not pass DPLL_ID_ICL_DPLL0 or DPLL_ID_ICL_DPLL1 to this function because the path is only taken for non-combophy ports. Let the warning trigger if improper value is given. While at it, rename the function to match the register name we are trying to program. v2: fix typo in comment Signed-off-by: Lucas De Marchi Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 51d4be072268..ca705546a0ab 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -995,7 +995,7 @@ static u32 hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } -static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, +static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { const struct intel_shared_dpll *pll = crtc_state->shared_dpll; @@ -1004,10 +1004,11 @@ static u32 icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, switch (id) { default: + /* + * DPLL_ID_ICL_DPLL0 and DPLL_ID_ICL_DPLL1 should not be used + * here, so do warn if this get passed in + */ MISSING_CASE(id); - /* fall through */ - case DPLL_ID_ICL_DPLL0: - case DPLL_ID_ICL_DPLL1: return DDI_CLK_SEL_NONE; case DPLL_ID_ICL_TBTPLL: switch (clock) { @@ -2869,7 +2870,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (IS_ICELAKE(dev_priv)) { if (!intel_port_is_combophy(dev_priv, port)) I915_WRITE(DDI_CLK_SEL(port), - icl_pll_to_ddi_pll_sel(encoder, crtc_state)); + icl_pll_to_ddi_clk_sel(encoder, crtc_state)); } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); From 5b0bd14dcc6bf0d28ad908aa1b8835667f860c84 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:44 -0800 Subject: [PATCH 392/539] drm/i915/icl: keep track of unused pll while looping Instead of looping again on the range of plls, just keep track of one unused one and use it later. Signed-off-by: Lucas De Marchi Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 211b3ffa5bed..8f70838ac7d8 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -247,7 +247,7 @@ intel_find_shared_dpll(struct intel_crtc *crtc, enum intel_dpll_id range_max) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_shared_dpll *pll; + struct intel_shared_dpll *pll, *unused_pll = NULL; struct intel_shared_dpll_state *shared_dpll; enum intel_dpll_id i; @@ -257,8 +257,10 @@ intel_find_shared_dpll(struct intel_crtc *crtc, pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ - if (shared_dpll[i].crtc_mask == 0) + if (shared_dpll[i].crtc_mask == 0) { + unused_pll = pll; continue; + } if (memcmp(&crtc_state->dpll_hw_state, &shared_dpll[i].hw_state, @@ -273,14 +275,11 @@ intel_find_shared_dpll(struct intel_crtc *crtc, } /* Ok no matching timings, maybe there's a free one? */ - for (i = range_min; i <= range_max; i++) { - pll = &dev_priv->shared_dplls[i]; - if (shared_dpll[i].crtc_mask == 0) { - DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", - crtc->base.base.id, crtc->base.name, - pll->info->name); - return pll; - } + if (unused_pll) { + DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", + crtc->base.base.id, crtc->base.name, + unused_pll->info->name); + return unused_pll; } return NULL; From b52c273be6876107a9f77987d9d42cee36d0081e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:28 +0200 Subject: [PATCH 393/539] drm/i915: Don't ignore level 0 lines watermark for glk+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On glk+ the level 0 lines watermark actually matters. Do not ignore it. And while at it let's change things so that we always program a consistnet 0 to the register when the lines watermarks is ignored by the hardware. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-2-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fdc28a3d2936..e517bdd2c2f9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4674,6 +4674,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, return 0; } +static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level) +{ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return true; + + /* The number of lines are ignored for the level 0 watermark. */ + return level > 0; +} + static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, int level, @@ -4756,8 +4765,10 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } } - /* The number of lines are ignored for the level 0 watermark. */ - if (level > 0 && res_lines > 31) + if (!skl_wm_has_lines(dev_priv, level)) + res_lines = 0; + + if (res_lines > 31) return; /* From 692927f4e9058fbdae1baecf72d85659bbc3c59c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:29 +0200 Subject: [PATCH 394/539] drm/i915: Reinstate an early latency==0 check for skl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I thought we could remove all the early latency==0 checks and rely on skl_wm_method{1,2}() checking for it. But skl_compute_plane_wm() applies a bunch of workarounds to bump up the latency before calling those guys so clearly it won't end up doing the right thing. Also not sure if the calculations based on the method1/2 results are safe agaisnt overflows so it might not work all that well in any case. Let's put the early check back. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-3-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e517bdd2c2f9..7473d6ea4bfe 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4700,6 +4700,9 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + if (latency == 0) + return; + /* Display WA #1141: kbl,cfl */ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) && From 17b16054b1115559df126c9d7e727770194d39e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:30 +0200 Subject: [PATCH 395/539] drm/i915: Fix bits vs. bytes mixup in dbuf block size computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec used to say "8bpp" which someone took to mean 8 bytes per pixel when in fact it was supposed to be 8 bits per pixel. The spec has been updated to make it more clear now. Fix the code to match. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-4-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7473d6ea4bfe..59e186b30d97 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4617,7 +4617,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, intel_pstate); if (INTEL_GEN(dev_priv) >= 11 && - fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8) + fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 1) wp->dbuf_block_size = 256; else wp->dbuf_block_size = 512; From b19c9bcaa20ec10bc39389f2b8bfe4c57cde7cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:31 +0200 Subject: [PATCH 396/539] drm/i915: Fix > vs >= mismatch in watermark/ddb calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says we have to reject the watermark if it's >= the ddb allocation. Fix the code to reject the == case as it should. For transition watermarks we can just use >=, for the rest we'll do +1 when calculating the minimum ddb allocation size. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-5-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 59e186b30d97..fff44512c44e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, continue; wm = &cstate->wm.skl.optimal.planes[plane_id]; - blocks += wm->wm[level].plane_res_b; - blocks += wm->uv_wm[level].plane_res_b; + blocks += wm->wm[level].plane_res_b + 1; + blocks += wm->uv_wm[level].plane_res_b + 1; } if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - total[plane_id] = wm->wm[level].plane_res_b + extra; + total[plane_id] = wm->wm[level].plane_res_b + 1 + extra; alloc_size -= extra; total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; + uv_total[plane_id] = wm->uv_wm[level].plane_res_b + 1 + extra; alloc_size -= extra; total_data_rate -= rate; } @@ -4477,7 +4477,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { wm = &cstate->wm.skl.optimal.planes[plane_id]; - if (wm->trans_wm.plane_res_b > total[plane_id]) + if (wm->trans_wm.plane_res_b >= total[plane_id]) memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); } From 961d95e09c045fe2998c74da96961edc8658f171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:32 +0200 Subject: [PATCH 397/539] drm/i915: Account for minimum ddb allocation restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On icl+ bspec tells us to calculate a separate minimum ddb allocation from the blocks watermark. Both have to be checked against the actual ddb allocation, but since we do things the other way around we'll just calculat the minimum acceptable ddb allocation by taking the maximum of the two values. We'll also replace the memcmp() with a full trawl over the the watermarks so that it'll ignore the min_ddb_alloc because we can't directly read that out from the hw. I suppose we could reconstruct it from the other values, but I was too lazy to do that now. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-6-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 53 +++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d072f3369ee1..d1cecc588fbb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1114,6 +1114,7 @@ struct skl_ddb_values { }; struct skl_wm_level { + u16 min_ddb_alloc; u16 plane_res_b; u8 plane_res_l; bool plane_en; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fff44512c44e..abb018f49a0e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4371,8 +4371,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, continue; wm = &cstate->wm.skl.optimal.planes[plane_id]; - blocks += wm->wm[level].plane_res_b + 1; - blocks += wm->uv_wm[level].plane_res_b + 1; + blocks += wm->wm[level].min_ddb_alloc; + blocks += wm->uv_wm[level].min_ddb_alloc; } if (blocks < alloc_size) { @@ -4413,7 +4413,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - total[plane_id] = wm->wm[level].plane_res_b + 1 + extra; + total[plane_id] = wm->wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; @@ -4424,7 +4424,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, extra = min_t(u16, alloc_size, DIV64_U64_ROUND_UP(alloc_size * rate, total_data_rate)); - uv_total[plane_id] = wm->uv_wm[level].plane_res_b + 1 + extra; + uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra; alloc_size -= extra; total_data_rate -= rate; } @@ -4695,7 +4695,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, u32 latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; - u32 res_blocks, res_lines; + u32 res_blocks, res_lines, min_ddb_alloc = 0; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); @@ -4768,6 +4768,24 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, } } + if (INTEL_GEN(dev_priv) >= 11) { + if (wp->y_tiled) { + int extra_lines; + + if (res_lines % wp->y_min_scanlines == 0) + extra_lines = wp->y_min_scanlines; + else + extra_lines = wp->y_min_scanlines * 2 - + res_lines % wp->y_min_scanlines; + + min_ddb_alloc = mul_round_up_u32_fixed16(res_lines + extra_lines, + wp->plane_blocks_per_line); + } else { + min_ddb_alloc = res_blocks + + DIV_ROUND_UP(res_blocks, 10); + } + } + if (!skl_wm_has_lines(dev_priv, level)) res_lines = 0; @@ -4782,6 +4800,8 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, */ result->plane_res_b = res_blocks; result->plane_res_l = res_lines; + /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here */ + result->min_ddb_alloc = max(min_ddb_alloc, res_blocks) + 1; result->plane_en = true; } @@ -5132,6 +5152,23 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv, return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm); } +static bool skl_pipe_wm_equals(struct intel_crtc *crtc, + const struct skl_pipe_wm *wm1, + const struct skl_pipe_wm *wm2) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + if (!skl_plane_wm_equals(dev_priv, + &wm1->planes[plane_id], + &wm2->planes[plane_id])) + return false; + } + + return wm1->linetime == wm2->linetime; +} + static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, const struct skl_ddb_entry *b) { @@ -5158,16 +5195,14 @@ static int skl_update_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm, /* out */ bool *changed /* out */) { + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); int ret; ret = skl_build_pipe_wm(cstate, pipe_wm); if (ret) return ret; - if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) - *changed = false; - else - *changed = true; + *changed = !skl_pipe_wm_equals(crtc, old_pipe_wm, pipe_wm); return 0; } From 60e983ff187f3df4a83e977c90580243f4a71dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:33 +0200 Subject: [PATCH 398/539] drm/i915: Pass dev_priv to skl_needs_memory_bw_wa() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl_needs_memory_bw_wa() doesn't look at the passed in state at all. Possibly it should, but for now let's make life simpler by just passing in dev_priv. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-7-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index abb018f49a0e..78c893dce4d1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3631,14 +3631,9 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) - return true; - - return false; + return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); } static bool @@ -3790,7 +3785,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (skl_needs_memory_bw_wa(dev_priv) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -4579,9 +4574,6 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; u32 interm_pbpl; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); /* only NV12 format has two planes */ if (color_plane == 1 && fb->format->format != DRM_FORMAT_NV12) { @@ -4642,7 +4634,7 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, wp->y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (skl_needs_memory_bw_wa(dev_priv)) wp->y_min_scanlines *= 2; wp->plane_bytes_per_line = wp->width * wp->cpp; @@ -4696,9 +4688,6 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; u32 res_blocks, res_lines, min_ddb_alloc = 0; - struct intel_atomic_state *state = - to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); if (latency == 0) return; @@ -4709,7 +4698,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && wp->x_tiled) + if (skl_needs_memory_bw_wa(dev_priv) && wp->x_tiled) latency += 15; method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, From ff61a97499fd27944c02b9c8035d50d803a7975f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:34 +0200 Subject: [PATCH 399/539] drm/i915: Drop the definite article in front of SAGV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec doesn't use a definite article in front of SAGV. The rules regarding articles and initialisms are super fuzzy, but at least to my ears it sounds much more natural to not have the article. Perhaps because I tend to pronounce it as "sag-vee" instead of spelling out the letters one at a time. Actually I might still prefer to leave out the article if I did spell them out. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-8-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 78c893dce4d1..c78674ef616f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3665,25 +3665,25 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; - DRM_DEBUG_KMS("Enabling the SAGV\n"); + DRM_DEBUG_KMS("Enabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); - /* We don't need to wait for the SAGV when enabling */ + /* We don't need to wait for SAGV when enabling */ mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to enable the SAGV\n"); + DRM_ERROR("Failed to enable SAGV\n"); return ret; } @@ -3702,7 +3702,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; - DRM_DEBUG_KMS("Disabling the SAGV\n"); + DRM_DEBUG_KMS("Disabling SAGV\n"); mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ @@ -3714,14 +3714,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) /* * Some skl systems, pre-release machines in particular, - * don't actually have an SAGV. + * don't actually have SAGV. */ if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { - DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + DRM_ERROR("Failed to disable SAGV (%d)\n", ret); return ret; } @@ -3752,7 +3752,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) sagv_block_time_us = 10; /* - * SKL+ workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3793,7 +3793,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* * If any of the planes on this pipe don't enable wm levels that * incur memory latencies higher than sagv_block_time_us we - * can't enable the SAGV. + * can't enable SAGV. */ if (latency < sagv_block_time_us) return false; From ff58c11cdbe99aaed8759caa39952e7355d50b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:35 +0200 Subject: [PATCH 400/539] drm/i915: Drop the pointless linetime==0 check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0*whatever==0 so this check is pointless. Remove it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-9-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c78674ef616f..1d1c56961b39 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4824,10 +4824,6 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) u32 linetime_wm; linetime_us = intel_get_linetime_us(cstate); - - if (is_fixed16_zero(linetime_us)) - return 0; - linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); /* Display WA #1135: bxt:ALL GLK:ALL */ From 717671c610fc4119ec4be87a99d8fe35e5863725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Dec 2018 19:14:36 +0200 Subject: [PATCH 401/539] drm/i915: Use IS_GEN9_LP() for the linetime w/a check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IS_GLK||IS_BXT == IS_GEN9_LP Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181221171436.8218-10-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1d1c56961b39..53b706154c94 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4826,9 +4826,8 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime_us = intel_get_linetime_us(cstate); linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); - /* Display WA #1135: bxt:ALL GLK:ALL */ - if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && - dev_priv->ipc_enabled) + /* Display WA #1135: BXT:ALL GLK:ALL */ + if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) linetime_wm /= 2; return linetime_wm; From d54e5f76d8fe025fa557857d826515116d124a90 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 29 Jan 2019 17:39:13 -0800 Subject: [PATCH 402/539] drm/i915: Update DRIVER_DATE to 20190129 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d1cecc588fbb..22da9df1f0a7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190124" -#define DRIVER_TIMESTAMP 1548370857 +#define DRIVER_DATE "20190129" +#define DRIVER_TIMESTAMP 1548812351 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From dcc9d76b6d834d06a317e27fa8242d7e009135ac Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Jan 2019 12:37:55 +0300 Subject: [PATCH 403/539] drm/komeda: Off by one in komeda_fb_get_pixel_addr() The > should be >= to avoid an off by one bug. Fixes: c46c24bb6b11 ("drm/komeda: Add komeda_framebuffer") Signed-off-by: Dan Carpenter Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index 4ddd5314ca23..23ee74d42239 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -144,7 +144,7 @@ komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane) const struct drm_gem_cma_object *obj; u32 plane_x, plane_y, cpp, pitch, offset; - if (plane > fb->format->num_planes) { + if (plane >= fb->format->num_planes) { DRM_DEBUG_KMS("Out of max plane num.\n"); return -EINVAL; } From c0550305fcbd82c0b0d094e385551d9c63fc8852 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 30 Jan 2019 10:51:20 -0800 Subject: [PATCH 404/539] drm/i915: Force background color to black for gen9+ (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't yet allow userspace to control the CRTC background color, but we should manually program the color to black to ensure the BIOS didn't leave us with some other color. We should also set the pipe gamma and pipe CSC bits so that the background color goes through the same color management transformations that a plane with black pixels would. v2: Rename register to SKL_BOTTOM_COLOR to more closely follow bspec naming. (Ville) Cc: Ville Syrjälä Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190130185122.10322-2-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 03adcf3838de..a64deeb4e517 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5710,6 +5710,12 @@ enum { #define PIPEMISC_DITHER_TYPE_SP (0 << 2) #define PIPEMISC(pipe) _MMIO_PIPE2(pipe, _PIPE_MISC_A) +/* Skylake+ pipe bottom (background) color */ +#define _SKL_BOTTOM_COLOR_A 0x70034 +#define SKL_BOTTOM_COLOR_GAMMA_ENABLE (1 << 31) +#define SKL_BOTTOM_COLOR_CSC_ENABLE (1 << 30) +#define SKL_BOTTOM_COLOR(pipe) _MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A) + #define VLV_DPFLIPSTAT _MMIO(VLV_DISPLAY_BASE + 0x70028) #define PIPEB_LINE_COMPARE_INT_EN (1 << 29) #define PIPEB_HLINE_INT_EN (1 << 28) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index efd1683f61bc..b0bb8adcf26f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3930,6 +3930,16 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta else if (old_crtc_state->pch_pfit.enabled) ironlake_pfit_disable(old_crtc_state); } + + /* + * We don't (yet) allow userspace to control the pipe background color, + * so force it to black, but apply pipe gamma and CSC so that its + * handling will match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } static void intel_fdi_normal_train(struct intel_crtc *crtc) @@ -15488,6 +15498,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, plane->base.type != DRM_PLANE_TYPE_PRIMARY) intel_plane_disable_noatomic(crtc, plane); } + + /* + * Disable any background color set by the BIOS, but enable the + * gamma and CSC to match how we program our planes. + */ + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), + SKL_BOTTOM_COLOR_GAMMA_ENABLE | + SKL_BOTTOM_COLOR_CSC_ENABLE); } /* Adjust the state of the output pipe according to whether we From e4c0d5314dede34ac48257cbdd4b3a046b2415df Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 30 Jan 2019 10:10:22 -0800 Subject: [PATCH 405/539] drm/i915: Apply LUT validation checks to platforms more accurately (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use of the new DRM_COLOR_LUT_NON_DECREASING test was a bit over-zealous; it doesn't actually need to be applied to the degamma on "bdw-style" platforms. Likewise, we overlooked the fact that CHV should have that test applied to the gamma LUT as well as the degamma LUT. Rather than adding more complicated platform checking to intel_color_check(), let's just store the appropriate set of LUT validation flags for each platform in the intel_device_info structure. v2: - Shuffle around LUT size tests so that the hardware-specific tests won't be applied to legacy gamma tables. (Ville) - Add a debug message so that it will be easier to understand why an atomic transaction involving incorrectly-sized LUT's got rejected by the driver. v3: - Switch size_t's to int's. (Ville) Fixes: 85e2d61e4976 ("drm/i915: Validate userspace-provided color management LUT's (v4)") References: https://lists.freedesktop.org/archives/intel-gfx/2019-January/187634.html Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190130181022.4291-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 10 +++- drivers/gpu/drm/i915/intel_color.c | 64 ++++++++++++------------ drivers/gpu/drm/i915/intel_device_info.h | 2 + 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index dd4aff2b256e..6f7ad10f34b8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -67,9 +67,15 @@ #define BDW_COLORS \ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ - .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } + .color = { .degamma_lut_size = 65, .gamma_lut_size = 257, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ + } #define GLK_COLORS \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024, \ + .degamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING | \ + DRM_COLOR_LUT_EQUAL_CHANNELS, \ + } /* Keep in gen based order, and chronological order within a gen */ diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index bc7589656a8f..4b0044cdcf1a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -605,48 +605,48 @@ void intel_color_load_luts(struct intel_crtc_state *crtc_state) dev_priv->display.load_luts(crtc_state); } +static int check_lut_size(const struct drm_property_blob *lut, int expected) +{ + int len; + + if (!lut) + return 0; + + len = drm_color_lut_size(lut); + if (len != expected) { + DRM_DEBUG_KMS("Invalid LUT size; got %d, expected %d\n", + len, expected); + return -EINVAL; + } + + return 0; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - size_t gamma_length, degamma_length; - uint32_t tests = DRM_COLOR_LUT_NON_DECREASING; + int gamma_length, degamma_length; + u32 gamma_tests, degamma_tests; degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size; gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size; + degamma_tests = INTEL_INFO(dev_priv)->color.degamma_lut_tests; + gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; - /* - * All of our platforms mandate that the degamma curve be - * non-decreasing. Additionally, GLK and gen11 only accept a single - * value for red, green, and blue in the degamma table. Make sure - * userspace didn't try to pass us something we can't handle. - * - * We don't have any extra hardware constraints on the gamma table, - * so no need to explicitly check it. - */ - if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) - tests |= DRM_COLOR_LUT_EQUAL_CHANNELS; - - if (drm_color_lut_check(crtc_state->base.degamma_lut, tests) != 0) - return -EINVAL; - - /* - * We allow both degamma & gamma luts at the right size or - * NULL. - */ - if ((!crtc_state->base.degamma_lut || - drm_color_lut_size(crtc_state->base.degamma_lut) == degamma_length) && - (!crtc_state->base.gamma_lut || - drm_color_lut_size(crtc_state->base.gamma_lut) == gamma_length)) - return 0; - - /* - * We also allow no degamma lut/ctm and a gamma lut at the legacy - * size (256 entries). - */ + /* Always allow legacy gamma LUT with no further checking. */ if (crtc_state_is_legacy_gamma(crtc_state)) return 0; - return -EINVAL; + if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || + check_lut_size(crtc_state->base.gamma_lut, gamma_length)) + return -EINVAL; + + if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || + drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) + return -EINVAL; + + + return 0; } void intel_color_init(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 957c6527f76b..7bf09cef591a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,6 +189,8 @@ struct intel_device_info { struct color_luts { u16 degamma_lut_size; u16 gamma_lut_size; + u32 degamma_lut_tests; + u32 gamma_lut_tests; } color; }; From 49220789617bdc439c95146eb94529c0fcf01043 Mon Sep 17 00:00:00 2001 From: Hang Yuan Date: Wed, 30 Jan 2019 18:25:52 +0800 Subject: [PATCH 406/539] drm/i915/gvt: add functions to get default resolution These functions will get default resolution according to vgpu type. Signed-off-by: Hang Yuan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 1 + drivers/gpu/drm/i915/gvt/display.h | 37 +++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4f25b6b7728e..6a86faccbe3f 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -342,6 +342,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->dpcd->data_valid = true; port->dpcd->data[DPCD_SINK_COUNT] = 0x1; port->type = type; + port->id = resolution; emulate_monitor_status_change(vgpu); diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index ea7c1c525b8c..a87f33e6a23c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -146,18 +146,19 @@ enum intel_vgpu_port_type { GVT_PORT_MAX }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + struct intel_vgpu_port { /* per display EDID information */ struct intel_vgpu_edid_data *edid; /* per display DPCD information */ struct intel_vgpu_dpcd_data *dpcd; int type; -}; - -enum intel_vgpu_edid { - GVT_EDID_1024_768, - GVT_EDID_1920_1200, - GVT_EDID_NUM, + enum intel_vgpu_edid id; }; static inline char *vgpu_edid_str(enum intel_vgpu_edid id) @@ -172,6 +173,30 @@ static inline char *vgpu_edid_str(enum intel_vgpu_edid id) } } +static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 1024; + case GVT_EDID_1920_1200: + return 1920; + default: + return 0; + } +} + +static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return 768; + case GVT_EDID_1920_1200: + return 1200; + default: + return 0; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); From 1ca20f33df42ee61a4ae1bf4f2abf8e5f77a16fc Mon Sep 17 00:00:00 2001 From: Hang Yuan Date: Wed, 30 Jan 2019 18:25:53 +0800 Subject: [PATCH 407/539] drm/i915/gvt: add hotplug emulation Add function to emulate hotplug interrupt for SKL/KBL platforms Signed-off-by: Hang Yuan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.c | 1 + drivers/gpu/drm/i915/gvt/gvt.h | 3 +++ 3 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6a86faccbe3f..035479e273be 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -445,6 +445,36 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) mutex_unlock(&gvt->lock); } +/** + * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU + * @vgpu: a vGPU + * @conncted: link state + * + * This function is used to trigger hotplug interrupt for vGPU + * + */ +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + /* TODO: add more platforms support */ + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (connected) { + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + } else { + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT; + } + vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTD_HOTPLUG_STATUS_MASK; + intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); + } +} + /** * intel_vgpu_clean_display - clean vGPU virtual display emulation * @vgpu: a vGPU diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 4e8947f33bd0..43f4242062dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -185,6 +185,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_query_plane = intel_vgpu_query_plane, .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, .write_protect_handler = intel_vgpu_page_track_handler, + .emulate_hotplug = intel_vgpu_emulate_hotplug, }; static void init_device_info(struct intel_gvt *gvt) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 8a4cf995d755..0ba4b42e3bb0 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -536,6 +536,8 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); +void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected); + static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) { /* We are 64bit bar. */ @@ -577,6 +579,7 @@ struct intel_gvt_ops { int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); int (*write_protect_handler)(struct intel_vgpu *, u64, void *, unsigned int); + void (*emulate_hotplug)(struct intel_vgpu *vgpu, bool connected); }; From 39c68e87bc50a71bcfe93582d9b0673ef30db418 Mon Sep 17 00:00:00 2001 From: Hang Yuan Date: Wed, 30 Jan 2019 18:25:54 +0800 Subject: [PATCH 408/539] drm/i915/gvt: add VFIO EDID region Implement VFIO EDID region for vgpu. Support EDID blob update and notify guest on link state change via hotplug event. v3: move struct edid_region to kvmgt.c v2: add EDID sanity check and size update Tested-by: Gerd Hoffmann Reviewed-by: Gerd Hoffmann Signed-off-by: Hang Yuan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/hypercall.h | 1 + drivers/gpu/drm/i915/gvt/kvmgt.c | 143 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/mpt.h | 17 ++++ drivers/gpu/drm/i915/gvt/vgpu.c | 6 ++ 4 files changed, 167 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 50798868ab15..5e01cc8d9b16 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -67,6 +67,7 @@ struct intel_gvt_mpt { int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); int (*set_opregion)(void *vgpu); + int (*set_edid)(void *vgpu, int port_num); int (*get_vfio_device)(void *vgpu); void (*put_vfio_device)(void *vgpu); bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index f8d44e8f86a6..63eef86a2a85 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -57,6 +57,8 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) + #define OPREGION_SIGNATURE "IntelGraphicsMem" struct vfio_region; @@ -76,6 +78,11 @@ struct vfio_region { void *data; }; +struct vfio_edid_region { + struct vfio_region_gfx_edid vfio_edid_regs; + void *edid_blob; +}; + struct kvmgt_pgfn { gfn_t gfn; struct hlist_node hnode; @@ -427,6 +434,111 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { .release = intel_vgpu_reg_release_opregion, }; +static int handle_edid_regs(struct intel_vgpu *vgpu, + struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + struct vfio_region_gfx_edid *regs = ®ion->vfio_edid_regs; + unsigned int data; + + if (offset + count > sizeof(*regs)) + return -EINVAL; + + if (count != 4) + return -EINVAL; + + if (is_write) { + data = *((unsigned int *)buf); + switch (offset) { + case offsetof(struct vfio_region_gfx_edid, link_state): + if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { + if (!drm_edid_block_valid( + (u8 *)region->edid_blob, + 0, + true, + NULL)) { + gvt_vgpu_err("invalid EDID blob\n"); + return -EINVAL; + } + intel_gvt_ops->emulate_hotplug(vgpu, true); + } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) + intel_gvt_ops->emulate_hotplug(vgpu, false); + else { + gvt_vgpu_err("invalid EDID link state %d\n", + regs->link_state); + return -EINVAL; + } + regs->link_state = data; + break; + case offsetof(struct vfio_region_gfx_edid, edid_size): + if (data > regs->edid_max_size) { + gvt_vgpu_err("EDID size is bigger than %d!\n", + regs->edid_max_size); + return -EINVAL; + } + regs->edid_size = data; + break; + default: + /* read-only regs */ + gvt_vgpu_err("write read-only EDID region at offset %d\n", + offset); + return -EPERM; + } + } else { + memcpy(buf, (char *)regs + offset, count); + } + + return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + if (offset + count > region->vfio_edid_regs.edid_size) + return -EINVAL; + + if (is_write) + memcpy(region->edid_blob + offset, buf, count); + else + memcpy(buf, region->edid_blob + offset, count); + + return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + struct vfio_edid_region *region = + (struct vfio_edid_region *)vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos < region->vfio_edid_regs.edid_offset) { + ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); + } else { + pos -= EDID_BLOB_OFFSET; + ret = handle_edid_blob(region, buf, count, pos, iswrite); + } + + if (ret < 0) + gvt_vgpu_err("failed to access EDID region\n"); + + return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ + kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { + .rw = intel_vgpu_reg_rw_edid, + .release = intel_vgpu_reg_release_edid, +}; + static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, unsigned int type, unsigned int subtype, const struct intel_vgpu_regops *ops, @@ -493,6 +605,36 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + struct vfio_edid_region *base; + int ret; + + base = kzalloc(sizeof(*base), GFP_KERNEL); + if (!base) + return -ENOMEM; + + /* TODO: Add multi-port and EDID extension block support */ + base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; + base->vfio_edid_regs.edid_max_size = EDID_SIZE; + base->vfio_edid_regs.edid_size = EDID_SIZE; + base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); + base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); + base->edid_blob = port->edid->edid_block; + + ret = intel_vgpu_register_reg(vgpu, + VFIO_REGION_TYPE_GFX, + VFIO_REGION_SUBTYPE_GFX_EDID, + &intel_vgpu_regops_edid, EDID_SIZE, + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS, base); + + return ret; +} + static void kvmgt_put_vfio_device(void *vgpu) { if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) @@ -1874,6 +2016,7 @@ static struct intel_gvt_mpt kvmgt_mpt = { .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, .set_opregion = kvmgt_set_opregion, + .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 9b4225d44243..5d8b8f228d8f 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -313,6 +313,23 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) return intel_gvt_host.mpt->set_opregion(vgpu); } +/** + * intel_gvt_hypervisor_set_edid - Set EDID region for guest + * @vgpu: a vGPU + * @port_num: display port number + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_edid(struct intel_vgpu *vgpu, + int port_num) +{ + if (!intel_gvt_host.mpt->set_edid) + return 0; + + return intel_gvt_host.mpt->set_edid(vgpu, port_num); +} + /** * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count * @vgpu: a vGPU diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c628be05fbfe..39682b065a71 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -428,6 +428,12 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; + /*TODO: add more platforms support */ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); + if (ret) + goto out_clean_sched_policy; + return vgpu; out_clean_sched_policy: From ad3e7b824c185125f281d56a9e8f614effcdae91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 30 Jan 2019 17:51:10 +0200 Subject: [PATCH 409/539] drm/i915: Don't use the second dbuf slice on icl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code managing the dbuf slices is borked and needs some real work to fix. In the meantime let's just stop using the second slice. v2: Drop the change to intel_enabled_dbuf_slices_num() (Mahesh) Cc: Mahesh Kumar Reviewed-by: Imre Deak #v1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190130155110.12918-1-ville.syrjala@linux.intel.com Reviewed-by: Mahesh Kumar --- drivers/gpu/drm/i915/intel_pm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 53b706154c94..ed9786241307 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3822,8 +3822,13 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv, /* * 12GB/s is maximum BW supported by single DBuf slice. + * + * FIXME dbuf slice code is broken: + * - must wait for planes to stop using the slice before powering it off + * - plane straddling both slices is illegal in multi-pipe scenarios + * - should validate we stay within the hw bandwidth limits */ - if (num_active > 1 || total_data_bw >= GBps(12)) { + if (0 && (num_active > 1 || total_data_bw >= GBps(12))) { ddb->enabled_slices = 2; } else { ddb->enabled_slices = 1; From 8aae2b1cdf4452817943741ed792068b18191465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 30 Jan 2019 20:13:59 +0200 Subject: [PATCH 410/539] drm/i915: Pick the first unused PLL once again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b0bd14dcc6b ("drm/i915/icl: keep track of unused pll while looping") inadvertently (I presume) changed the code to pick the last unused dpll rather than the first unused one like we did before. While there should most likely be no harm in changing the order let's change back just to avoid a change in the behaviour. At least it might reduce the confusion when staring at logs (took me a while to figure out why DPLL1 being picked over DPLL0 when the latter was most definitely available). Cc: Lucas De Marchi Cc: Paulo Zanoni Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190130181359.20693-1-ville.syrjala@linux.intel.com Reviewed-by: Paulo Zanoni Acked-by: Lucas De Marchi --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 8f70838ac7d8..0a42d11c4c33 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -258,7 +258,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc, /* Only want to check enabled timings first */ if (shared_dpll[i].crtc_mask == 0) { - unused_pll = pll; + if (!unused_pll) + unused_pll = pll; continue; } From 828ccb31cf410a1a01e2bdc2a3cf0dd5aabb755d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 28 Jan 2019 13:42:42 +0200 Subject: [PATCH 411/539] drm/i915/icl: Add TypeC ports only if VBT is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't safely probe Type C ports, whether they are a legacy or a USB/Thunderbolt DP Alternate Type C port. This would require performing the TypeC connect sequence - as described by the specification - but that may have unwanted side-effects. These side-effects include at least - without completeness - timeouts during AUX power well enabling and subsequent PLL enabling errors. To safely identify these ports we really need VBT, which has the proper flag for this (ddi_vbt_port_info::supports_typec_usb, supports_tbt). Based on the above disable Type C ports if we can't load VBT for some reason. v2: - Notice that we disable TypeC ports completely and simplify accordingly (Jose). - Add code comment explaining why we disabled the ports. (Jani) Cc: Jani Nikula Cc: Paulo Zanoni Cc: Jose Roberto de Souza Cc: Ville Syrjälä Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190128114242.28666-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 561a4f9f044c..b508d8a735e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1663,6 +1663,13 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv) struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; + /* + * VBT has the TypeC mode (native,TBT/USB) and we don't want + * to detect it. + */ + if (intel_port_is_tc(dev_priv, port)) + continue; + info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; info->supports_dp = (port != PORT_E); From 2b34e562361f6e440524272187432c551b57481e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 20 Dec 2018 17:52:11 +0200 Subject: [PATCH 412/539] drm/i915/icl: Work around broken VBTs for port F detection VBT may include incorrect information about the presence of port F. Work around this on SKUs where we know the port is not present. v2: - Fix IS_ICL_WITH_PORT_F, so it's useable from any context. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108915 Cc: Mika Kahola Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20181220155211.31456-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_display.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 22da9df1f0a7..f11c66d172d3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2353,6 +2353,8 @@ static inline unsigned int i915_sg_segment_size(void) INTEL_INFO(dev_priv)->gt == 3) #define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x0004) == 0x0004) +#define IS_ICL_WITH_PORT_F(dev_priv) (IS_ICELAKE(dev_priv) && \ + INTEL_DEVID(dev_priv) != 0x8A51) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b0bb8adcf26f..4424b4c90578 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14376,8 +14376,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On some ICL SKUs port F is not present. No strap bits for * this, so rely on VBT. + * Work around broken VBTs on SKUs known to have no port F. */ - if (intel_bios_is_port_present(dev_priv, PORT_F)) + if (IS_ICL_WITH_PORT_F(dev_priv) && + intel_bios_is_port_present(dev_priv, PORT_F)) intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); From 69b768f2bc6750f785bcf04f87b06e8c27e22de9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 31 Jan 2019 17:08:42 -0800 Subject: [PATCH 413/539] drm/i915: Move workaround infrastructure code up Top comment in intel_workarounds.c says common code should come first so lets respect that. Also, by moving the common code together opportunities to reduce duplication will become more obvious. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-2-git-send-email-talha.nassar@intel.com --- drivers/gpu/drm/i915/intel_workarounds.c | 74 ++++++++++++------------ 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 3210ad4e08f7..584c4a5d2674 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -153,6 +153,43 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) _wa_add(wal, &wa); } +static void +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = val, + .val = _MASKED_BIT_ENABLE(val) + }; + + _wa_add(wal, &wa); +} + +static void +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val + }; + + _wa_add(wal, &wa); +} + +static void +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, ~0, val); +} + +static void +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_write_masked_or(wal, reg, val, val); +} + #define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) #define WA_SET_BIT_MASKED(addr, mask) \ @@ -602,43 +639,6 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) return 0; } -static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); -} - -static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, ~0, val); -} - -static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) -{ - wa_write_masked_or(wal, reg, val, val); -} - static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { From ae598b0d6b50f5da3d31dc1fab9880b03c631b70 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 31 Jan 2019 17:08:43 -0800 Subject: [PATCH 414/539] drm/i915: Save some lines of source code in workarounds No functional or code size change - just notice we can compact the source by re-using a single helper for adding workarounds. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-3-git-send-email-talha.nassar@intel.com --- drivers/gpu/drm/i915/intel_workarounds.c | 32 +++++------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 584c4a5d2674..5c010551cd23 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -142,7 +142,8 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void -__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, + u32 val) { struct i915_wa wa = { .reg = reg, @@ -156,26 +157,7 @@ __wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - struct i915_wa wa = { - .reg = reg, - .mask = val, - .val = _MASKED_BIT_ENABLE(val) - }; - - _wa_add(wal, &wa); -} - -static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) -{ - struct i915_wa wa = { - .reg = reg, - .mask = mask, - .val = val - }; - - _wa_add(wal, &wa); + wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); } static void @@ -190,16 +172,14 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } -#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) - #define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) { From 0b904c890ac2d9947aad96ca48f0fe5a8d032459 Mon Sep 17 00:00:00 2001 From: Talha Nassar Date: Thu, 31 Jan 2019 17:08:44 -0800 Subject: [PATCH 415/539] drm/i915/icl: restore WaEnableFloatBlendOptimization Enables blend optimization for floating point RTs This restores the workaround that was reverted in c358514ba8da ("Revert "drm/i915/icl: WaEnableFloatBlendOptimization""). The revert was due to the register write seemingly not sticking, but the HW team has confirmed that this is because the register is WO and that the workaround is indeed required. Here the wa is added with a mask of 0 since the register is WO. References: https://hsdes.intel.com/resource/1408134172 References: https://bugs.freedesktop.org/show_bug.cgi?id=107338 Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Talha Nassar Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1548983324-15344-4-git-send-email-talha.nassar@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_workarounds.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a64deeb4e517..ede54fdc1676 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2801,6 +2801,9 @@ enum i915_power_well_id { #define GEN6_RCS_PWR_FSM _MMIO(0x22ac) #define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) +#define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) + /* Fuse readout registers for GT */ #define HSW_PAVP_FUSE1 _MMIO(0x911C) #define HSW_F1_EU_DIS_SHIFT 16 diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 5c010551cd23..15f4a6dee5aa 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -549,6 +549,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + + /* WaEnableFloatBlendOptimization:icl */ + wa_write_masked_or(wal, + GEN10_CACHE_MODE_SS, + 0, /* write-only, so skip validation */ + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); } void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) From 7360c9f6b857e22a48e545f4e99c79630994e932 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 29 Jan 2019 15:22:37 +0100 Subject: [PATCH 416/539] drm/i915: Enable fastboot by default on VLV and CHV We really want to have fastboot enabled by default to avoid an ugly modeset during boot. Currently we are enabling fastboot by default on gen9+ (Skylake and newer). The intention is to enable it on older generations after it has seen more testing on gen9+. VLV and CHV devices are still being sold in stores today, as such it is desirable to also enable fastboot by default on these now. I've extensively tested fastboot=1 support on over 50 different Bay- and Cherry-Trail devices. Testing DSI and eDP panels as well as HDMI output (and even DP over Type-C on one device). All 50 devices work fine with fastboot=1. On 2 devices their DSI panel turns black as soon as the i915 driver loads when fastboot=0, so having fastboot enabled is required for these 2 to work properly (for lack of a better fix). Signed-off-by: Hans de Goede Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190129142237.8684-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/intel_display.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4424b4c90578..8b36ee183c7e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11735,7 +11735,15 @@ static bool fastboot_enabled(struct drm_i915_private *dev_priv) return i915_modparams.fastboot; /* Enable fastboot by default on Skylake and newer */ - return INTEL_GEN(dev_priv) >= 9; + if (INTEL_GEN(dev_priv) >= 9) + return true; + + /* Enable fastboot by default on VLV and CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return true; + + /* Disabled by default on all others */ + return false; } static bool From 5e0f5a58b167fc2c8352d90c0faa8c0c9ca75c26 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Feb 2019 15:50:49 -0800 Subject: [PATCH 417/539] drm/i915/cfl: Adding another PCI Device ID. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While cross checking PCI IDs from Intel Media SDK and kernel Dmitry noticed this gap. So we checked the spec and this new ID had been recently added. v2: Adding new H_GT1 entry to i915_pci.c (Jose) Reported-by: Dmitry Rogozhkin Cc: Dmitry Rogozhkin Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190201235049.27206-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + include/drm/i915_pciids.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6f7ad10f34b8..55c9058e91a8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -711,6 +711,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_AML_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_H_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index df72be7e8b88..d2fad7b0fcf6 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -394,6 +394,9 @@ INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */ /* CFL H */ +#define INTEL_CFL_H_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x3E9C, info) + #define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ @@ -426,6 +429,7 @@ #define INTEL_CFL_IDS(info) \ INTEL_CFL_S_GT1_IDS(info), \ INTEL_CFL_S_GT2_IDS(info), \ + INTEL_CFL_H_GT1_IDS(info), \ INTEL_CFL_H_GT2_IDS(info), \ INTEL_CFL_U_GT2_IDS(info), \ INTEL_CFL_U_GT3_IDS(info), \ From fe57085a36de5813ab63a8d178ccfb5f257f028e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 22 Jan 2019 15:44:54 -0500 Subject: [PATCH 418/539] drm/amdgpu: clean up memory/GDS/GWS/OA alignment code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - move all adjustments into one place - specify GDS/GWS/OA alignment in basic units of the heaps - it looks like GDS alignment was 1 instead of 4 Signed-off-by: Marek Olšák Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f4f00217546e..d21dd2f369da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, memset(&bp, 0, sizeof(bp)); *obj = NULL; - /* At least align on page size */ - if (alignment < PAGE_SIZE) { - alignment = PAGE_SIZE; - } bp.size = size; bp.byte_align = alignment; @@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - /* GDS allocations must be DW aligned */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS) - size = ALIGN(size, 4); } if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 728e15e5d68a..fd9c4beeaaa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, size_t acc_size; int r; - page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; - if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + /* Note that GDS/GWS/OA allocates 1 page per byte/resource. */ + if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { + /* GWS and OA don't need any alignment. */ + page_align = bp->byte_align; size <<= PAGE_SHIFT; - else + } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) { + /* Both size and alignment must be a multiple of 4. */ + page_align = ALIGN(bp->byte_align, 4); + size = ALIGN(size, 4) << PAGE_SHIFT; + } else { + /* Memory should be aligned at least to a page size. */ + page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); + } if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b852abb9db0f..73e71e61dc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1756,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + 4, AMDGPU_GEM_DOMAIN_GDS, &adev->gds.gds_gfx_bo, NULL, NULL); if (r) return r; @@ -1769,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + 1, AMDGPU_GEM_DOMAIN_GWS, &adev->gds.gws_gfx_bo, NULL, NULL); if (r) return r; @@ -1782,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + 1, AMDGPU_GEM_DOMAIN_OA, &adev->gds.oa_gfx_bo, NULL, NULL); if (r) return r; From 46c0cd8c562bc3e4a99cbaa4ba0904b6871b7b4b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Sat, 2 Feb 2019 00:14:28 -0800 Subject: [PATCH 419/539] drm/i915: Update DRIVER_DATE to 20190202 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f11c66d172d3..534e52e3a8da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190129" -#define DRIVER_TIMESTAMP 1548812351 +#define DRIVER_DATE "20190202" +#define DRIVER_TIMESTAMP 1549095268 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From 0747a672a32829809675028ba396f394b104ee31 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 22:51:04 +0100 Subject: [PATCH 420/539] gpu: host1x: Use completion instead of semaphore In this usage, the two are completely equivalent, but the completion documents better what is going on, and we generally try to avoid semaphores these days. Signed-off-by: Arnd Bergmann Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 6 +++--- drivers/gpu/host1x/cdma.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 91df51e631b2..15bdeb836e3d 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -210,7 +210,7 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, cdma->event = event; mutex_unlock(&cdma->lock); - down(&cdma->sem); + wait_for_completion(&cdma->complete); mutex_lock(&cdma->lock); } @@ -314,7 +314,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma) if (signal) { cdma->event = CDMA_EVENT_NONE; - up(&cdma->sem); + complete(&cdma->complete); } } @@ -416,7 +416,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma) int err; mutex_init(&cdma->lock); - sema_init(&cdma->sem, 0); + init_completion(&cdma->complete); INIT_LIST_HEAD(&cdma->sync_queue); diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index e97e17b82370..71078359c626 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -20,7 +20,7 @@ #define __HOST1X_CDMA_H #include -#include +#include #include struct host1x_syncpt; @@ -69,8 +69,8 @@ enum cdma_event { struct host1x_cdma { struct mutex lock; /* controls access to shared state */ - struct semaphore sem; /* signalled when event occurs */ - enum cdma_event event; /* event that sem is waiting for */ + struct completion complete; /* signalled when event occurs */ + enum cdma_event event; /* event that complete is waiting for */ unsigned int slots_used; /* pb slots used in current submit */ unsigned int slots_free; /* pb slots free in current submit */ unsigned int first_get; /* DMAGET value, where submit begins */ From f67524caf49949b8d1a219f1fd8ea263854a6683 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 23 Jan 2019 12:34:05 +0100 Subject: [PATCH 421/539] gpu: host1x: Represent host1x bus devices in debugfs This new debugfs file represents the state of host1x bus devices, specifying the list of subdevices and marking which ones have successfully registered. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index b4c385d4a6af..103fffc1904b 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -15,8 +15,10 @@ * along with this program. If not, see . */ +#include #include #include +#include #include #include @@ -500,6 +502,36 @@ static void host1x_detach_driver(struct host1x *host1x, mutex_unlock(&host1x->devices_lock); } +static int host1x_devices_show(struct seq_file *s, void *data) +{ + struct host1x *host1x = s->private; + struct host1x_device *device; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry(device, &host1x->devices, list) { + struct host1x_subdev *subdev; + + seq_printf(s, "%s\n", dev_name(&device->dev)); + + mutex_lock(&device->subdevs_lock); + + list_for_each_entry(subdev, &device->active, list) + seq_printf(s, " %pOFf: %s\n", subdev->np, + dev_name(subdev->client->dev)); + + list_for_each_entry(subdev, &device->subdevs, list) + seq_printf(s, " %pOFf:\n", subdev->np); + + mutex_unlock(&device->subdevs_lock); + } + + mutex_unlock(&host1x->devices_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(host1x_devices); + /** * host1x_register() - register a host1x controller * @host1x: host1x controller @@ -523,6 +555,9 @@ int host1x_register(struct host1x *host1x) mutex_unlock(&drivers_lock); + debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x, + &host1x_devices_fops); + return 0; } From 6841482b82e5ba8a403559cbc0c15706624db17a Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:22 +0100 Subject: [PATCH 422/539] gpu: host1x: Set up stream ID table In order to enable the MMIO path stream ID protection provided by the incarnation of host1x found in Tegra186 and later, the host1x must be provided with the list of stream ID register offsets for each of its clients. Some clients (such as VIC) have multiple stream ID registers that are assumed to be contiguous. The host1x is programmed with the base offset and a limit which provide the range of registers that the host1x needs to monitor for writes. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/host1x/dev.h | 8 ++++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 419d8929a98f..4c044ee54fe6 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -120,6 +120,15 @@ static const struct host1x_info host1x05_info = { .dma_mask = DMA_BIT_MASK(34), }; +static const struct host1x_sid_entry tegra186_sid_table[] = { + { + /* VIC */ + .base = 0x1af0, + .offset = 0x30, + .limit = 0x34 + }, +}; + static const struct host1x_info host1x06_info = { .nb_channels = 63, .nb_pts = 576, @@ -129,6 +138,17 @@ static const struct host1x_info host1x06_info = { .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(34), .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), + .sid_table = tegra186_sid_table, +}; + +static const struct host1x_sid_entry tegra194_sid_table[] = { + { + /* VIC */ + .base = 0x1af0, + .offset = 0x30, + .limit = 0x34 + }, }; static const struct host1x_info host1x07_info = { @@ -140,6 +160,8 @@ static const struct host1x_info host1x07_info = { .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(40), .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), + .sid_table = tegra194_sid_table, }; static const struct of_device_id host1x_of_match[] = { @@ -154,6 +176,19 @@ static const struct of_device_id host1x_of_match[] = { }; MODULE_DEVICE_TABLE(of, host1x_of_match); +static void host1x_setup_sid_table(struct host1x *host) +{ + const struct host1x_info *info = host->info; + unsigned int i; + + for (i = 0; i < info->num_sid_entries; i++) { + const struct host1x_sid_entry *entry = &info->sid_table[i]; + + host1x_hypervisor_writel(host, entry->offset, entry->base); + host1x_hypervisor_writel(host, entry->limit, entry->base + 4); + } +} + static int host1x_probe(struct platform_device *pdev) { struct host1x *host; @@ -316,6 +351,9 @@ skip_iommu: host1x_debug_init(host); + if (host->info->has_hypervisor) + host1x_setup_sid_table(host); + err = host1x_register(host); if (err < 0) goto fail_deinit_intr; diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 36f44ffebe73..05216a7e4830 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -94,6 +94,12 @@ struct host1x_intr_ops { int (*free_syncpt_irq)(struct host1x *host); }; +struct host1x_sid_entry { + unsigned int base; + unsigned int offset; + unsigned int limit; +}; + struct host1x_info { unsigned int nb_channels; /* host1x: number of channels supported */ unsigned int nb_pts; /* host1x: number of syncpoints supported */ @@ -103,6 +109,8 @@ struct host1x_info { unsigned int sync_offset; /* offset of syncpoint registers */ u64 dma_mask; /* mask of addressable memory */ bool has_hypervisor; /* has hypervisor registers */ + unsigned int num_sid_entries; + const struct host1x_sid_entry *sid_table; }; struct host1x { From 3d7a64b992eab27be75c6cf734be4e87fc88e4b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Feb 2019 08:41:05 +0000 Subject: [PATCH 423/539] drm/i915: Allow normal clients to always preempt idle priority clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When first enabling preemption, we hesitated from making it a free-for-all where every higher priority client would force a preempt-to-idle cycle and take over from all lower priority clients. We hesitated because we were uncertain just how well preemption would work in practice, whether the preemption latency itself would detract from the latency gains for higher priority tasks and whether it would work at all. Since introducing preemption, we have been enabling it for more common tasks, even giving normal clients a small preemptive boost when they first start (to aide fairness and improve interactivity). Now lets take one step further and give permission for all normal (priority:0) clients to preempt any idle (priority:<0) task so that users running long compute jobs do not overly impact other jobs (i.e. their desktop) and the system remains responsive under such idle loads. References: f6322eddaff7 ("drm/i915/preemption: Allow preemption between submission ports") References: b16c765122f9 ("drm/i915: Priority boost for new clients") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Michal Wajdeczko Cc: Michał Winiarski Cc: "Bloomfield, Jon" Cc: "Stead, Alan" Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190204084116.3013-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 34d0a148e664..1398eb81dee6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -592,7 +592,20 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) static inline bool __execlists_need_preempt(int prio, int last) { - return prio > max(0, last); + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, last); } static inline void From 1413b2bc0717036a5a653eef20cc3ae4cc66501a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Feb 2019 15:01:01 +0000 Subject: [PATCH 424/539] drm/i915: Trim NEWCLIENT boosting Limit the NEWCLIENT boost to only give its small priority boost to fresh clients only that have no dependencies. The idea for using NEWCLIENT boosting, commit b16c765122f9 ("drm/i915: Priority boost for new clients"), is that short-lived streams are often interactive and require lower latency -- and that by executing those ahead of the long running hogs, the short-lived clients do little to interfere with the system throughput by virtue of their short-lived nature. However, we were only considering the client's own timeline for determining whether or not it was a fresh stream. This allowed for compositors to wake up before their vblank and bump all of its client streams. However, in testing with media-bench this results in chaining all cooperating contexts together preventing us from being able to reorder contexts to reduce bubbles (pipeline stalls), overall increasing latency, and reducing system throughput. The exact opposite of our intent. The compromise of applying the NEWCLIENT boost to strictly fresh clients (that do not wait upon anything else) should maintain the "real-time response under load" characteristics of FQ_CODEL, without locking together the long chains of dependencies across the system. References: b16c765122f9 ("drm/i915: Priority boost for new clients") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190204150101.30759-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9ed5baf157a3..9383a9fb4893 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -968,7 +968,7 @@ void i915_request_add(struct i915_request *request) * Allow interactive/synchronous clients to jump ahead of * the bulk clients. (FQ_CODEL) */ - if (!prev || i915_request_completed(prev)) + if (list_empty(&request->sched.signalers_list)) attr.priority |= I915_PRIORITY_NEWCLIENT; engine->schedule(request, &attr); From 87f1ef225242d40f99e0ddf0f22020dc2b73601b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 5 Feb 2019 09:50:28 +0000 Subject: [PATCH 425/539] drm/i915: Record the sseu configuration per-context & engine We want to expose the ability to reconfigure the slices, subslice and eu per context and per engine. To facilitate that, store the current configuration on the context for each engine, which is initially set to the device default upon creation. v2: record sseu configuration per context & engine (Chris) v3: introduce the i915_gem_context_sseu to store powergating programming, sseu_dev_info has grown quite a bit (Lionel) v4: rename i915_gem_sseu into intel_sseu (Chris) use to_intel_context() (Chris) v5: More to_intel_context() (Tvrtko) Switch intel_sseu from union to struct (Tvrtko) Move context default sseu in existing loop (Chris) v6: s/intel_sseu_from_device_sseu/intel_device_default_sseu/ (Tvrtko) Tvrtko Ursulin: v7: * Pass intel_sseu by pointer instead of value to make_rpcs. * Rebase for make_rpcs changes. v8: * Rebase for RPCS edit on pin. v9: * Rebase for context image setup changes. v10: * Rename dev_priv to i915. (Chris Wilson) v11: * Rebase. v12: * Rebase for IS_GEN changes. v13: * Rebase for RUNTIME_INFO. v14: * Rebase for intel_context_init. v15: * Rebase for drm-tip changes. v16: * Moved struct intel_sseu definition to i915_gem_context.h. Signed-off-by: Chris Wilson Signed-off-by: Lionel Landwerlin Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 14 +++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 3 +++ drivers/gpu/drm/i915/i915_gem_context.h | 14 +++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 31 +++++++++++++------------ 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 534e52e3a8da..45b837bc8f9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3305,6 +3305,20 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } +static inline struct intel_sseu +intel_device_default_sseu(struct drm_i915_private *i915) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 6faf1f6faab5..d3887c27c3ba 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -330,6 +330,9 @@ intel_context_init(struct intel_context *ce, INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); + + /* Use the whole device by default */ + ce->sseu = intel_device_default_sseu(ctx->i915); } static struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 6ba40ff6b91f..919f6f0a0f7a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -31,6 +31,7 @@ #include "i915_gem.h" #include "i915_scheduler.h" +#include "intel_device_info.h" struct pid; @@ -53,6 +54,16 @@ struct intel_context_ops { void (*destroy)(struct intel_context *ce); }; +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + /** * struct i915_gem_context - client state * @@ -173,6 +184,9 @@ struct i915_gem_context { int pin_count; const struct intel_context_ops *ops; + + /** sseu: Control eu/slice partitioning */ + struct intel_sseu sseu; } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a9eb0211ce77..7682f1e71d55 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1266,7 +1266,8 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } -static u32 make_rpcs(struct drm_i915_private *dev_priv); +static u32 +make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); static void __execlists_update_reg_state(struct intel_engine_cs *engine, @@ -1281,7 +1282,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine, /* RPCS */ if (engine->class == RENDER_CLASS) - regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915); + regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915, + &ce->sseu); } static struct intel_context * @@ -2432,18 +2434,19 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) } static u32 -make_rpcs(struct drm_i915_private *dev_priv) +make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu) { - bool subslice_pg = RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg; - u8 slices = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask); - u8 subslices = hweight8(RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]); + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + u8 slices = hweight8(ctx_sseu->slice_mask); + u8 subslices = hweight8(ctx_sseu->subslice_mask); u32 rpcs = 0; /* * No explicit RPCS request is needed to ensure full * slice/subslice/EU enablement prior to Gen9. */ - if (INTEL_GEN(dev_priv) < 9) + if (INTEL_GEN(i915) < 9) return 0; /* @@ -2471,7 +2474,7 @@ make_rpcs(struct drm_i915_private *dev_priv) * subslices are enabled, or a count between one and four on the first * slice. */ - if (IS_GEN(dev_priv, 11) && slices == 1 && subslices >= 4) { + if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) { GEM_BUG_ON(subslices & 1); subslice_pg = false; @@ -2484,10 +2487,10 @@ make_rpcs(struct drm_i915_private *dev_priv) * must make an explicit request through RPCS for full * enablement. */ - if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) { + if (sseu->has_slice_pg) { u32 mask, val = slices; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(i915) >= 11) { mask = GEN11_RPCS_S_CNT_MASK; val <<= GEN11_RPCS_S_CNT_SHIFT; } else { @@ -2512,18 +2515,16 @@ make_rpcs(struct drm_i915_private *dev_priv) rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; } - if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) { + if (sseu->has_eu_pg) { u32 val; - val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MIN_SHIFT; + val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); val &= GEN8_RPCS_EU_MIN_MASK; rpcs |= val; - val = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice << - GEN8_RPCS_EU_MAX_SHIFT; + val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); val &= GEN8_RPCS_EU_MAX_MASK; From ec431eae8fc51c1b4555ec5f566e081804207657 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 5 Feb 2019 09:50:29 +0000 Subject: [PATCH 426/539] drm/i915/perf: lock powergating configuration to default when active If some of the contexts submitting workloads to the GPU have been configured to shutdown slices/subslices, we might loose the NOA configurations written in the NOA muxes. One possible solution to this problem is to reprogram the NOA muxes when we switch to a new context. We initially tried this in the workaround batchbuffer but some concerns where raised about the cost of reprogramming at every context switch. This solution is also not without consequences from the userspace point of view. Reprogramming of the muxes can only happen once the powergating configuration has changed (which happens after context switch). This means for a window of time during the recording, counters recorded by the OA unit might be invalid. This requires userspace dealing with OA reports to discard the invalid values. Minimizing the reprogramming could be implemented by tracking of the last programmed configuration somewhere in GGTT and use MI_PREDICATE to discard some of the programming commands, but the command streamer would still have to parse all the MI_LRI instructions in the workaround batchbuffer. Another solution, which this change implements, is to simply disregard the user requested configuration for the period of time when i915/perf is active. On most platforms there are no issues with this apart from a performance penality for some media workloads that benefit from running on a partially powergated GPU. We already prevent RC6 from affecting the programming so it doesn't sound completely unreasonable to hold on powergating for the same reason. On Icelake however there would a functional problem if the slices not- containing the VME block were left enabled with a running media workload which explicitly disabled them. To avoid a GPU hang in this case, on Icelake we lock the enablement to only slices which contain VME blocks. Downside is that it means degraded GPU performance when OA is active but there is no known alternative solution for this. v2: Leave RPCS programming in intel_lrc.c (Lionel) v3: Update for s/union intel_sseu/struct intel_sseu/ (Lionel) More to_intel_context() (Tvrtko) s/dev_priv/i915/ (Tvrtko) Tvrtko Ursulin: v4: * Rebase for make_rpcs changes. v5: * Apply OA restriction from make_rpcs directly. v6: * Rebase for context image setup changes. v7: * Move stream assignment before metric enable. v8-9: * Rebase. v10: * Squashed with ICL support patch. Bspec: 21140 Co-developed-by: Tvrtko Ursulin Signed-off-by: Lionel Landwerlin Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Reviewed-by: Chris Wilson # v9 Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_perf.c | 13 ++++++--- drivers/gpu/drm/i915/intel_lrc.c | 46 ++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_lrc.h | 2 ++ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 727118301f91..9ebf99f3d8d3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1677,6 +1677,11 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, CTX_REG(reg_state, state_offset, flex_regs[i], value); } + + CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + gen8_make_rpcs(dev_priv, + &to_intel_context(ctx, + dev_priv->engine[RCS])->sseu)); } /* @@ -2098,21 +2103,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) goto err_lock; + stream->ops = &i915_oa_stream_ops; + dev_priv->perf.oa.exclusive_stream = stream; + ret = dev_priv->perf.oa.ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - stream->ops = &i915_oa_stream_ops; - - dev_priv->perf.oa.exclusive_stream = stream; - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; err_enable: + dev_priv->perf.oa.exclusive_stream = NULL; dev_priv->perf.oa.ops.disable_metric_set(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7682f1e71d55..d99c462e2c09 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1266,9 +1266,6 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } -static u32 -make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); - static void __execlists_update_reg_state(struct intel_engine_cs *engine, struct intel_context *ce) @@ -1282,8 +1279,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine, /* RPCS */ if (engine->class == RENDER_CLASS) - regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915, - &ce->sseu); + regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915, + &ce->sseu); } static struct intel_context * @@ -2433,13 +2430,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) return logical_ring_init(engine); } -static u32 -make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu) +u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) { const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; bool subslice_pg = sseu->has_subslice_pg; - u8 slices = hweight8(ctx_sseu->slice_mask); - u8 subslices = hweight8(ctx_sseu->subslice_mask); + struct intel_sseu ctx_sseu; + u8 slices, subslices; u32 rpcs = 0; /* @@ -2449,6 +2445,34 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu) if (INTEL_GEN(i915) < 9) return 0; + /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_device_default_sseu(i915); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + /* * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits * wide and Icelake has up to eight subslices, specfial programming is @@ -2518,13 +2542,13 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu) if (sseu->has_eu_pg) { u32 val; - val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); val &= GEN8_RPCS_EU_MIN_MASK; rpcs |= val; - val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); val &= GEN8_RPCS_EU_MAX_MASK; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3d86c27c6b32..f1aec8a6986f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -112,4 +112,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, const char *prefix), unsigned int max); +u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); + #endif /* _INTEL_LRC_H_ */ From 7810858412a0ab8b8ebb97d301dd601808968c88 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 09:50:30 +0000 Subject: [PATCH 427/539] drm/i915: Add timeline barrier support Timeline barrier allows serialization between different timelines. After calling i915_timeline_set_barrier with a request, all following submissions on this timeline will be set up as depending on this request, or barrier. Once the barrier has been completed it automatically gets cleared and things continue as normal. This facility will be used by the upcoming context SSEU code. v2: * Assert barrier has been retired on timeline_fini. (Chris Wilson) * Fix mock_timeline. v3: * Improved comment language. (Chris Wilson) v4: * Maintain ordering with previous barriers set on the timeline. v5: * Rebase. Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-3-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_request.c | 17 ++++++++++++++ drivers/gpu/drm/i915/i915_timeline.c | 21 ++++++++++++++++++ drivers/gpu/drm/i915/i915_timeline.h | 22 +++++++++++++++++++ .../gpu/drm/i915/selftests/mock_timeline.c | 1 + 4 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9383a9fb4893..6512630b59b8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -526,6 +526,19 @@ out: return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL); } +static int add_barrier(struct i915_request *rq, struct i915_gem_active *active) +{ + struct i915_request *barrier = + i915_gem_active_raw(active, &rq->i915->drm.struct_mutex); + + return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; +} + +static int add_timeline_barrier(struct i915_request *rq) +{ + return add_barrier(rq, &rq->timeline->barrier); +} + /** * i915_request_alloc - allocate a request structure * @@ -668,6 +681,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; + ret = add_timeline_barrier(rq); + if (ret) + goto err_unwind; + ret = engine->request_alloc(rq); if (ret) goto err_unwind; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 5ea3af393ffe..dcff3ae96683 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -163,6 +163,7 @@ int i915_timeline_init(struct drm_i915_private *i915, spin_lock_init(&timeline->lock); + init_request_active(&timeline->barrier, NULL); init_request_active(&timeline->last_request, NULL); INIT_LIST_HEAD(&timeline->requests); @@ -235,6 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline) { GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier)); i915_syncmap_free(&timeline->sync); hwsp_free(timeline); @@ -309,6 +311,25 @@ void i915_timeline_unpin(struct i915_timeline *tl) __i915_vma_unpin(tl->hwsp_ggtt); } +int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq) +{ + struct i915_request *old; + int err; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + + /* Must maintain ordering wrt existing barriers */ + old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex); + if (old) { + err = i915_request_await_dma_fence(rq, &old->fence); + if (err) + return err; + } + + i915_gem_active_set(&tl->barrier, rq); + return 0; +} + void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 8caeb66d1cd5..d167e04073c5 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -74,6 +74,16 @@ struct i915_timeline { */ struct i915_syncmap *sync; + /** + * Barrier provides the ability to serialize ordering between different + * timelines. + * + * Users can call i915_timeline_set_barrier which will make all + * subsequent submissions to this timeline be executed only after the + * barrier has been completed. + */ + struct i915_gem_active barrier; + struct list_head link; const char *name; struct drm_i915_private *i915; @@ -155,4 +165,16 @@ void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); void i915_timelines_fini(struct drm_i915_private *i915); +/** + * i915_timeline_set_barrier - orders submission between different timelines + * @timeline: timeline to set the barrier on + * @rq: request after which new submissions can proceed + * + * Sets the passed in request as the serialization point for all subsequent + * submissions on @timeline. Subsequent requests will not be submitted to GPU + * until the barrier has been completed. + */ +int i915_timeline_set_barrier(struct i915_timeline *timeline, + struct i915_request *rq); + #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index cf39ccd9fc05..e5659aaa856d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -15,6 +15,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) spin_lock_init(&timeline->lock); + init_request_active(&timeline->barrier, NULL); init_request_active(&timeline->last_request, NULL); INIT_LIST_HEAD(&timeline->requests); From e46c2e99f60043bfdb63f18fe775db1f688906d9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 09:50:31 +0000 Subject: [PATCH 428/539] drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson Signed-off-by: Lionel Landwerlin Cc: Dmitry Rogozhkin Cc: Tvrtko Ursulin Cc: Zhipeng Gong Cc: Joonas Lahtinen Cc: Tony Ye Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Timo Aaltonen Acked-by: Takashi Iwai Acked-by: Stéphane Marchesin Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 340 +++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_context.h | 6 + drivers/gpu/drm/i915/intel_lrc.c | 4 +- include/uapi/drm/i915_drm.h | 64 +++++ 4 files changed, 411 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d3887c27c3ba..2d3e1ce9cc76 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -89,6 +89,7 @@ #include #include "i915_drv.h" #include "i915_trace.h" +#include "intel_lrc_reg.h" #include "intel_workarounds.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -321,6 +322,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915, return desc; } +static void intel_context_retire(struct i915_gem_active *active, + struct i915_request *rq) +{ + struct intel_context *ce = + container_of(active, typeof(*ce), active_tracker); + + intel_context_unpin(ce); +} + void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, @@ -333,6 +343,8 @@ intel_context_init(struct intel_context *ce, /* Use the whole device by default */ ce->sseu = intel_device_default_sseu(ctx->i915); + + init_request_active(&ce->active_tracker, intel_context_retire); } static struct i915_gem_context * @@ -850,6 +862,56 @@ out: return 0; } +static int get_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_engine_cs *engine; + struct intel_context *ce; + int ret; + + if (args->size == 0) + goto out; + else if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.flags || user_sseu.rsvd) + return -EINVAL; + + engine = intel_engine_lookup_user(ctx->i915, + user_sseu.engine_class, + user_sseu.engine_instance); + if (!engine) + return -EINVAL; + + /* Only use for mutex here is to serialize get_param and set_param. */ + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ce = to_intel_context(ctx, engine); + + user_sseu.slice_mask = ce->sseu.slice_mask; + user_sseu.subslice_mask = ce->sseu.subslice_mask; + user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; + user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; + + mutex_unlock(&ctx->i915->drm.struct_mutex); + + if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, + sizeof(user_sseu))) + return -EFAULT; + +out: + args->size = sizeof(user_sseu); + + return 0; +} + int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -862,15 +924,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, if (!ctx) return -ENOENT; - args->size = 0; switch (args->param) { case I915_CONTEXT_PARAM_BAN_PERIOD: ret = -EINVAL; break; case I915_CONTEXT_PARAM_NO_ZEROMAP: + args->size = 0; args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); break; case I915_CONTEXT_PARAM_GTT_SIZE: + args->size = 0; + if (ctx->ppgtt) args->value = ctx->ppgtt->vm.total; else if (to_i915(dev)->mm.aliasing_ppgtt) @@ -879,14 +943,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = to_i915(dev)->ggtt.vm.total; break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + args->size = 0; args->value = i915_gem_context_no_error_capture(ctx); break; case I915_CONTEXT_PARAM_BANNABLE: + args->size = 0; args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: + args->size = 0; args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; break; + case I915_CONTEXT_PARAM_SSEU: + ret = get_sseu(ctx, args); + break; default: ret = -EINVAL; break; @@ -896,6 +966,270 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, return ret; } +static int gen8_emit_rpcs_config(struct i915_request *rq, + struct intel_context *ce, + struct intel_sseu sseu) +{ + u64 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + + LRC_STATE_PN * PAGE_SIZE + + (CTX_R_PWR_CLK_STATE + 1) * 4; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = gen8_make_rpcs(rq->i915, &sseu); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_modify_rpcs_gpu(struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_request *rq, *prev; + intel_wakeref_t wakeref; + int ret; + + GEM_BUG_ON(!ce->pin_count); + + lockdep_assert_held(&i915->drm.struct_mutex); + + /* Submitting requests etc needs the hw awake. */ + wakeref = intel_runtime_pm_get(i915); + + rq = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_put; + } + + /* Queue this switch after all other activity by this context. */ + prev = i915_gem_active_raw(&ce->ring->timeline->last_request, + &i915->drm.struct_mutex); + if (prev && !i915_request_completed(prev)) { + ret = i915_request_await_dma_fence(rq, &prev->fence); + if (ret < 0) + goto out_add; + } + + /* Order all following requests to be after. */ + ret = i915_timeline_set_barrier(ce->ring->timeline, rq); + if (ret) + goto out_add; + + ret = gen8_emit_rpcs_config(rq, ce, sseu); + if (ret) + goto out_add; + + /* + * Guarantee context image and the timeline remains pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on the account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + if (!i915_gem_active_isset(&ce->active_tracker)) + __intel_context_pin(ce); + i915_gem_active_set(&ce->active_tracker, rq); + +out_add: + i915_request_add(rq); +out_put: + intel_runtime_pm_put(i915, wakeref); + + return ret; +} + +static int +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + int ret; + + GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); + GEM_BUG_ON(engine->id != RCS); + + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + /* Nothing to do if unmodified. */ + if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) + goto out; + + /* + * If context is not idle we have to submit an ordered request to modify + * its context image via the kernel context. Pristine and idle contexts + * will be configured on pinning. + */ + if (ce->pin_count) + ret = gen8_modify_rpcs_gpu(ce, engine, sseu); + + if (!ret) + ce->sseu = sseu; + +out: + mutex_unlock(&ctx->i915->drm.struct_mutex); + + return ret; +} + +static int +user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context) +{ + const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; + + /* No zeros in any field. */ + if (!user->slice_mask || !user->subslice_mask || + !user->min_eus_per_subslice || !user->max_eus_per_subslice) + return -EINVAL; + + /* Max > min. */ + if (user->max_eus_per_subslice < user->min_eus_per_subslice) + return -EINVAL; + + /* + * Some future proofing on the types since the uAPI is wider than the + * current internal implementation. + */ + if (overflows_type(user->slice_mask, context->slice_mask) || + overflows_type(user->subslice_mask, context->subslice_mask) || + overflows_type(user->min_eus_per_subslice, + context->min_eus_per_subslice) || + overflows_type(user->max_eus_per_subslice, + context->max_eus_per_subslice)) + return -EINVAL; + + /* Check validity against hardware. */ + if (user->slice_mask & ~device->slice_mask) + return -EINVAL; + + if (user->subslice_mask & ~device->subslice_mask[0]) + return -EINVAL; + + if (user->max_eus_per_subslice > device->max_eus_per_subslice) + return -EINVAL; + + context->slice_mask = user->slice_mask; + context->subslice_mask = user->subslice_mask; + context->min_eus_per_subslice = user->min_eus_per_subslice; + context->max_eus_per_subslice = user->max_eus_per_subslice; + + /* Part specific restrictions. */ + if (IS_GEN(i915, 11)) { + unsigned int hw_s = hweight8(device->slice_mask); + unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int req_s = hweight8(context->slice_mask); + unsigned int req_ss = hweight8(context->subslice_mask); + + /* + * Only full subslice enablement is possible if more than one + * slice is turned on. + */ + if (req_s > 1 && req_ss != hw_ss_per_s) + return -EINVAL; + + /* + * If more than four (SScount bitfield limit) subslices are + * requested then the number has to be even. + */ + if (req_ss > 4 && (req_ss & 1)) + return -EINVAL; + + /* + * If only one slice is enabled and subslice count is below the + * device full enablement, it must be at most half of the all + * available subslices. + */ + if (req_s == 1 && req_ss < hw_ss_per_s && + req_ss > (hw_ss_per_s / 2)) + return -EINVAL; + + /* ABI restriction - VME use case only. */ + + /* All slices or one slice only. */ + if (req_s != 1 && req_s != hw_s) + return -EINVAL; + + /* + * Half subslices or full enablement only when one slice is + * enabled. + */ + if (req_s == 1 && + (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) + return -EINVAL; + + /* No EU configuration changes. */ + if ((user->min_eus_per_subslice != + device->max_eus_per_subslice) || + (user->max_eus_per_subslice != + device->max_eus_per_subslice)) + return -EINVAL; + } + + return 0; +} + +static int set_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_engine_cs *engine; + struct intel_sseu sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (!IS_GEN(i915, 11)) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.flags || user_sseu.rsvd) + return -EINVAL; + + engine = intel_engine_lookup_user(i915, + user_sseu.engine_class, + user_sseu.engine_instance); + if (!engine) + return -EINVAL; + + /* Only render engine supports RPCS configuration. */ + if (engine->class != RENDER_CLASS) + return -ENODEV; + + ret = user_to_context_sseu(i915, &user_sseu, &sseu); + if (ret) + return ret; + + ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -958,7 +1292,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, I915_USER_PRIORITY(priority); } break; - + case I915_CONTEXT_PARAM_SSEU: + ret = set_sseu(ctx, args); + break; default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 919f6f0a0f7a..92ad5272e57f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -183,6 +183,12 @@ struct i915_gem_context { u64 lrc_desc; int pin_count; + /** + * active_tracker: Active tracker for the external rq activity + * on this intel_context object. + */ + struct i915_gem_active active_tracker; + const struct intel_context_ops *ops; /** sseu: Control eu/slice partitioning */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d99c462e2c09..5e98fd79bd9d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2498,7 +2498,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) * subslices are enabled, or a count between one and four on the first * slice. */ - if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) { + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { GEM_BUG_ON(subslices & 1); subslice_pg = false; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 298b2e197744..397810fa2d33 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ #define I915_CONTEXT_DEFAULT_PRIORITY 0 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting such a + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 engine_instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be inferior or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ From c06ee6ff2cbcf7f23c6dcfe97e2be415eb3a43e4 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 09:50:32 +0000 Subject: [PATCH 429/539] drm/i915/selftests: Context SSEU reconfiguration tests Exercise the context image reconfiguration logic for idle and busy contexts, with the resets thrown into the mix as well. Free from the uAPI restrictions this test runs on all Gen9+ platforms with slice power gating. v2: * Rename some helpers for clarity. * Include subtest names in error logs. * Remove unnecessary function export. v3: * Rebase for RUNTIME_INFO. v4: * Fix incomplete unexport from v2. (Chris Wilson) v5: * Rebased for runtime pm api changes. v6: * Rebased for i915_reset.c. v7: * Tidy checkpatch warnings. * Consolidate error checking and logging a bit. * Skip idle test phase if something failed before it. v8: (Chris Wilson) * Fix i915_request_wait error handling. * No need to PIN_HIGH the VMA. * Remove pointless GEM_BUG_ON before pointer dereference. v9: * Avoid rq leak if rpcs query fails. (Chris) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen # v6 Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 31 +- .../gpu/drm/i915/selftests/i915_gem_context.c | 467 ++++++++++++++++++ 2 files changed, 488 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2d3e1ce9cc76..93ab287f44b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1052,23 +1052,19 @@ out_put: } static int -i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_sseu sseu) +__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_sseu sseu) { struct intel_context *ce = to_intel_context(ctx, engine); - int ret; + int ret = 0; GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); GEM_BUG_ON(engine->id != RCS); - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) - goto out; + return 0; /* * If context is not idle we have to submit an ordered request to modify @@ -1081,7 +1077,22 @@ i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, if (!ret) ce->sseu = sseu; -out: + return ret; +} + +static int +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_sseu sseu) +{ + int ret; + + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + mutex_unlock(&ctx->i915->drm.struct_mutex); return ret; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index e2c1f0bc2abe..d00d0bb07784 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -24,10 +24,13 @@ #include +#include "../i915_reset.h" #include "../i915_selftest.h" #include "i915_random.h" #include "igt_flush_test.h" #include "igt_live_test.h" +#include "igt_reset.h" +#include "igt_spinner.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -576,6 +579,469 @@ out_unlock: return err; } +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj; + u32 *cmd; + int err; + + if (INTEL_GEN(vma->vm->i915) < 8) + return ERR_PTR(-EINVAL); + + obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +emit_rpcs_query(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_request **rq_out) +{ + struct i915_request *rq; + struct i915_vma *batch; + struct i915_vma *vma; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + batch = rpcs_query_batch(vma); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + *rq_out = i915_request_get(rq); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + + return err; +} + +#define TEST_IDLE BIT(0) +#define TEST_BUSY BIT(1) +#define TEST_RESET BIT(2) + +static int +__sseu_prepare(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct igt_spinner **spin_out) +{ + int ret = 0; + + if (flags & (TEST_BUSY | TEST_RESET)) { + struct igt_spinner *spin; + struct i915_request *rq; + + spin = kzalloc(sizeof(*spin), GFP_KERNEL); + if (!spin) { + ret = -ENOMEM; + goto out; + } + + ret = igt_spinner_init(spin, i915); + if (ret) + return ret; + + rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + igt_spinner_fini(spin); + kfree(spin); + goto out; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + pr_err("%s: Spinner failed to start!\n", name); + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + ret = -ETIMEDOUT; + goto out; + } + + *spin_out = spin; + } + +out: + return ret; +} + +static int +__read_slice_count(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + struct igt_spinner *spin, + u32 *rpcs) +{ + struct i915_request *rq = NULL; + u32 s_mask, s_shift; + unsigned int cnt; + u32 *buf, val; + long ret; + + ret = emit_rpcs_query(obj, ctx, engine, &rq); + if (ret) + return ret; + + if (spin) + igt_spinner_end(spin); + + ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (ret < 0) + return ret; + + buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + return ret; + } + + if (INTEL_GEN(i915) >= 11) { + s_mask = GEN11_RPCS_S_CNT_MASK; + s_shift = GEN11_RPCS_S_CNT_SHIFT; + } else { + s_mask = GEN8_RPCS_S_CNT_MASK; + s_shift = GEN8_RPCS_S_CNT_SHIFT; + } + + val = *buf; + cnt = (val & s_mask) >> s_shift; + *rpcs = val; + + i915_gem_object_unpin_map(obj); + + return cnt; +} + +static int +__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, + const char *prefix, const char *suffix) +{ + if (slices == expected) + return 0; + + if (slices < 0) { + pr_err("%s: %s read slice count failed with %d%s\n", + name, prefix, slices, suffix); + return slices; + } + + pr_err("%s: %s slice count %d is not %u%s\n", + name, prefix, slices, expected, suffix); + + pr_info("RPCS=0x%x; %u%sx%u%s\n", + rpcs, slices, + (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", + (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, + (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); + + return -EINVAL; +} + +static int +__sseu_finish(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct i915_gem_context *kctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + unsigned int expected, + struct igt_spinner *spin) +{ + unsigned int slices = + hweight32(intel_device_default_sseu(i915).slice_mask); + u32 rpcs = 0; + int ret = 0; + + if (flags & TEST_RESET) { + ret = i915_reset_engine(engine, "sseu"); + if (ret) + goto out; + } + + ret = __read_slice_count(i915, ctx, engine, obj, + flags & TEST_RESET ? NULL : spin, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); + if (ret) + goto out; + + ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); + +out: + if (spin) + igt_spinner_end(spin); + + if ((flags & TEST_IDLE) && ret == 0) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + + ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, + "Context", " after idle!"); + } + + return ret; +} + +static int +__sseu_test(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + struct intel_sseu sseu) +{ + struct igt_spinner *spin = NULL; + struct i915_gem_context *kctx; + int ret; + + kctx = kernel_context(i915); + if (IS_ERR(kctx)) + return PTR_ERR(kctx); + + ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin); + if (ret) + goto out; + + ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + if (ret) + goto out; + + ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj, + hweight32(sseu.slice_mask), spin); + +out: + if (spin) { + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + } + + kernel_context_close(kctx); + + return ret; +} + +static int +__igt_ctx_sseu(struct drm_i915_private *i915, + const char *name, + unsigned int flags) +{ + struct intel_sseu default_sseu = intel_device_default_sseu(i915); + struct intel_engine_cs *engine = i915->engine[RCS]; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_sseu pg_sseu; + intel_wakeref_t wakeref; + struct drm_file *file; + int ret; + + if (INTEL_GEN(i915) < 9) + return 0; + + if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) + return 0; + + if (hweight32(default_sseu.slice_mask) < 2) + return 0; + + /* + * Gen11 VME friendly power-gated configuration with half enabled + * sub-slices. + */ + pg_sseu = default_sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + + pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + name, flags, hweight32(default_sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (flags & TEST_RESET) + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = i915_gem_create_context(i915, file->driver_priv); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto out_unlock; + } + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto out_unlock; + } + + wakeref = intel_runtime_pm_get(i915); + + /* First set the default mask. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu); + if (ret) + goto out_fail; + + /* Then set a power-gated configuration. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu); + if (ret) + goto out_fail; + + /* Back to defaults. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu); + if (ret) + goto out_fail; + + /* One last power-gated configuration for the road. */ + ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu); + if (ret) + goto out_fail; + +out_fail: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + ret = -EIO; + + i915_gem_object_put(obj); + + intel_runtime_pm_put(i915, wakeref); + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (flags & TEST_RESET) + igt_global_reset_unlock(i915); + + mock_file_free(i915, file); + + if (ret) + pr_err("%s: Failed with %d!\n", name, ret); + + return ret; +} + +static int igt_ctx_sseu(void *arg) +{ + struct { + const char *name; + unsigned int flags; + } *phase, phases[] = { + { .name = "basic", .flags = 0 }, + { .name = "idle", .flags = TEST_IDLE }, + { .name = "busy", .flags = TEST_BUSY }, + { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, + { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, + { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, + }; + unsigned int i; + int ret = 0; + + for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); + i++, phase++) + ret = __igt_ctx_sseu(arg, phase->name, phase->flags); + + return ret; +} + static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; @@ -1162,6 +1628,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(live_nop_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), + SUBTEST(igt_ctx_sseu), SUBTEST(igt_vm_isolation), }; From a21f453c73aa3705029a3460b456ef77e0bc2215 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 12:38:35 +0000 Subject: [PATCH 430/539] drm/i915/selftests: Exercise some AB...BA preemption chains Build a chain using 2 contexts (A, B) then request a preemption such that a later A request runs before the spinner in B. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190205123835.25331-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_lrc.c | 103 +++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index fb35f53c9ce3..58144e024751 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -4,6 +4,8 @@ * Copyright © 2018 Intel Corporation */ +#include + #include "../i915_reset.h" #include "../i915_selftest.h" @@ -405,6 +407,106 @@ err_wedged: goto err_client_b; } +static int live_chain_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct preempt_client hi, lo; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Build a chain AB...BA between two contexts (A, B) and request + * preemption of the last request. It should then complete before + * the previously submitted spinner in B. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + if (preempt_client_init(i915, &hi)) + goto err_unlock; + + if (preempt_client_init(i915, &lo)) + goto err_client_hi; + + for_each_engine(engine, i915, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + int count, i; + + for_each_prime_number_from(count, 1, 32) { /* must fit ring! */ + struct i915_request *rq; + + rq = igt_spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + if (!igt_wait_for_spinner(&hi.spin, rq)) + goto err_wedged; + + rq = igt_spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + + for (i = 0; i < count; i++) { + rq = i915_request_alloc(engine, lo.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + } + + rq = i915_request_alloc(engine, hi.ctx); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + engine->schedule(rq, &attr); + + igt_spinner_end(&hi.spin); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("Failed to preempt over chain of %d\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + goto err_wedged; + } + igt_spinner_end(&lo.spin); + } + } + + err = 0; +err_client_lo: + preempt_client_fini(&lo); +err_client_hi: + preempt_client_fini(&hi); +err_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&hi.spin); + igt_spinner_end(&lo.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_lo; +} + static int live_preempt_hang(void *arg) { struct drm_i915_private *i915 = arg; @@ -785,6 +887,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_suppress_self_preempt), + SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), }; From 64d6c500a3843408559164223d69fb31e1a00e52 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 13:00:02 +0000 Subject: [PATCH 431/539] drm/i915: Generalise GPU activity tracking We currently track GPU memory usage inside VMA, such that we never release memory used by the GPU until after it has finished accessing it. However, we may want to track other resources aside from VMA, or we may want to split a VMA into multiple independent regions and track each separately. For this purpose, generalise our request tracking (akin to struct reservation_object) so that we can embed it into other objects. v2: Tweak error handling during selftest setup. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 4 +- drivers/gpu/drm/i915/i915_active.c | 228 ++++++++++++++++++ drivers/gpu/drm/i915/i915_active.h | 69 ++++++ drivers/gpu/drm/i915/i915_active_types.h | 26 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 175 +++----------- drivers/gpu/drm/i915/i915_vma.h | 9 +- drivers/gpu/drm/i915/selftests/i915_active.c | 157 ++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 3 +- 9 files changed, 519 insertions(+), 155 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_active.c create mode 100644 drivers/gpu/drm/i915/i915_active.h create mode 100644 drivers/gpu/drm/i915/i915_active_types.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_active.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 210d0e8777b6..1787e1299b1b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code -i915-y += i915_cmd_parser.o \ +i915-y += \ + i915_active.o \ + i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_clflush.o \ i915_gem_context.o \ diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c new file mode 100644 index 000000000000..91950d778cab --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active.c @@ -0,0 +1,228 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_active.h" + +#define BKL(ref) (&(ref)->i915->drm.struct_mutex) + +struct active_node { + struct i915_gem_active base; + struct i915_active *ref; + struct rb_node node; + u64 timeline; +}; + +static void +__active_retire(struct i915_active *ref) +{ + GEM_BUG_ON(!ref->count); + if (!--ref->count) + ref->retire(ref); +} + +static void +node_retire(struct i915_gem_active *base, struct i915_request *rq) +{ + __active_retire(container_of(base, struct active_node, base)->ref); +} + +static void +last_retire(struct i915_gem_active *base, struct i915_request *rq) +{ + __active_retire(container_of(base, struct i915_active, last)); +} + +static struct i915_gem_active * +active_instance(struct i915_active *ref, u64 idx) +{ + struct active_node *node; + struct rb_node **p, *parent; + struct i915_request *old; + + /* + * We track the most recently used timeline to skip a rbtree search + * for the common case, under typical loads we never need the rbtree + * at all. We can reuse the last slot if it is empty, that is + * after the previous activity has been retired, or if it matches the + * current timeline. + * + * Note that we allow the timeline to be active simultaneously in + * the rbtree and the last cache. We do this to avoid having + * to search and replace the rbtree element for a new timeline, with + * the cost being that we must be aware that the ref may be retired + * twice for the same timeline (as the older rbtree element will be + * retired before the new request added to last). + */ + old = i915_gem_active_raw(&ref->last, BKL(ref)); + if (!old || old->fence.context == idx) + goto out; + + /* Move the currently active fence into the rbtree */ + idx = old->fence.context; + + parent = NULL; + p = &ref->tree.rb_node; + while (*p) { + parent = *p; + + node = rb_entry(parent, struct active_node, node); + if (node->timeline == idx) + goto replace; + + if (node->timeline < idx) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + + node = kmalloc(sizeof(*node), GFP_KERNEL); + + /* kmalloc may retire the ref->last (thanks shrinker)! */ + if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { + kfree(node); + goto out; + } + + if (unlikely(!node)) + return ERR_PTR(-ENOMEM); + + init_request_active(&node->base, node_retire); + node->ref = ref; + node->timeline = idx; + + rb_link_node(&node->node, parent, p); + rb_insert_color(&node->node, &ref->tree); + +replace: + /* + * Overwrite the previous active slot in the rbtree with last, + * leaving last zeroed. If the previous slot is still active, + * we must be careful as we now only expect to receive one retire + * callback not two, and so much undo the active counting for the + * overwritten slot. + */ + if (i915_gem_active_isset(&node->base)) { + /* Retire ourselves from the old rq->active_list */ + __list_del_entry(&node->base.link); + ref->count--; + GEM_BUG_ON(!ref->count); + } + GEM_BUG_ON(list_empty(&ref->last.link)); + list_replace_init(&ref->last.link, &node->base.link); + node->base.request = fetch_and_zero(&ref->last.request); + +out: + return &ref->last; +} + +void i915_active_init(struct drm_i915_private *i915, + struct i915_active *ref, + void (*retire)(struct i915_active *ref)) +{ + ref->i915 = i915; + ref->retire = retire; + ref->tree = RB_ROOT; + init_request_active(&ref->last, last_retire); + ref->count = 0; +} + +int i915_active_ref(struct i915_active *ref, + u64 timeline, + struct i915_request *rq) +{ + struct i915_gem_active *active; + + active = active_instance(ref, timeline); + if (IS_ERR(active)) + return PTR_ERR(active); + + if (!i915_gem_active_isset(active)) + ref->count++; + i915_gem_active_set(active, rq); + + GEM_BUG_ON(!ref->count); + return 0; +} + +bool i915_active_acquire(struct i915_active *ref) +{ + lockdep_assert_held(BKL(ref)); + return !ref->count++; +} + +void i915_active_release(struct i915_active *ref) +{ + lockdep_assert_held(BKL(ref)); + __active_retire(ref); +} + +int i915_active_wait(struct i915_active *ref) +{ + struct active_node *it, *n; + int ret = 0; + + if (i915_active_acquire(ref)) + goto out_release; + + ret = i915_gem_active_retire(&ref->last, BKL(ref)); + if (ret) + goto out_release; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + ret = i915_gem_active_retire(&it->base, BKL(ref)); + if (ret) + break; + } + +out_release: + i915_active_release(ref); + return ret; +} + +static int __i915_request_await_active(struct i915_request *rq, + struct i915_gem_active *active) +{ + struct i915_request *barrier = + i915_gem_active_raw(active, &rq->i915->drm.struct_mutex); + + return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; +} + +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) +{ + struct active_node *it, *n; + int ret; + + ret = __i915_request_await_active(rq, &ref->last); + if (ret) + return ret; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + ret = __i915_request_await_active(rq, &it->base); + if (ret) + return ret; + } + + return 0; +} + +void i915_active_fini(struct i915_active *ref) +{ + struct active_node *it, *n; + + GEM_BUG_ON(i915_gem_active_isset(&ref->last)); + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + GEM_BUG_ON(i915_gem_active_isset(&it->base)); + kfree(it); + } + ref->tree = RB_ROOT; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_active.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h new file mode 100644 index 000000000000..0aa2628ea734 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active.h @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_H_ +#define _I915_ACTIVE_H_ + +#include "i915_active_types.h" + +/* + * GPU activity tracking + * + * Each set of commands submitted to the GPU compromises a single request that + * signals a fence upon completion. struct i915_request combines the + * command submission, scheduling and fence signaling roles. If we want to see + * if a particular task is complete, we need to grab the fence (struct + * i915_request) for that task and check or wait for it to be signaled. More + * often though we want to track the status of a bunch of tasks, for example + * to wait for the GPU to finish accessing some memory across a variety of + * different command pipelines from different clients. We could choose to + * track every single request associated with the task, but knowing that + * each request belongs to an ordered timeline (later requests within a + * timeline must wait for earlier requests), we need only track the + * latest request in each timeline to determine the overall status of the + * task. + * + * struct i915_active provides this tracking across timelines. It builds a + * composite shared-fence, and is updated as new work is submitted to the task, + * forming a snapshot of the current status. It should be embedded into the + * different resources that need to track their associated GPU activity to + * provide a callback when that GPU activity has ceased, or otherwise to + * provide a serialisation point either for request submission or for CPU + * synchronisation. + */ + +void i915_active_init(struct drm_i915_private *i915, + struct i915_active *ref, + void (*retire)(struct i915_active *ref)); + +int i915_active_ref(struct i915_active *ref, + u64 timeline, + struct i915_request *rq); + +int i915_active_wait(struct i915_active *ref); + +int i915_request_await_active(struct i915_request *rq, + struct i915_active *ref); + +bool i915_active_acquire(struct i915_active *ref); + +static inline void i915_active_cancel(struct i915_active *ref) +{ + GEM_BUG_ON(ref->count != 1); + ref->count = 0; +} + +void i915_active_release(struct i915_active *ref); + +static inline bool +i915_active_is_idle(const struct i915_active *ref) +{ + return !ref->count; +} + +void i915_active_fini(struct i915_active *ref); + +#endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h new file mode 100644 index 000000000000..411e502ed8dd --- /dev/null +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_TYPES_H_ +#define _I915_ACTIVE_TYPES_H_ + +#include + +#include "i915_request.h" + +struct drm_i915_private; + +struct i915_active { + struct drm_i915_private *i915; + + struct rb_root tree; + struct i915_gem_active last; + unsigned int count; + + void (*retire)(struct i915_active *ref); +}; + +#endif /* _I915_ACTIVE_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 49b00996a15e..e625659c03a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1917,14 +1917,13 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); + i915_active_init(i915, &vma->active, NULL); init_request_active(&vma->last_fence, NULL); vma->vm = &ggtt->vm; vma->ops = &pd_vma_ops; vma->private = ppgtt; - vma->active = RB_ROOT; - vma->size = size; vma->fence_size = size; vma->flags = I915_VMA_GGTT; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d83b8ad5f859..d4772061e642 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -63,21 +63,22 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) #endif -struct i915_vma_active { - struct i915_gem_active base; - struct i915_vma *vma; - struct rb_node node; - u64 timeline; -}; - -static void -__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) +static void obj_bump_mru(struct drm_i915_gem_object *obj) { - struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *i915 = to_i915(obj->base.dev); - GEM_BUG_ON(!i915_vma_is_active(vma)); - if (--vma->active_count) - return; + spin_lock(&i915->mm.obj_lock); + if (obj->bind_count) + list_move_tail(&obj->mm.link, &i915->mm.bound_list); + spin_unlock(&i915->mm.obj_lock); + + obj->mm.dirty = true; /* be paranoid */ +} + +static void __i915_vma_retire(struct i915_active *ref) +{ + struct i915_vma *vma = container_of(ref, typeof(*vma), active); + struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -90,16 +91,12 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) reservation_object_unlock(obj->resv); } - /* Bump our place on the bound list to keep it roughly in LRU order + /* + * Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ - spin_lock(&rq->i915->mm.obj_lock); - if (obj->bind_count) - list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); - spin_unlock(&rq->i915->mm.obj_lock); - - obj->mm.dirty = true; /* be paranoid */ + obj_bump_mru(obj); if (i915_gem_object_has_active_reference(obj)) { i915_gem_object_clear_active_reference(obj); @@ -107,21 +104,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) } } -static void -i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq) -{ - struct i915_vma_active *active = - container_of(base, typeof(*active), base); - - __i915_vma_retire(active->vma, rq); -} - -static void -i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq) -{ - __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq); -} - static struct i915_vma * vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -137,10 +119,9 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->active = RB_ROOT; - - init_request_active(&vma->last_active, i915_vma_last_retire); + i915_active_init(vm->i915, &vma->active, __i915_vma_retire); init_request_active(&vma->last_fence, NULL); + vma->vm = vm; vma->ops = &vm->vma_ops; vma->obj = obj; @@ -823,7 +804,6 @@ void i915_vma_reopen(struct i915_vma *vma) static void __i915_vma_destroy(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_vma_active *iter, *n; GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(vma->fence); @@ -843,10 +823,7 @@ static void __i915_vma_destroy(struct i915_vma *vma) spin_unlock(&obj->vma.lock); } - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { - GEM_BUG_ON(i915_gem_active_isset(&iter->base)); - kfree(iter); - } + i915_active_fini(&vma->active); kmem_cache_free(i915->vmas, vma); } @@ -931,104 +908,15 @@ static void export_fence(struct i915_vma *vma, reservation_object_unlock(resv); } -static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx) -{ - struct i915_vma_active *active; - struct rb_node **p, *parent; - struct i915_request *old; - - /* - * We track the most recently used timeline to skip a rbtree search - * for the common case, under typical loads we never need the rbtree - * at all. We can reuse the last_active slot if it is empty, that is - * after the previous activity has been retired, or if the active - * matches the current timeline. - * - * Note that we allow the timeline to be active simultaneously in - * the rbtree and the last_active cache. We do this to avoid having - * to search and replace the rbtree element for a new timeline, with - * the cost being that we must be aware that the vma may be retired - * twice for the same timeline (as the older rbtree element will be - * retired before the new request added to last_active). - */ - old = i915_gem_active_raw(&vma->last_active, - &vma->vm->i915->drm.struct_mutex); - if (!old || old->fence.context == idx) - goto out; - - /* Move the currently active fence into the rbtree */ - idx = old->fence.context; - - parent = NULL; - p = &vma->active.rb_node; - while (*p) { - parent = *p; - - active = rb_entry(parent, struct i915_vma_active, node); - if (active->timeline == idx) - goto replace; - - if (active->timeline < idx) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - - active = kmalloc(sizeof(*active), GFP_KERNEL); - - /* kmalloc may retire the vma->last_active request (thanks shrinker)! */ - if (unlikely(!i915_gem_active_raw(&vma->last_active, - &vma->vm->i915->drm.struct_mutex))) { - kfree(active); - goto out; - } - - if (unlikely(!active)) - return ERR_PTR(-ENOMEM); - - init_request_active(&active->base, i915_vma_retire); - active->vma = vma; - active->timeline = idx; - - rb_link_node(&active->node, parent, p); - rb_insert_color(&active->node, &vma->active); - -replace: - /* - * Overwrite the previous active slot in the rbtree with last_active, - * leaving last_active zeroed. If the previous slot is still active, - * we must be careful as we now only expect to receive one retire - * callback not two, and so much undo the active counting for the - * overwritten slot. - */ - if (i915_gem_active_isset(&active->base)) { - /* Retire ourselves from the old rq->active_list */ - __list_del_entry(&active->base.link); - vma->active_count--; - GEM_BUG_ON(!vma->active_count); - } - GEM_BUG_ON(list_empty(&vma->last_active.link)); - list_replace_init(&vma->last_active.link, &active->base.link); - active->base.request = fetch_and_zero(&vma->last_active.request); - -out: - return &vma->last_active; -} - int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - struct i915_gem_active *active; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - active = active_instance(vma, rq->fence.context); - if (IS_ERR(active)) - return PTR_ERR(active); - /* * Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is @@ -1037,9 +925,15 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_active_isset(active) && !vma->active_count++) + if (!vma->active.count) obj->active_count++; - i915_gem_active_set(active, rq); + + if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) { + if (!vma->active.count) + obj->active_count--; + return -ENOMEM; + } + GEM_BUG_ON(!i915_vma_is_active(vma)); GEM_BUG_ON(!obj->active_count); @@ -1073,8 +967,6 @@ int i915_vma_unbind(struct i915_vma *vma) */ might_sleep(); if (i915_vma_is_active(vma)) { - struct i915_vma_active *active, *n; - /* * When a closed VMA is retired, it is unbound - eek. * In order to prevent it from being recursively closed, @@ -1090,19 +982,10 @@ int i915_vma_unbind(struct i915_vma *vma) */ __i915_vma_pin(vma); - ret = i915_gem_active_retire(&vma->last_active, - &vma->vm->i915->drm.struct_mutex); + ret = i915_active_wait(&vma->active); if (ret) goto unpin; - rbtree_postorder_for_each_entry_safe(active, n, - &vma->active, node) { - ret = i915_gem_active_retire(&active->base, - &vma->vm->i915->drm.struct_mutex); - if (ret) - goto unpin; - } - ret = i915_gem_active_retire(&vma->last_fence, &vma->vm->i915->drm.struct_mutex); unpin: diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 5793abe509a2..3c03d4569481 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -34,6 +34,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" +#include "i915_active.h" #include "i915_request.h" enum i915_cache_level; @@ -108,9 +109,7 @@ struct i915_vma { #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) #define I915_VMA_GGTT_WRITE BIT(15) - unsigned int active_count; - struct rb_root active; - struct i915_gem_active last_active; + struct i915_active active; struct i915_gem_active last_fence; /** @@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj, void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags); #define I915_VMA_RELEASE_MAP BIT(0) -static inline bool i915_vma_is_active(struct i915_vma *vma) +static inline bool i915_vma_is_active(const struct i915_vma *vma) { - return vma->active_count; + return !i915_active_is_idle(&vma->active); } int __must_check i915_vma_move_to_active(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c new file mode 100644 index 000000000000..337b1f98b923 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -0,0 +1,157 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +struct live_active { + struct i915_active base; + bool retired; +}; + +static void __live_active_retire(struct i915_active *base) +{ + struct live_active *active = container_of(base, typeof(*active), base); + + active->retired = true; +} + +static int __live_active_setup(struct drm_i915_private *i915, + struct live_active *active) +{ + struct intel_engine_cs *engine; + struct i915_sw_fence *submit; + enum intel_engine_id id; + unsigned int count = 0; + int err = 0; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) + return -ENOMEM; + + i915_active_init(i915, &active->base, __live_active_retire); + active->retired = false; + + if (!i915_active_acquire(&active->base)) { + pr_err("First i915_active_acquire should report being idle\n"); + err = -EINVAL; + goto out; + } + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + submit, + GFP_KERNEL); + if (err >= 0) + err = i915_active_ref(&active->base, + rq->fence.context, rq); + i915_request_add(rq); + if (err) { + pr_err("Failed to track active ref!\n"); + break; + } + + count++; + } + + i915_active_release(&active->base); + if (active->retired && count) { + pr_err("i915_active retired before submission!\n"); + err = -EINVAL; + } + if (active->base.count != count) { + pr_err("i915_active not tracking all requests, found %d, expected %d\n", + active->base.count, count); + err = -EINVAL; + } + +out: + i915_sw_fence_commit(submit); + heap_fence_put(submit); + + return err; +} + +static int live_active_wait(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct live_active active; + intel_wakeref_t wakeref; + int err; + + /* Check that we get a callback when requests retire upon waiting */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + err = __live_active_setup(i915, &active); + + i915_active_wait(&active.base); + if (!active.retired) { + pr_err("i915_active not retired after waiting!\n"); + err = -EINVAL; + } + + i915_active_fini(&active.base); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_active_retire(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct live_active active; + intel_wakeref_t wakeref; + int err; + + /* Check that we get a callback when requests are indirectly retired */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + err = __live_active_setup(i915, &active); + + /* waits for & retires all requests */ + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + if (!active.retired) { + pr_err("i915_active not retired after flushing!\n"); + err = -EINVAL; + } + + i915_active_fini(&active.base); + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_active_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_active_wait), + SUBTEST(live_active_retire), + }; + + if (i915_terminally_wedged(&i915->gpu_error)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 76b4f87fc853..6d766925ad04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,8 +12,9 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) -selftest(requests, i915_request_live_selftests) selftest(timelines, i915_timeline_live_selftests) +selftest(requests, i915_request_live_selftests) +selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) From a42375af0a305e9c299cffd739a39e415930614c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 13:00:03 +0000 Subject: [PATCH 432/539] drm/i915: Release the active tracker tree upon idling As soon as we detect that the active tracker is idle and we prepare to call the retire callback, release the storage for our tree of per-timeline nodes. We expect these to be infrequently used and quick to allocate, so there is little benefit in keeping the tree cached and we would prefer to return the pages back to the system in a timely fashion. This also means that when we finalize the struct as a whole, we know as the activity tracker must be idle, the tree has already been released. Indeed we can reduce i915_active_fini() just to the assertions that there is nothing to do. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.c | 33 +++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_active.h | 4 ++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 91950d778cab..b1fefe98f9a6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -16,12 +16,29 @@ struct active_node { u64 timeline; }; +static void +__active_park(struct i915_active *ref) +{ + struct active_node *it, *n; + + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { + GEM_BUG_ON(i915_gem_active_isset(&it->base)); + kfree(it); + } + ref->tree = RB_ROOT; +} + static void __active_retire(struct i915_active *ref) { GEM_BUG_ON(!ref->count); - if (!--ref->count) - ref->retire(ref); + if (--ref->count) + return; + + /* return the unused nodes to our slabcache */ + __active_park(ref); + + ref->retire(ref); } static void @@ -210,18 +227,14 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { - struct active_node *it, *n; - GEM_BUG_ON(i915_gem_active_isset(&ref->last)); - - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - GEM_BUG_ON(i915_gem_active_isset(&it->base)); - kfree(it); - } - ref->tree = RB_ROOT; + GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); + GEM_BUG_ON(ref->count); } +#endif #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_active.c" diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 0aa2628ea734..ec4b66efd9a7 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -64,6 +64,10 @@ i915_active_is_idle(const struct i915_active *ref) return !ref->count; } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref); +#else +static inline void i915_active_fini(struct i915_active *ref) { } +#endif #endif /* _I915_ACTIVE_H_ */ From 5f5c139d6900b3338963bddcd8a567dcad33cf92 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 13:00:04 +0000 Subject: [PATCH 433/539] drm/i915: Allocate active tracking nodes from a slabcache Wrap the active tracking for a GPU references in a slabcache for faster allocations, and hopefully better fragmentation reduction. v3: Nothing device specific left, it's just a slabcache that we can make global. v4: Include i915_active.h and don't put the initfunc under DEBUG_GEM Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.c | 31 +++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_active.h | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 4 ++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index b1fefe98f9a6..64661c41532b 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -9,6 +9,17 @@ #define BKL(ref) (&(ref)->i915->drm.struct_mutex) +/* + * Active refs memory management + * + * To be more economical with memory, we reap all the i915_active trees as + * they idle (when we know the active requests are inactive) and allocate the + * nodes from a local slab cache to hopefully reduce the fragmentation. + */ +static struct i915_global_active { + struct kmem_cache *slab_cache; +} global; + struct active_node { struct i915_gem_active base; struct i915_active *ref; @@ -23,7 +34,7 @@ __active_park(struct i915_active *ref) rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { GEM_BUG_ON(i915_gem_active_isset(&it->base)); - kfree(it); + kmem_cache_free(global.slab_cache, it); } ref->tree = RB_ROOT; } @@ -96,11 +107,11 @@ active_instance(struct i915_active *ref, u64 idx) p = &parent->rb_left; } - node = kmalloc(sizeof(*node), GFP_KERNEL); + node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); /* kmalloc may retire the ref->last (thanks shrinker)! */ if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { - kfree(node); + kmem_cache_free(global.slab_cache, node); goto out; } @@ -239,3 +250,17 @@ void i915_active_fini(struct i915_active *ref) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_active.c" #endif + +int __init i915_global_active_init(void) +{ + global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!global.slab_cache) + return -ENOMEM; + + return 0; +} + +void __exit i915_global_active_exit(void) +{ + kmem_cache_destroy(global.slab_cache); +} diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index ec4b66efd9a7..179b47aeec33 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -70,4 +70,7 @@ void i915_active_fini(struct i915_active *ref); static inline void i915_active_fini(struct i915_active *ref) { } #endif +int i915_global_active_init(void); +void i915_global_active_exit(void); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 55c9058e91a8..c6533c57ea2f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -26,6 +26,7 @@ #include #include +#include "i915_active.h" #include "i915_drv.h" #include "i915_selftest.h" @@ -798,6 +799,8 @@ static int __init i915_init(void) bool use_kms = true; int err; + i915_global_active_init(); + err = i915_mock_selftests(); if (err) return err > 0 ? 0 : err; @@ -829,6 +832,7 @@ static void __exit i915_exit(void) return; pci_unregister_driver(&i915_pci_driver); + i915_global_active_exit(); } module_init(i915_init); From 21950ee7cc8f13c5350bda0cae22cdb7ac7e3058 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 13:00:05 +0000 Subject: [PATCH 434/539] drm/i915: Pull i915_gem_active into the i915_active family Looking forward, we need to break the struct_mutex dependency on i915_gem_active. In the meantime, external use of i915_gem_active is quite beguiling, little do new users suspect that it implies a barrier as each request it tracks must be ordered wrt the previous one. As one of many, it can be used to track activity across multiple timelines, a shared fence, which fits our unordered request submission much better. We need to steer external users away from the singular, exclusive fence imposed by i915_gem_active to i915_active instead. As part of that process, we move i915_gem_active out of i915_request.c into i915_active.c to start separating the two concepts, and rename it to i915_active_request (both to tie it to the concept of tracking just one request, and to give it a longer, less appealing name). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.c | 62 ++- drivers/gpu/drm/i915/i915_active.h | 349 ++++++++++++++++ drivers/gpu/drm/i915/i915_active_types.h | 16 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 10 +- drivers/gpu/drm/i915/i915_gem_context.c | 17 +- drivers/gpu/drm/i915/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 4 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 35 +- drivers/gpu/drm/i915/i915_request.h | 383 ------------------ drivers/gpu/drm/i915/i915_reset.c | 2 +- drivers/gpu/drm/i915/i915_timeline.c | 25 +- drivers/gpu/drm/i915/i915_timeline.h | 14 +- drivers/gpu/drm/i915/i915_vma.c | 12 +- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 33 +- .../gpu/drm/i915/selftests/mock_timeline.c | 4 +- 21 files changed, 480 insertions(+), 508 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 64661c41532b..215b6ff8aa73 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -21,7 +21,7 @@ static struct i915_global_active { } global; struct active_node { - struct i915_gem_active base; + struct i915_active_request base; struct i915_active *ref; struct rb_node node; u64 timeline; @@ -33,7 +33,7 @@ __active_park(struct i915_active *ref) struct active_node *it, *n; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - GEM_BUG_ON(i915_gem_active_isset(&it->base)); + GEM_BUG_ON(i915_active_request_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } ref->tree = RB_ROOT; @@ -53,18 +53,18 @@ __active_retire(struct i915_active *ref) } static void -node_retire(struct i915_gem_active *base, struct i915_request *rq) +node_retire(struct i915_active_request *base, struct i915_request *rq) { __active_retire(container_of(base, struct active_node, base)->ref); } static void -last_retire(struct i915_gem_active *base, struct i915_request *rq) +last_retire(struct i915_active_request *base, struct i915_request *rq) { __active_retire(container_of(base, struct i915_active, last)); } -static struct i915_gem_active * +static struct i915_active_request * active_instance(struct i915_active *ref, u64 idx) { struct active_node *node; @@ -85,7 +85,7 @@ active_instance(struct i915_active *ref, u64 idx) * twice for the same timeline (as the older rbtree element will be * retired before the new request added to last). */ - old = i915_gem_active_raw(&ref->last, BKL(ref)); + old = i915_active_request_raw(&ref->last, BKL(ref)); if (!old || old->fence.context == idx) goto out; @@ -110,7 +110,7 @@ active_instance(struct i915_active *ref, u64 idx) node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); /* kmalloc may retire the ref->last (thanks shrinker)! */ - if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { + if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) { kmem_cache_free(global.slab_cache, node); goto out; } @@ -118,7 +118,7 @@ active_instance(struct i915_active *ref, u64 idx) if (unlikely(!node)) return ERR_PTR(-ENOMEM); - init_request_active(&node->base, node_retire); + i915_active_request_init(&node->base, NULL, node_retire); node->ref = ref; node->timeline = idx; @@ -133,7 +133,7 @@ replace: * callback not two, and so much undo the active counting for the * overwritten slot. */ - if (i915_gem_active_isset(&node->base)) { + if (i915_active_request_isset(&node->base)) { /* Retire ourselves from the old rq->active_list */ __list_del_entry(&node->base.link); ref->count--; @@ -154,7 +154,7 @@ void i915_active_init(struct drm_i915_private *i915, ref->i915 = i915; ref->retire = retire; ref->tree = RB_ROOT; - init_request_active(&ref->last, last_retire); + i915_active_request_init(&ref->last, NULL, last_retire); ref->count = 0; } @@ -162,15 +162,15 @@ int i915_active_ref(struct i915_active *ref, u64 timeline, struct i915_request *rq) { - struct i915_gem_active *active; + struct i915_active_request *active; active = active_instance(ref, timeline); if (IS_ERR(active)) return PTR_ERR(active); - if (!i915_gem_active_isset(active)) + if (!i915_active_request_isset(active)) ref->count++; - i915_gem_active_set(active, rq); + __i915_active_request_set(active, rq); GEM_BUG_ON(!ref->count); return 0; @@ -196,12 +196,12 @@ int i915_active_wait(struct i915_active *ref) if (i915_active_acquire(ref)) goto out_release; - ret = i915_gem_active_retire(&ref->last, BKL(ref)); + ret = i915_active_request_retire(&ref->last, BKL(ref)); if (ret) goto out_release; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - ret = i915_gem_active_retire(&it->base, BKL(ref)); + ret = i915_active_request_retire(&it->base, BKL(ref)); if (ret) break; } @@ -211,11 +211,11 @@ out_release: return ret; } -static int __i915_request_await_active(struct i915_request *rq, - struct i915_gem_active *active) +int i915_request_await_active_request(struct i915_request *rq, + struct i915_active_request *active) { struct i915_request *barrier = - i915_gem_active_raw(active, &rq->i915->drm.struct_mutex); + i915_active_request_raw(active, &rq->i915->drm.struct_mutex); return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; } @@ -225,12 +225,12 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) struct active_node *it, *n; int ret; - ret = __i915_request_await_active(rq, &ref->last); + ret = i915_request_await_active_request(rq, &ref->last); if (ret) return ret; rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - ret = __i915_request_await_active(rq, &it->base); + ret = i915_request_await_active_request(rq, &it->base); if (ret) return ret; } @@ -241,12 +241,32 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { - GEM_BUG_ON(i915_gem_active_isset(&ref->last)); + GEM_BUG_ON(i915_active_request_isset(&ref->last)); GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(ref->count); } #endif +int i915_active_request_set(struct i915_active_request *active, + struct i915_request *rq) +{ + int err; + + /* Must maintain ordering wrt previous active requests */ + err = i915_request_await_active_request(rq, active); + if (err) + return err; + + __i915_active_request_set(active, rq); + return 0; +} + +void i915_active_retire_noop(struct i915_active_request *active, + struct i915_request *request) +{ + /* Space left intentionally blank */ +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_active.c" #endif diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 179b47aeec33..12b5c1d287d1 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -7,7 +7,354 @@ #ifndef _I915_ACTIVE_H_ #define _I915_ACTIVE_H_ +#include + #include "i915_active_types.h" +#include "i915_request.h" + +/* + * We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_active_request to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_active_request is updated with i915_active_request_set() to + * track the most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_active_request completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_active_request.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ + +void i915_active_retire_noop(struct i915_active_request *active, + struct i915_request *request); + +/** + * i915_active_request_init - prepares the activity tracker for use + * @active - the active tracker + * @rq - initial request to track, can be NULL + * @func - a callback when then the tracker is retired (becomes idle), + * can be NULL + * + * i915_active_request_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @func is invoked. + */ +static inline void +i915_active_request_init(struct i915_active_request *active, + struct i915_request *rq, + i915_active_retire_fn retire) +{ + RCU_INIT_POINTER(active->request, rq); + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_active_retire_noop; +} + +#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL) + +/** + * i915_active_request_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * __i915_active_request_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +__i915_active_request_set(struct i915_active_request *active, + struct i915_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +int __must_check +i915_active_request_set(struct i915_active_request *active, + struct i915_request *rq); + +/** + * i915_active_request_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_active_request_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_active_request_set_retire_fn(struct i915_active_request *active, + i915_active_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_active_retire_noop; +} + +static inline struct i915_request * +__i915_active_request_peek(const struct i915_active_request *active) +{ + /* + * Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). + * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_active_request_raw - return the active request + * @active - the active tracker + * + * i915_active_request_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_raw(const struct i915_active_request *active, + struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** + * i915_active_request_peek - report the active request being monitored + * @active - the active tracker + * + * i915_active_request_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_peek(const struct i915_active_request *active, + struct mutex *mutex) +{ + struct i915_request *request; + + request = i915_active_request_raw(active, mutex); + if (!request || i915_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_active_request_get - return a reference to the active request + * @active - the active tracker + * + * i915_active_request_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_active_request_get(const struct i915_active_request *active, + struct mutex *mutex) +{ + return i915_request_get(i915_active_request_peek(active, mutex)); +} + +/** + * __i915_active_request_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_active_request_get() returns a reference to the active request, + * or NULL if the active tracker is idle. The caller must hold the RCU read + * lock, but the returned pointer is safe to use outside of RCU. + */ +static inline struct i915_request * +__i915_active_request_get_rcu(const struct i915_active_request *active) +{ + /* + * Performing a lockless retrieval of the active request is super + * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * rq = active.request + * retire(rq) -> free(rq); + * (rq is now first on the slab freelist) + * active.request = NULL + * + * rq = new submission on a new object + * ref(rq) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. + * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_request_alloc(). + */ + do { + struct i915_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_request_completed(request)) + return NULL; + + /* + * An especially silly compiler could decide to recompute the + * result of i915_request_completed, more specifically + * re-emit the load for request->fence.seqno. A race would catch + * a later seqno value, which could flip the result from true to + * false. Which means part of the instructions below might not + * be executed, while later on instructions are executed. Due to + * barriers within the refcounting the inconsistency can't reach + * past the call to i915_request_get_rcu, but not executing + * that while still executing i915_request_put() creates + * havoc enough. Prevent this with a compiler barrier. + */ + barrier(); + + request = i915_request_get_rcu(request); + + /* + * What stops the following rcu_access_pointer() from occurring + * before the above i915_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_request_get_rcu(), see dma_fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_request_put(request); + } while (1); +} + +/** + * i915_active_request_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_active_request_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_request_put(). + */ +static inline struct i915_request * +i915_active_request_get_unlocked(const struct i915_active_request *active) +{ + struct i915_request *request; + + rcu_read_lock(); + request = __i915_active_request_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_active_request_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_active_request_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_active_request_isset(const struct i915_active_request *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_active_request_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_active_request_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_active_request_retire(struct i915_active_request *active, + struct mutex *mutex) +{ + struct i915_request *request; + long ret; + + request = i915_active_request_raw(active, mutex); + if (!request) + return 0; + + ret = i915_request_wait(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} /* * GPU activity tracking @@ -47,6 +394,8 @@ int i915_active_wait(struct i915_active *ref); int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); +int i915_request_await_active_request(struct i915_request *rq, + struct i915_active_request *active); bool i915_active_acquire(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 411e502ed8dd..b679253b53a5 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -8,16 +8,26 @@ #define _I915_ACTIVE_TYPES_H_ #include - -#include "i915_request.h" +#include struct drm_i915_private; +struct i915_active_request; +struct i915_request; + +typedef void (*i915_active_retire_fn)(struct i915_active_request *, + struct i915_request *); + +struct i915_active_request { + struct i915_request __rcu *request; + struct list_head link; + i915_active_retire_fn retire; +}; struct i915_active { struct drm_i915_private *i915; struct rb_root tree; - struct i915_gem_active last; + struct i915_active_request last; unsigned int count; void (*retire)(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bf3073e63af8..c48733a15e63 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -206,7 +206,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (vma->fence) seq_printf(m, " , fence: %d%s", vma->fence->id, - i915_gem_active_isset(&vma->last_fence) ? "*" : ""); + i915_active_request_isset(&vma->last_fence) ? "*" : ""); seq_puts(m, ")"); } if (obj->stolen) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d92e7ab0005e..52b5a24be42b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3018,7 +3018,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) GEM_BUG_ON(i915->gt.active_requests); for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); + GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != to_intel_context(i915->kernel_context, engine)); } @@ -3264,7 +3264,7 @@ wait_for_timelines(struct drm_i915_private *i915, list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; - rq = i915_gem_active_get_unlocked(&tl->last_request); + rq = i915_active_request_get_unlocked(&tl->last_request); if (!rq) continue; @@ -4165,7 +4165,8 @@ out: } static void -frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) +frontbuffer_retire(struct i915_active_request *active, + struct i915_request *request) { struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); @@ -4192,7 +4193,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->resv = &obj->__builtin_resv; obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - init_request_active(&obj->frontbuffer_write, frontbuffer_retire); + i915_active_request_init(&obj->frontbuffer_write, + NULL, frontbuffer_retire); obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 93ab287f44b6..280813a4bf82 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -322,7 +322,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915, return desc; } -static void intel_context_retire(struct i915_gem_active *active, +static void intel_context_retire(struct i915_active_request *active, struct i915_request *rq) { struct intel_context *ce = @@ -344,7 +344,8 @@ intel_context_init(struct intel_context *ce, /* Use the whole device by default */ ce->sseu = intel_device_default_sseu(ctx->i915); - init_request_active(&ce->active_tracker, intel_context_retire); + i915_active_request_init(&ce->active_tracker, + NULL, intel_context_retire); } static struct i915_gem_context * @@ -668,8 +669,8 @@ last_request_on_engine(struct i915_timeline *timeline, GEM_BUG_ON(timeline == &engine->timeline); - rq = i915_gem_active_raw(&timeline->last_request, - &engine->i915->drm.struct_mutex); + rq = i915_active_request_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); if (rq && rq->engine == engine) { GEM_TRACE("last request for %s on engine %s: %llx:%llu\n", timeline->name, engine->name, @@ -1015,8 +1016,8 @@ gen8_modify_rpcs_gpu(struct intel_context *ce, } /* Queue this switch after all other activity by this context. */ - prev = i915_gem_active_raw(&ce->ring->timeline->last_request, - &i915->drm.struct_mutex); + prev = i915_active_request_raw(&ce->ring->timeline->last_request, + &i915->drm.struct_mutex); if (prev && !i915_request_completed(prev)) { ret = i915_request_await_dma_fence(rq, &prev->fence); if (ret < 0) @@ -1039,9 +1040,9 @@ gen8_modify_rpcs_gpu(struct intel_context *ce, * But we only need to take one pin on the account of it. Or in other * words transfer the pinned ce object to tracked active request. */ - if (!i915_gem_active_isset(&ce->active_tracker)) + if (!i915_active_request_isset(&ce->active_tracker)) __intel_context_pin(ce); - i915_gem_active_set(&ce->active_tracker, rq); + __i915_active_request_set(&ce->active_tracker, rq); out_add: i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 92ad5272e57f..ca150a764c24 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -187,7 +187,7 @@ struct i915_gem_context { * active_tracker: Active tracker for the external rq activity * on this intel_context object. */ - struct i915_gem_active active_tracker; + struct i915_active_request active_tracker; const struct intel_context_ops *ops; diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 46e259661294..e037e94792f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -223,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_gem_active_retire(&vma->last_fence, + ret = i915_active_request_retire(&vma->last_fence, &vma->obj->base.dev->struct_mutex); if (ret) return ret; @@ -232,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, if (fence->vma) { struct i915_vma *old = fence->vma; - ret = i915_gem_active_retire(&old->last_fence, + ret = i915_active_request_retire(&old->last_fence, &old->obj->base.dev->struct_mutex); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e625659c03a2..d646d37eec2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1918,7 +1918,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) return ERR_PTR(-ENOMEM); i915_active_init(i915, &vma->active, NULL); - init_request_active(&vma->last_fence, NULL); + INIT_ACTIVE_REQUEST(&vma->last_fence); vma->vm = &ggtt->vm; vma->ops = &pd_vma_ops; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 73fec917d097..fab040331cdb 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -175,7 +175,7 @@ struct drm_i915_gem_object { atomic_t frontbuffer_bits; unsigned int frontbuffer_ggtt_origin; /* write once */ - struct i915_gem_active frontbuffer_write; + struct i915_active_request frontbuffer_write; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6e2e5ed2bd0a..9a65341fec09 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1062,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915, } /* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_gem_active lookup. + * locking rules - so we use the raw version of the i915_active_request lookup. */ static inline u32 -__active_get_seqno(struct i915_gem_active *active) +__active_get_seqno(struct i915_active_request *active) { struct i915_request *request; - request = __i915_gem_active_peek(active); + request = __i915_active_request_peek(active); return request ? request->global_seqno : 0; } static inline int -__active_get_engine_id(struct i915_gem_active *active) +__active_get_engine_id(struct i915_active_request *active) { struct i915_request *request; - request = __i915_gem_active_peek(active); + request = __i915_active_request_peek(active); return request ? request->engine->id : -1; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6512630b59b8..c2a5c48c7541 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include #include "i915_drv.h" +#include "i915_active.h" #include "i915_reset.h" static const char *i915_fence_get_driver_name(struct dma_fence *fence) @@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915) i915_gem_park(i915); } -void i915_gem_retire_noop(struct i915_gem_active *active, - struct i915_request *request) -{ - /* Space left intentionally blank */ -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -244,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine, static void i915_request_retire(struct i915_request *request) { - struct i915_gem_active *active, *next; + struct i915_active_request *active, *next; GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, @@ -278,10 +273,10 @@ static void i915_request_retire(struct i915_request *request) * we may spend an inordinate amount of time simply handling * the retirement of requests and processing their callbacks. * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of i915_gem_active. - * So we try to keep this loop as streamlined as possible and - * also prefetch the next i915_gem_active to try and hide - * the likely cache miss. + * cache misses when jumping around the list of + * i915_active_request. So we try to keep this loop as + * streamlined as possible and also prefetch the next + * i915_active_request to try and hide the likely cache miss. */ prefetchw(next); @@ -526,17 +521,9 @@ out: return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL); } -static int add_barrier(struct i915_request *rq, struct i915_gem_active *active) -{ - struct i915_request *barrier = - i915_gem_active_raw(active, &rq->i915->drm.struct_mutex); - - return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; -} - static int add_timeline_barrier(struct i915_request *rq) { - return add_barrier(rq, &rq->timeline->barrier); + return i915_request_await_active_request(rq, &rq->timeline->barrier); } /** @@ -595,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * We use RCU to look up requests in flight. The lookups may * race with the request being allocated from the slab freelist. * That is the request we are writing to here, may be in the process - * of being read by __i915_gem_active_get_rcu(). As such, + * of being read by __i915_active_request_get_rcu(). As such, * we have to be very careful when overwriting the contents. During * the RCU lookup, we change chase the request->engine pointer, * read the request->global_seqno and increment the reference count. @@ -937,8 +924,8 @@ void i915_request_add(struct i915_request *request) * see a more recent value in the hws than we are tracking. */ - prev = i915_gem_active_raw(&timeline->last_request, - &request->i915->drm.struct_mutex); + prev = i915_active_request_raw(&timeline->last_request, + &request->i915->drm.struct_mutex); if (prev && !i915_request_completed(prev)) { i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); @@ -954,7 +941,7 @@ void i915_request_add(struct i915_request *request) spin_unlock_irq(&timeline->lock); GEM_BUG_ON(timeline->seqno != request->fence.seqno); - i915_gem_active_set(&timeline->last_request, request); + __i915_active_request_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); if (list_is_first(&request->ring_link, &ring->request_list)) { diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 3cffb96203b9..40f3e8dcbdd5 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -403,387 +403,4 @@ static inline void i915_request_mark_complete(struct i915_request *rq) void i915_retire_requests(struct drm_i915_private *i915); -/* - * We treat requests as fences. This is not be to confused with our - * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. - * We use the fences to synchronize access from the CPU with activity on the - * GPU, for example, we should not rewrite an object's PTE whilst the GPU - * is reading them. We also track fences at a higher level to provide - * implicit synchronisation around GEM objects, e.g. set-domain will wait - * for outstanding GPU rendering before marking the object ready for CPU - * access, or a pageflip will wait until the GPU is complete before showing - * the frame on the scanout. - * - * In order to use a fence, the object must track the fence it needs to - * serialise with. For example, GEM objects want to track both read and - * write access so that we can perform concurrent read operations between - * the CPU and GPU engines, as well as waiting for all rendering to - * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_gem_active to track the most recent (in - * retirement order) request relevant for the desired mode of access. - * The #i915_gem_active is updated with i915_gem_active_set() to track the - * most recent fence request, typically this is done as part of - * i915_vma_move_to_active(). - * - * When the #i915_gem_active completes (is retired), it will - * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_gem_active.request == NULL). The owner - * can then perform any action, such as delayed freeing of an active - * resource including itself. - */ -struct i915_gem_active; - -typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, - struct i915_request *); - -struct i915_gem_active { - struct i915_request __rcu *request; - struct list_head link; - i915_gem_retire_fn retire; -}; - -void i915_gem_retire_noop(struct i915_gem_active *, - struct i915_request *request); - -/** - * init_request_active - prepares the activity tracker for use - * @active - the active tracker - * @func - a callback when then the tracker is retired (becomes idle), - * can be NULL - * - * init_request_active() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired - * after completion, the optional callback @func is invoked. - */ -static inline void -init_request_active(struct i915_gem_active *active, - i915_gem_retire_fn retire) -{ - RCU_INIT_POINTER(active->request, NULL); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_gem_retire_noop; -} - -/** - * i915_gem_active_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * i915_gem_active_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -i915_gem_active_set(struct i915_gem_active *active, - struct i915_request *request) -{ - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} - -/** - * i915_gem_active_set_retire_fn - updates the retirement callback - * @active - the active tracker - * @fn - the routine called when the request is retired - * @mutex - struct_mutex used to guard retirements - * - * i915_gem_active_set_retire_fn() updates the function pointer that - * is called when the final request associated with the @active tracker - * is retired. - */ -static inline void -i915_gem_active_set_retire_fn(struct i915_gem_active *active, - i915_gem_retire_fn fn, - struct mutex *mutex) -{ - lockdep_assert_held(mutex); - active->retire = fn ?: i915_gem_retire_noop; -} - -static inline struct i915_request * -__i915_gem_active_peek(const struct i915_gem_active *active) -{ - /* - * Inside the error capture (running with the driver in an unknown - * state), we want to bend the rules slightly (a lot). - * - * Work is in progress to make it safer, in the meantime this keeps - * the known issue from spamming the logs. - */ - return rcu_dereference_protected(active->request, 1); -} - -/** - * i915_gem_active_raw - return the active request - * @active - the active tracker - * - * i915_gem_active_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. - */ -static inline struct i915_request * -i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_gem_active_peek - report the active request being monitored - * @active - the active tracker - * - * i915_gem_active_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) -{ - struct i915_request *request; - - request = i915_gem_active_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_gem_active_get - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) -{ - return i915_request_get(i915_gem_active_peek(active, mutex)); -} - -/** - * __i915_gem_active_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold the RCU read lock, but - * the returned pointer is safe to use outside of RCU. - */ -static inline struct i915_request * -__i915_gem_active_get_rcu(const struct i915_gem_active *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). - */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - -/** - * i915_gem_active_get_unlocked - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get_unlocked() returns a reference to the active request, - * or NULL if the active tracker is idle. The reference is obtained under RCU, - * so no locking is required by the caller. - * - * The reference should be freed with i915_request_put(). - */ -static inline struct i915_request * -i915_gem_active_get_unlocked(const struct i915_gem_active *active) -{ - struct i915_request *request; - - rcu_read_lock(); - request = __i915_gem_active_get_rcu(active); - rcu_read_unlock(); - - return request; -} - -/** - * i915_gem_active_isset - report whether the active tracker is assigned - * @active - the active tracker - * - * i915_gem_active_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle - * and this may report stale information. - */ -static inline bool -i915_gem_active_isset(const struct i915_gem_active *active) -{ - return rcu_access_pointer(active->request); -} - -/** - * i915_gem_active_wait - waits until the request is completed - * @active - the active request on which to wait - * @flags - how to wait - * @timeout - how long to wait at most - * @rps - userspace client to charge for a waitboost - * - * i915_gem_active_wait() waits until the request is completed before - * returning, without requiring any locks to be held. Note that it does not - * retire any requests before returning. - * - * This function relies on RCU in order to acquire the reference to the active - * request without holding any locks. See __i915_gem_active_get_rcu() for the - * glory details on how that is managed. Once the reference is acquired, we - * can then wait upon the request, and afterwards release our reference, - * free of any locking. - * - * This function wraps i915_request_wait(), see it for the full details on - * the arguments. - * - * Returns 0 if successful, or a negative error code. - */ -static inline int -i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) -{ - struct i915_request *request; - long ret = 0; - - request = i915_gem_active_get_unlocked(active); - if (request) { - ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT); - i915_request_put(request); - } - - return ret < 0 ? ret : 0; -} - -/** - * i915_gem_active_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_gem_active_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_gem_active_retire(struct i915_gem_active *active, - struct mutex *mutex) -{ - struct i915_request *request; - long ret; - - request = i915_gem_active_raw(active, mutex); - if (!request) - return 0; - - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; -} - -#define for_each_active(mask, idx) \ - for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) - #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 4462007a681c..0e0ddf2e6815 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -862,7 +862,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) struct i915_request *rq; long timeout; - rq = i915_gem_active_get_unlocked(&tl->last_request); + rq = i915_active_request_get_unlocked(&tl->last_request); if (!rq) continue; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index dcff3ae96683..b2202d2e58a2 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -163,8 +163,8 @@ int i915_timeline_init(struct drm_i915_private *i915, spin_lock_init(&timeline->lock); - init_request_active(&timeline->barrier, NULL); - init_request_active(&timeline->last_request, NULL); + INIT_ACTIVE_REQUEST(&timeline->barrier); + INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -236,7 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline) { GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(!list_empty(&timeline->requests)); - GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier)); + GEM_BUG_ON(i915_active_request_isset(&timeline->barrier)); i915_syncmap_free(&timeline->sync); hwsp_free(timeline); @@ -311,25 +311,6 @@ void i915_timeline_unpin(struct i915_timeline *tl) __i915_vma_unpin(tl->hwsp_ggtt); } -int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq) -{ - struct i915_request *old; - int err; - - lockdep_assert_held(&rq->i915->drm.struct_mutex); - - /* Must maintain ordering wrt existing barriers */ - old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex); - if (old) { - err = i915_request_await_dma_fence(rq, &old->fence); - if (err) - return err; - } - - i915_gem_active_set(&tl->barrier, rq); - return 0; -} - void __i915_timeline_free(struct kref *kref) { struct i915_timeline *timeline = diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index d167e04073c5..7bec7d2e45bf 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -28,6 +28,7 @@ #include #include +#include "i915_active.h" #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" @@ -58,10 +59,10 @@ struct i915_timeline { /* Contains an RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_request_rcu(), or hold the + * the request using i915_active_request_get_request_rcu(), or hold the * struct_mutex. */ - struct i915_gem_active last_request; + struct i915_active_request last_request; /** * We track the most recent seqno that we wait on in every context so @@ -82,7 +83,7 @@ struct i915_timeline { * subsequent submissions to this timeline be executed only after the * barrier has been completed. */ - struct i915_gem_active barrier; + struct i915_active_request barrier; struct list_head link; const char *name; @@ -174,7 +175,10 @@ void i915_timelines_fini(struct drm_i915_private *i915); * submissions on @timeline. Subsequent requests will not be submitted to GPU * until the barrier has been completed. */ -int i915_timeline_set_barrier(struct i915_timeline *timeline, - struct i915_request *rq); +static inline int +i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq) +{ + return i915_active_request_set(&tl->barrier, rq); +} #endif diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d4772061e642..b713bed20c38 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -120,7 +120,7 @@ vma_create(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); i915_active_init(vm->i915, &vma->active, __i915_vma_retire); - init_request_active(&vma->last_fence, NULL); + INIT_ACTIVE_REQUEST(&vma->last_fence); vma->vm = vm; vma->ops = &vm->vma_ops; @@ -808,7 +808,7 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(vma->fence); - GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + GEM_BUG_ON(i915_active_request_isset(&vma->last_fence)); mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); @@ -942,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma, obj->write_domain = I915_GEM_DOMAIN_RENDER; if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, rq); + __i915_active_request_set(&obj->frontbuffer_write, rq); obj->read_domains = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, rq); + __i915_active_request_set(&vma->last_fence, rq); export_fence(vma, rq, flags); return 0; @@ -986,8 +986,8 @@ int i915_vma_unbind(struct i915_vma *vma) if (ret) goto unpin; - ret = i915_gem_active_retire(&vma->last_fence, - &vma->vm->i915->drm.struct_mutex); + ret = i915_active_request_retire(&vma->last_fence, + &vma->vm->i915->drm.struct_mutex); unpin: __i915_vma_unpin(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 3c03d4569481..7c742027f866 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -110,7 +110,7 @@ struct i915_vma { #define I915_VMA_GGTT_WRITE BIT(15) struct i915_active active; - struct i915_gem_active last_fence; + struct i915_active_request last_fence; /** * Support different GGTT views into the same object. diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 71c01eb13af1..49fa43ff02ba 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1086,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline.last_request); + rq = __i915_active_request_peek(&engine->timeline.last_request); if (rq) return rq->hw_context == kernel_context; else diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index f68c7975006c..fc2e283d326b 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -184,7 +184,7 @@ struct intel_overlay { struct overlay_registers __iomem *regs; u32 flip_addr; /* flip handling */ - struct i915_gem_active last_flip; + struct i915_active_request last_flip; }; static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, @@ -212,23 +212,23 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, static void intel_overlay_submit_request(struct intel_overlay *overlay, struct i915_request *rq, - i915_gem_retire_fn retire) + i915_active_retire_fn retire) { - GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, - &overlay->i915->drm.struct_mutex)); - i915_gem_active_set_retire_fn(&overlay->last_flip, retire, - &overlay->i915->drm.struct_mutex); - i915_gem_active_set(&overlay->last_flip, rq); + GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip, + &overlay->i915->drm.struct_mutex)); + i915_active_request_set_retire_fn(&overlay->last_flip, retire, + &overlay->i915->drm.struct_mutex); + __i915_active_request_set(&overlay->last_flip, rq); i915_request_add(rq); } static int intel_overlay_do_wait_request(struct intel_overlay *overlay, struct i915_request *rq, - i915_gem_retire_fn retire) + i915_active_retire_fn retire) { intel_overlay_submit_request(overlay, rq, retire); - return i915_gem_active_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); + return i915_active_request_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } static struct i915_request *alloc_request(struct intel_overlay *overlay) @@ -349,8 +349,9 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) i915_vma_put(vma); } -static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, - struct i915_request *rq) +static void +intel_overlay_release_old_vid_tail(struct i915_active_request *active, + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -358,7 +359,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, intel_overlay_release_old_vma(overlay); } -static void intel_overlay_off_tail(struct i915_gem_active *active, +static void intel_overlay_off_tail(struct i915_active_request *active, struct i915_request *rq) { struct intel_overlay *overlay = @@ -421,8 +422,8 @@ static int intel_overlay_off(struct intel_overlay *overlay) * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - return i915_gem_active_retire(&overlay->last_flip, - &overlay->i915->drm.struct_mutex); + return i915_active_request_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } /* Wait for pending overlay flip and release old frame. @@ -1355,7 +1356,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - init_request_active(&overlay->last_flip, NULL); + INIT_ACTIVE_REQUEST(&overlay->last_flip); mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index e5659aaa856d..d2de9ece2118 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -15,8 +15,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) spin_lock_init(&timeline->lock); - init_request_active(&timeline->barrier, NULL); - init_request_active(&timeline->last_request, NULL); + INIT_ACTIVE_REQUEST(&timeline->barrier); + INIT_ACTIVE_REQUEST(&timeline->last_request); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); From b2ae318acdcaf1c9562329c9cc1bd2f479157ad2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 4 Feb 2019 14:25:38 -0800 Subject: [PATCH 435/539] drm/i915: Rename HAS_GMCH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First of all GMCH can be considered a feature by itself since it is a chip present in some platforms that connects the IA processor to memory and other components in PC. Also with the introduction of display block at device info, we got a redundant definition: .display.has_gmch_display = 1, So, let's clean up things a bit and use the standardized way of has_feature on displays side. No functional change and no manual interaction to generate this patch. It is only: sed -si -e 's/has_gmch_display/has_gmch/g' \ -e 's/HAS_GMCH_DISPLAY/HAS_GMCH/g' drivers/gpu/drm/i915/*{c,h} Cc: José Roberto de Souza Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190204222538.15842-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 10 ++++---- drivers/gpu/drm/i915/i915_suspend.c | 4 ++-- drivers/gpu/drm/i915/intel_color.c | 6 ++--- drivers/gpu/drm/i915/intel_device_info.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 28 +++++++++++----------- drivers/gpu/drm/i915/intel_dp.c | 12 +++++----- drivers/gpu/drm/i915/intel_fifo_underrun.c | 6 ++--- drivers/gpu/drm/i915/intel_hdmi.c | 6 ++--- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_i2c.c | 2 +- drivers/gpu/drm/i915/vlv_dsi.c | 4 ++-- 13 files changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c48733a15e63..e6b5bd845245 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3727,7 +3727,7 @@ static int spr_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return -ENODEV; return single_open(file, spr_wm_latency_show, dev_priv); @@ -3737,7 +3737,7 @@ static int cur_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return -ENODEV; return single_open(file, cur_wm_latency_show, dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45b837bc8f9e..a2293152cb6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2490,7 +2490,7 @@ static inline unsigned int i915_sg_segment_size(void) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_FBC(dev_priv) (INTEL_INFO(dev_priv)->display.has_fbc) -#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 7) +#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH(dev_priv) && INTEL_GEN(dev_priv) >= 7) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) @@ -2570,7 +2570,7 @@ static inline unsigned int i915_sg_segment_size(void) #define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) #define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) -#define HAS_GMCH_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch_display) +#define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) #define HAS_LSPCON(dev_priv) (INTEL_GEN(dev_priv) >= 9) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index c6533c57ea2f..36fa6f1905fc 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -88,7 +88,7 @@ .num_pipes = 1, \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ @@ -129,7 +129,7 @@ static const struct intel_device_info intel_i865g_info = { #define GEN3_FEATURES \ GEN(3), \ .num_pipes = 2, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .ring_mask = RENDER_RING, \ .has_snoop = true, \ @@ -206,7 +206,7 @@ static const struct intel_device_info intel_pineview_info = { GEN(4), \ .num_pipes = 2, \ .display.has_hotplug = 1, \ - .display.has_gmch_display = 1, \ + .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .ring_mask = RENDER_RING, \ .has_snoop = true, \ @@ -382,7 +382,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, - .display.has_gmch_display = 1, + .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt = INTEL_PPGTT_FULL, .has_snoop = true, @@ -474,7 +474,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_runtime_pm = 1, .has_rc6 = 1, .has_logical_ring_contexts = 1, - .display.has_gmch_display = 1, + .display.has_gmch = 1, .ppgtt = INTEL_PPGTT_FULL, .has_reset_engine = 1, .has_snoop = true, diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index f18afa2bac8d..d2f2a9c2fabd 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -86,7 +86,7 @@ int i915_save_state(struct drm_i915_private *dev_priv) } else if (IS_GEN(dev_priv, 2)) { for (i = 0; i < 7; i++) dev_priv->regfile.saveSWF1[i] = I915_READ(SWF1(i)); - } else if (HAS_GMCH_DISPLAY(dev_priv)) { + } else if (HAS_GMCH(dev_priv)) { for (i = 0; i < 16; i++) { dev_priv->regfile.saveSWF0[i] = I915_READ(SWF0(i)); dev_priv->regfile.saveSWF1[i] = I915_READ(SWF1(i)); @@ -131,7 +131,7 @@ int i915_restore_state(struct drm_i915_private *dev_priv) } else if (IS_GEN(dev_priv, 2)) { for (i = 0; i < 7; i++) I915_WRITE(SWF1(i), dev_priv->regfile.saveSWF1[i]); - } else if (HAS_GMCH_DISPLAY(dev_priv)) { + } else if (HAS_GMCH(dev_priv)) { for (i = 0; i < 16; i++) { I915_WRITE(SWF0(i), dev_priv->regfile.saveSWF0[i]); I915_WRITE(SWF1(i), dev_priv->regfile.saveSWF1[i]); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 4b0044cdcf1a..31fa599b0d61 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -321,7 +321,7 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, enum pipe pipe = crtc->pipe; int i; - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else @@ -336,7 +336,7 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, (drm_color_lut_extract(lut[i].green, 8) << 8) | drm_color_lut_extract(lut[i].blue, 8); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) I915_WRITE(PALETTE(pipe, i), word); else I915_WRITE(LGC_PALETTE(pipe, i), word); @@ -345,7 +345,7 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, for (i = 0; i < 256; i++) { u32 word = (i << 16) | (i << 8) | i; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) I915_WRITE(PALETTE(pipe, i), word); else I915_WRITE(LGC_PALETTE(pipe, i), word); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7bf09cef591a..e8b8661df746 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -115,7 +115,7 @@ enum intel_ppgtt { func(has_ddi); \ func(has_dp_mst); \ func(has_fbc); \ - func(has_gmch_display); \ + func(has_gmch); \ func(has_hotplug); \ func(has_ipc); \ func(has_overlay); \ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8b36ee183c7e..d324b816350b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1805,7 +1805,7 @@ static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. */ - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else @@ -2094,7 +2094,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, * complicated than this. For example, Cherryview appears quite * happy to scanout from anywhere within its global aperture. */ - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) pinctl |= PIN_MAPPABLE; vma = i915_gem_object_pin_to_display_plane(obj, @@ -3195,7 +3195,7 @@ i9xx_plane_max_stride(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - if (!HAS_GMCH_DISPLAY(dev_priv)) { + if (!HAS_GMCH(dev_priv)) { return 32*1024; } else if (INTEL_GEN(dev_priv) >= 4) { if (modifier == I915_FORMAT_MOD_X_TILED) @@ -3771,7 +3771,7 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - if (!HAS_GMCH_DISPLAY(to_i915(dev))) + if (!HAS_GMCH(to_i915(dev))) to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_helper_commit_duplicated_state(state, ctx); @@ -5294,7 +5294,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) * event which is after the vblank start event, so we need to have a * wait-for-vblank between disabling the plane and the pipe. */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && intel_set_memory_cxsr(dev_priv, false)) intel_wait_for_vblank(dev_priv, pipe); } @@ -5431,7 +5431,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, * event which is after the vblank start event, so we need to have a * wait-for-vblank between disabling the plane and the pipe. */ - if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + if (HAS_GMCH(dev_priv) && old_crtc_state->base.active && pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) intel_wait_for_vblank(dev_priv, crtc->pipe); @@ -6705,7 +6705,7 @@ static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) /* FIXME calculate proper pipe pixel rate for GMCH pfit */ crtc_state->pixel_rate = crtc_state->base.adjusted_mode.crtc_clock; @@ -9814,7 +9814,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) base += plane_state->color_plane[0].offset; /* ILK+ do this automagically */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && plane_state->base.rotation & DRM_MODE_ROTATE_180) base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * fb->format->cpp[0]; @@ -11356,7 +11356,7 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->scaler_state.scaler_users, pipe_config->scaler_state.scaler_id); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n", pipe_config->gmch_pfit.control, pipe_config->gmch_pfit.pgm_ratios, @@ -13096,7 +13096,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* FIXME unify this for all platforms */ if (!new_crtc_state->active && - !HAS_GMCH_DISPLAY(dev_priv) && + !HAS_GMCH(dev_priv) && dev_priv->display.initial_watermarks) dev_priv->display.initial_watermarks(intel_state, new_intel_crtc_state); @@ -15074,7 +15074,7 @@ retry: * intermediate watermarks (since we don't trust the current * watermarks). */ - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); @@ -15315,7 +15315,7 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) sanitize_watermarks(dev); /* @@ -15524,7 +15524,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, if (crtc_state->base.active && !intel_crtc_has_encoders(crtc)) intel_crtc_disable_noatomic(&crtc->base, ctx); - if (crtc_state->base.active || HAS_GMCH_DISPLAY(dev_priv)) { + if (crtc_state->base.active || HAS_GMCH(dev_priv)) { /* * We start out with underrun reporting disabled to avoid races. * For correct bookkeeping mark this on active crtcs. @@ -16271,7 +16271,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) error->pipe[i].source = I915_READ(PIPESRC(i)); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) error->pipe[i].stat = I915_READ(PIPESTAT(i)); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2e994b5ba40b..d9bc8ee52300 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2141,7 +2141,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, return ret; } - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) intel_gmch_panel_fitting(intel_crtc, pipe_config, conn_state->scaling_mode); else @@ -2152,7 +2152,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return -EINVAL; @@ -5300,7 +5300,7 @@ bool intel_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (HAS_GMCH_DISPLAY(dev_priv)) { + if (HAS_GMCH(dev_priv)) { if (IS_GM45(dev_priv)) return gm45_digital_port_connected(encoder); else @@ -6038,7 +6038,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) drm_connector_attach_max_bpc_property(connector, 6, 10); else if (INTEL_GEN(dev_priv) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); @@ -6047,7 +6047,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect u32 allowed_scalers; allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); drm_connector_attach_scaling_mode_property(connector, allowed_scalers); @@ -6919,7 +6919,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) connector->interlace_allowed = true; connector->doublescan_allowed = 0; diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 3b9285130ef5..f33de4be4b89 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -258,7 +258,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (IS_GEN_RANGE(dev_priv, 5, 6)) ironlake_set_fifo_underrun_reporting(dev, pipe, enable); @@ -369,7 +369,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, return; /* GMCH can't disable fifo underruns, filter them. */ - if (HAS_GMCH_DISPLAY(dev_priv) && + if (HAS_GMCH(dev_priv) && crtc->cpu_fifo_underrun_disabled) return; @@ -421,7 +421,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv) if (crtc->cpu_fifo_underrun_disabled) continue; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) i9xx_check_fifo_underruns(crtc); else if (IS_GEN(dev_priv, 7)) ivybridge_check_fifo_underruns(crtc); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 97a98e1bea56..f125a62eba8c 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1588,7 +1588,7 @@ intel_hdmi_mode_valid(struct drm_connector *connector, if (hdmi->has_hdmi_sink && !force_dvi) { /* if we can't do 8bpc we may still be able to do 12bpc */ - if (status != MODE_OK && !HAS_GMCH_DISPLAY(dev_priv)) + if (status != MODE_OK && !HAS_GMCH(dev_priv)) status = hdmi_port_clock_valid(hdmi, clock * 3 / 2, true, force_dvi); @@ -1613,7 +1613,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, &crtc_state->base.adjusted_mode; int i; - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) return false; if (bpc == 10 && INTEL_GEN(dev_priv) < 11) @@ -2150,7 +2150,7 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c drm_connector_attach_content_type_property(connector); connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) drm_connector_attach_max_bpc_property(connector, 8, 12); } diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index e027d2b4abe5..b8937c788f03 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -470,7 +470,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, * hotplug bits itself. So only WARN about unexpected * interrupts on saner platforms. */ - WARN_ONCE(!HAS_GMCH_DISPLAY(dev_priv), + WARN_ONCE(!HAS_GMCH(dev_priv), "Received HPD interrupt on pin %d although disabled\n", pin); continue; } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 4f6dc8c94634..5a733e711355 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -823,7 +823,7 @@ int intel_setup_gmbus(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE; - else if (!HAS_GMCH_DISPLAY(dev_priv)) + else if (!HAS_GMCH(dev_priv)) /* * Broxton uses the same PCH offsets for South Display Engine, * even though it doesn't have a PCH. diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 696b750acd1d..6403728fe778 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -275,7 +275,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, if (fixed_mode) { intel_fixed_panel_mode(fixed_mode, adjusted_mode); - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH(dev_priv)) intel_gmch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode); else @@ -1633,7 +1633,7 @@ static void intel_dsi_add_properties(struct intel_connector *connector) u32 allowed_scalers; allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); - if (!HAS_GMCH_DISPLAY(dev_priv)) + if (!HAS_GMCH(dev_priv)) allowed_scalers |= BIT(DRM_MODE_SCALE_CENTER); drm_connector_attach_scaling_mode_property(&connector->base, From ab1ab0eb0cb64d1c225bdf42bee067f81c87e2a4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 14:15:07 +0100 Subject: [PATCH 436/539] drm/i915: do not return invalid pointers as a *dentry When calling debugfs functions, they can now return error values if something went wrong. If that happens, return a NULL as a *dentry to the relay core instead of passing it an illegal pointer. The relay core should be able to handle an illegal pointer, but add this check to be safe. Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Greg Kroah-Hartman Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190131131507.GA19807@kroah.com --- drivers/gpu/drm/i915/intel_guc_log.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index b53582c0c6c1..806fdfd7c78a 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -140,6 +140,9 @@ static struct dentry *create_buf_file_callback(const char *filename, buf_file = debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); + if (IS_ERR(buf_file)) + return NULL; + return buf_file; } From 39806c3f11e206d03e76521c808a96aedfcc58d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 4 Feb 2019 23:16:44 +0200 Subject: [PATCH 437/539] drm/i915: Include register polling in reg_rw traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We generally omit register polling from the i915_reg_rw tracepoint. Understandable since polling could generate a lot of noise in the trace. The downside is that the trace is incomplete. As a compromise let's trace the final register value observed while polling. That should be generally sufficient to observe what the code should be doing next. I suppose in some cases it might make sense to also trace the initial register value, and maybe the number of times we polled. But that would require a separate tracepoint so let's leave it for the future. The other users of _NOTRACE() are i915_pmu and i2c bitbanging, which I decided to leave alone. Next we should do something to claw back the tracepoints for planes and whatnot which were switched to _FW() a while back. I guess just new macros for raw_rw+trace. The question is what to call it? Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190204211644.21967-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 12 ++++++++++-- drivers/gpu/drm/i915/intel_dp.c | 6 ++++++ drivers/gpu/drm/i915/intel_uncore.c | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f462a4d28af4..349ba9e23a5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2543,6 +2543,10 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, u32 mask, u32 val) { + i915_reg_t reg = VLV_GTLC_PW_STATUS; + u32 reg_value; + int ret; + /* The HW does not like us polling for PW_STATUS frequently, so * use the sleeping loop rather than risk the busy spin within * intel_wait_for_register(). @@ -2550,8 +2554,12 @@ static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, * Transitioning between RC6 states should be at most 2ms (see * valleyview_enable_rps) so use a 3ms timeout. */ - return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val, - 3); + ret = wait_for(((reg_value = I915_READ_NOTRACE(reg)) & mask) == val, 3); + + /* just trace the final value */ + trace_i915_reg_rw(false, reg, reg_value, sizeof(reg_value), true); + + return ret; } int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d9bc8ee52300..a15d9fd80641 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1061,6 +1061,10 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); + + /* just trace the final value */ + trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); + if (!done) DRM_ERROR("dp aux hw did not signal timeout!\n"); #undef C @@ -1227,6 +1231,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, break; msleep(1); } + /* just trace the final value */ + trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (try == 3) { static u32 last_status = -1; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e88f0252d77e..75646a1e0051 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1819,6 +1819,9 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, (reg_value & mask) == value, slow_timeout_ms * 1000, 10, 1000); + /* just trace the final value */ + trace_i915_reg_rw(false, reg, reg_value, sizeof(reg_value), true); + if (out_value) *out_value = reg_value; From f81b845f72c31e7cbba4e8a91dc30252bc1589d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 09:27:59 +0000 Subject: [PATCH 438/539] drm/i915: Push clear_intel_crtc_state() onto the heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clear_intel_crtc_state() uses the stack for saving a temporary copy of certain bits of the inherited crtc_state before clearing the unwanted bits. This pushes it over the stack limit for my little 32b Pineview, so move the temporary allocation to the heap instead. As we now use a zeroed struct, we can copy the whole extended state back to both preserve what bits need to be preserved and zero the rest. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205092759.16018-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 40 +++++++++++++--------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d324b816350b..113ebd196775 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11468,44 +11468,38 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) return ret; } -static void +static int clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct intel_crtc_scaler_state scaler_state; - struct intel_dpll_hw_state dpll_hw_state; - struct intel_shared_dpll *shared_dpll; - struct intel_crtc_wm_state wm_state; - bool force_thru, ips_force_disable; + struct intel_crtc_state *saved_state; + + saved_state = kzalloc(sizeof(*saved_state), GFP_KERNEL); + if (!saved_state) + return -ENOMEM; /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be * fixed, so that the crtc_state can be safely duplicated. For now, * only fields that are know to not cause problems are preserved. */ - scaler_state = crtc_state->scaler_state; - shared_dpll = crtc_state->shared_dpll; - dpll_hw_state = crtc_state->dpll_hw_state; - force_thru = crtc_state->pch_pfit.force_thru; - ips_force_disable = crtc_state->ips_force_disable; + saved_state->scaler_state = crtc_state->scaler_state; + saved_state->shared_dpll = crtc_state->shared_dpll; + saved_state->dpll_hw_state = crtc_state->dpll_hw_state; + saved_state->pch_pfit.force_thru = crtc_state->pch_pfit.force_thru; + saved_state->ips_force_disable = crtc_state->ips_force_disable; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - wm_state = crtc_state->wm; + saved_state->wm = crtc_state->wm; /* Keep base drm_crtc_state intact, only clear our extended struct */ BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); - memset(&crtc_state->base + 1, 0, + memcpy(&crtc_state->base + 1, &saved_state->base + 1, sizeof(*crtc_state) - sizeof(crtc_state->base)); - crtc_state->scaler_state = scaler_state; - crtc_state->shared_dpll = shared_dpll; - crtc_state->dpll_hw_state = dpll_hw_state; - crtc_state->pch_pfit.force_thru = force_thru; - crtc_state->ips_force_disable = ips_force_disable; - if (IS_G4X(dev_priv) || - IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - crtc_state->wm = wm_state; + kfree(saved_state); + return 0; } static int @@ -11520,7 +11514,9 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, int i; bool retry = true; - clear_intel_crtc_state(pipe_config); + ret = clear_intel_crtc_state(pipe_config); + if (ret) + return ret; pipe_config->cpu_transcoder = (enum transcoder) to_intel_crtc(crtc)->pipe; From 0aded171e20e438015a95fbfe57d679dbf228090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 17:50:53 +0200 Subject: [PATCH 439/539] drm/i915: Fix wm latency==0 disable on skl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding the early latency==0 check back I neglected to realize that we no longer have a way to return a failure from the wm computation like we had in the past (since we now calculate wms before ddb allocations). Also plane_en being false doesn't actually indicate that the level is invalid as it wil also happen when the plane is not enabled. skl_allocate_pipe_ddb() starts scanning from the maximum watermark level and it stops as soon as it finds a level that is deemed viable. The assumption being that if level n+1 is valid then level n is valid as well. Thus if we now disable any watermark level by zeroing its latency the code will think that level to be actually valid and won't confirm whether the actually enabled lower watermark level(s) actually fit into the allotted ddb space. This results in hilarious watermark values that exceed the ddb allocation of the plane. The way we must now indicate a failure is to assign an unreasoanbly big value to min_ddb_alloc which will then make skl_allocate_pipe_ddb() reject the entire level. v2: Also do the same for the lines>31 case (Matt) v3: Make 'blocks' u32 (Matt) Cc: Matt Roper Cc: Stanislav Lisovskiy Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205155053.10081-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ed9786241307..737005bf6816 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4319,7 +4319,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, int num_active; u64 plane_data_rate[I915_MAX_PLANES] = {}; u64 uv_plane_data_rate[I915_MAX_PLANES] = {}; - u16 blocks = 0; + u32 blocks; int level; /* Clear the partitioning for disabled planes. */ @@ -4694,8 +4694,11 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, uint_fixed_16_16_t selected_result; u32 res_blocks, res_lines, min_ddb_alloc = 0; - if (latency == 0) + if (latency == 0) { + /* reject it */ + result->min_ddb_alloc = U16_MAX; return; + } /* Display WA #1141: kbl,cfl */ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4783,8 +4786,11 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, if (!skl_wm_has_lines(dev_priv, level)) res_lines = 0; - if (res_lines > 31) + if (res_lines > 31) { + /* reject it */ + result->min_ddb_alloc = U16_MAX; return; + } /* * If res_lines is valid, assume we can use this watermark level From d16221195ae255beae1fa62f961a9b8e020a6c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 4 Feb 2019 22:21:39 +0200 Subject: [PATCH 440/539] drm/i915: Extract icl_set_pipe_chicken() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need configure PIPE_CHICKEN during fastboot as well. Let's extract it to a helper. v2: Apparently PIPE_CHICKEN is icl+ only Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190204202139.26884-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_display.c | 31 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 113ebd196775..af848fb72ed6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3896,6 +3896,24 @@ unlock: clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); } +static void icl_set_pipe_chicken(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 tmp; + + tmp = I915_READ(PIPE_CHICKEN(pipe)); + + /* + * Display WA #1153: icl + * enable hardware to bypass the alpha math + * and rounding for per-pixel values 00 and 0xff + */ + tmp |= PER_PIXEL_ALPHA_BYPASS_EN; + + I915_WRITE(PIPE_CHICKEN(pipe), tmp); +} + static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { @@ -5782,7 +5800,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); bool psl_clkgate_wa; - u32 pipe_chicken; if (WARN_ON(intel_crtc->active)) return; @@ -5839,16 +5856,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(pipe_config); - /* - * Display WA #1153: enable hardware to bypass the alpha math - * and rounding for per-pixel values 00 and 0xff - */ - if (INTEL_GEN(dev_priv) >= 11) { - pipe_chicken = I915_READ(PIPE_CHICKEN(pipe)); - if (!(pipe_chicken & PER_PIXEL_ALPHA_BYPASS_EN)) - I915_WRITE_FW(PIPE_CHICKEN(pipe), - pipe_chicken | PER_PIXEL_ALPHA_BYPASS_EN); - } + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(intel_crtc); intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) From 108d14bdaef67617412af3f40256810281464425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 4 Feb 2019 22:22:14 +0200 Subject: [PATCH 441/539] drm/i915: Setup PIPE_CHICKEN for fastsets too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Configure PIPE_CHICKEN during intel_update_pipe_config() to make sure we have our chickens in a row with fastboot too. v2: Apparently PIPE_CHICKEN is icl+ only Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190204202214.27051-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index af848fb72ed6..dc7a2aa62874 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3958,6 +3958,9 @@ static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_sta I915_WRITE(SKL_BOTTOM_COLOR(crtc->pipe), SKL_BOTTOM_COLOR_GAMMA_ENABLE | SKL_BOTTOM_COLOR_CSC_ENABLE); + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); } static void intel_fdi_normal_train(struct intel_crtc *crtc) From bf002c100740f4ae01d0d86b44f65a712ee14031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 4 Feb 2019 22:22:32 +0200 Subject: [PATCH 442/539] drm/i915: W/A for underruns with WM1+ disabled on icl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disabling WM1+ on ICL causes tons of underruns with linear/X-tiled framebuffers. We can avoid this by flipping on a chicken bit affecting the way the hw fill the FIFO. This may not be the final solution but should hopefully avoid some underruns in the meantime. v2: Apparently PIPE_CHICKEN is icl+ only Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190204202232.27153-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ede54fdc1676..12964b0fbc54 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7618,6 +7618,7 @@ enum { #define _PIPEB_CHICKEN 0x71038 #define _PIPEC_CHICKEN 0x72038 #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) +#define PM_FILL_MAINTAIN_DBUF_FULLNESS (1 << 0) #define PIPE_CHICKEN(pipe) _MMIO_PIPE(pipe, _PIPEA_CHICKEN,\ _PIPEB_CHICKEN) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dc7a2aa62874..9db931a450f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3911,6 +3911,12 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) */ tmp |= PER_PIXEL_ALPHA_BYPASS_EN; + /* + * W/A for underruns with linear/X-tiled with + * WM1+ disabled. + */ + tmp |= PM_FILL_MAINTAIN_DBUF_FULLNESS; + I915_WRITE(PIPE_CHICKEN(pipe), tmp); } From c1219b941c1ff0abc615e471ad81507ddb397253 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Feb 2019 10:41:59 +0000 Subject: [PATCH 443/539] drm/amd/amdgpu: fix spelling mistake "matech" -> "match" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0d90672d0e58..407dd16cc35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -125,7 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) if (!hive) { ret = -EINVAL; dev_err(adev->dev, - "XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n", + "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id); goto exit; } From d6d36b55a0f394a3895349285ed6980278809489 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 1 Feb 2019 13:15:41 -0700 Subject: [PATCH 444/539] drm/amd/display: Use memset to initialize variable in wait_for_training_aux_rd_interval Clang warns: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:50:57: warning: suggest braces around initialization of subobject [-Wmissing-braces] union training_aux_rd_interval training_rd_interval = {0}; ^ {} 1 warning generated. Previous efforts to fix this type of warning by adding or removing braces have been met with some pushback in favor of using memset [1][2]. Do that here, mirroring commit 05794eff1aa6 ("drm/amdgpu/gmc: fix compiler errors [-Werror,-Wmissing-braces] (V2)") in this tree. [1]: https://lore.kernel.org/lkml/022e41c0-8465-dc7a-a45c-64187ecd9684@amd.com/ [2]: https://lore.kernel.org/lkml/20181128.215241.702406654469517539.davem@davemloft.net/ Fixes: 3cec41769d21 ("drm/amd/display: Fix use of uninitialized union") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 92f565ca1260..5ee36d6e0512 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -47,7 +47,9 @@ static void wait_for_training_aux_rd_interval( struct dc_link *link, uint32_t default_wait_in_micro_secs) { - union training_aux_rd_interval training_rd_interval = {0}; + union training_aux_rd_interval training_rd_interval; + + memset(&training_rd_interval, 0, sizeof(training_rd_interval)); /* overwrite the delay if rev > 1.1*/ if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { From 19afd79951e6303dbe0d15293a2f66ceb4c772f6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 1 Feb 2019 13:14:28 -0700 Subject: [PATCH 445/539] drm/amd/display: Use memset to initialize variables in amdgpu_dm_atomic_commit_tail Clang warns: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:5089:60: warning: suggest braces around initialization of subobject [-Wmissing-braces] struct dc_surface_update dummy_updates[MAX_SURFACES] = { 0 }; ^ {} Previous efforts to fix this type of warning by adding or removing braces have been met with some pushback in favor of using memset [1][2]. Do that here, mirroring commit 05794eff1aa6 ("drm/amdgpu/gmc: fix compiler errors [-Werror,-Wmissing-braces] (V2)") in this tree. [1]: https://lore.kernel.org/lkml/022e41c0-8465-dc7a-a45c-64187ecd9684@amd.com/ [2]: https://lore.kernel.org/lkml/20181128.215241.702406654469517539.davem@davemloft.net/ Fixes: 02d6a6fcdf68 ("drm/amd/display: Simplify underscan and ABM commit") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1923f47d3dd6..640ac9d9f04f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5094,10 +5094,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct dc_surface_update dummy_updates[MAX_SURFACES] = { 0 }; - struct dc_stream_update stream_update = { 0 }; + struct dc_surface_update dummy_updates[MAX_SURFACES]; + struct dc_stream_update stream_update; struct dc_stream_status *status = NULL; + memset(&dummy_updates, 0, sizeof(dummy_updates)); + memset(&stream_update, 0, sizeof(stream_update)); + if (acrtc) { new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); From 8daa12182a7ab441a2b3eeb170f9d8a12b206d6c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 1 Feb 2019 13:12:26 -0700 Subject: [PATCH 446/539] drm/amd/display: Use memset to initialize variables in fill_plane_dcc_attributes Clang warns: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:2314:38: warning: suggest braces around initialization of subobject [-Wmissing-braces] struct dc_surface_dcc_cap output = {0}; ^ {} Previous efforts to fix this type of warning by adding or removing braces have been met with some pushback in favor of using memset [1][2]. Do that here, mirroring commit 05794eff1aa6 ("drm/amdgpu/gmc: fix compiler errors [-Werror,-Wmissing-braces] (V2)") in this tree. [1]: https://lore.kernel.org/lkml/022e41c0-8465-dc7a-a45c-64187ecd9684@amd.com/ [2]: https://lore.kernel.org/lkml/20181128.215241.702406654469517539.davem@davemloft.net/ Fixes: 7df7e505e82a ("drm/amd/display: Set requested plane state DCC params for GFX9") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 640ac9d9f04f..a08820f86f0f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2297,12 +2297,15 @@ static bool fill_plane_dcc_attributes(struct amdgpu_device *adev, uint64_t info) { struct dc *dc = adev->dm.dc; - struct dc_dcc_surface_param input = {0}; - struct dc_surface_dcc_cap output = {0}; + struct dc_dcc_surface_param input; + struct dc_surface_dcc_cap output; uint32_t offset = AMDGPU_TILING_GET(info, DCC_OFFSET_256B); uint32_t i64b = AMDGPU_TILING_GET(info, DCC_INDEPENDENT_64B) != 0; uint64_t dcc_address; + memset(&input, 0, sizeof(input)); + memset(&output, 0, sizeof(output)); + if (!offset) return false; From 373e87fc91527124cb8ec21465a6d070a65c56af Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Sat, 2 Feb 2019 15:01:53 +0800 Subject: [PATCH 447/539] drm/amd/powerplay: update soc boot and max level on vega10 update soc boot and max level,then uclk isn't stuck at minimum. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109462 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 0d38ac2fdbf1..5479125ff4f6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3579,6 +3579,10 @@ static int vega10_generate_dpm_level_enable_mask( vega10_find_lowest_dpm_level(&(data->dpm_table.mem_table)); data->smc_state_table.mem_max_level = vega10_find_highest_dpm_level(&(data->dpm_table.mem_table)); + data->smc_state_table.soc_boot_level = + vega10_find_lowest_dpm_level(&(data->dpm_table.soc_table)); + data->smc_state_table.soc_max_level = + vega10_find_highest_dpm_level(&(data->dpm_table.soc_table)); PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), "Attempt to upload DPM Bootup Levels Failed!", @@ -3593,6 +3597,9 @@ static int vega10_generate_dpm_level_enable_mask( for(i = data->smc_state_table.mem_boot_level; i < data->smc_state_table.mem_max_level; i++) data->dpm_table.mem_table.dpm_levels[i].enabled = true; + for (i = data->smc_state_table.soc_boot_level; i < data->smc_state_table.soc_max_level; i++) + data->dpm_table.soc_table.dpm_levels[i].enabled = true; + return 0; } From 67dd1a36334ffce82bebeb2d633e152aa436d370 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 31 Jan 2019 15:44:22 -0500 Subject: [PATCH 448/539] drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New chunk for dependency on start of job's execution instead on the end. This is used for GPU deadlock prevention when userspace uses mid-IB fences to wait for mid-IB work on other rings. v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES v3: Bump KMS version v4: put old fence AFTER acquiring the scheduled fence. Signed-off-by: Andrey Grodzovsky Suggested-by: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 13 ++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1c49b8266d69..52a5e4fdc95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: break; default: @@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + struct dma_fence *old = fence; + + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + if (IS_ERR(fence)) { r = PTR_ERR(fence); amdgpu_ctx_put(ctx); @@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || + chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c806f984bcc5..1158a6f4eec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,9 +71,10 @@ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 27 +#define KMS_DRIVER_MINOR 28 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be84e43c1e19..47296f2ec3fa 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -523,6 +523,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 struct drm_amdgpu_cs_chunk { __u32 chunk_id; From 41cca166cc57e75e94d888595a428d23a3bf4e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 21 Jan 2019 17:22:55 -0500 Subject: [PATCH 449/539] drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not increasing the DRM version because GDS isn't totally without bugs yet. v2: update emit_ib_size Signed-off-by: Marek Olšák Acked-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 19 +++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 21 +++++++++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +++++++++++++++++++++++-- include/uapi/drm/amdgpu_drm.h | 5 ++++ 6 files changed, 84 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1158a6f4eec6..2f0ea380c031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -72,9 +72,10 @@ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES + * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 28 +#define KMS_DRIVER_MINOR 29 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index ecbcefe49a98..f89f5734d985 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -37,6 +37,8 @@ struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; + uint32_t gds_compute_max_wave_id; + /* At present, GDS, GWS and OA resources for gfx (graphics) * is always pre-allocated and available for graphics operation. * Such resource is shared between all gfx clients. diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7984292f9282..a59e0fdf5a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, @@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a26747681ed6..b8e50a34bdb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6084,6 +6084,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, @@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_fence = gfx_v8_0_ring_emit_fence_kiq, .test_ring = gfx_v8_0_ring_test_ring, .insert_nop = amdgpu_ring_insert_nop, @@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 262ee3cf6f1c..5533f6e4f4a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4010,6 +4010,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, @@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, .test_ring = gfx_v9_0_ring_test_ring, .insert_nop = amdgpu_ring_insert_nop, @@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA20: + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + case CHIP_VEGA12: + adev->gds.gds_compute_max_wave_id = 0x27f; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 0x8) + adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ + else + adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ + break; + default: + /* this really depends on the chip */ + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + } + adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 47296f2ec3fa..2acbc8bc465b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -566,6 +566,11 @@ union drm_amdgpu_cs { * caches (L2/vL1/sL1/I$). */ #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ From cc7e422d3db338bf00ac73b4575668d149a25a34 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 4 Feb 2019 09:32:07 -0500 Subject: [PATCH 450/539] drm/amd/display: Don't re-enable CRC when CONFIG_DEBUG_FS isn't defined [Why] When CONFIG_DEBUG_FS isn't defined then amdgpu_dm_crtc_set_crc_source is NULL. This causes a compilation error since it's being called unconditionally. [How] Guard the call based on CONFIG_DEBUG_FS - CRC capture isn't supported without this. Cc: Leo Li Cc: Harry Wentland Fixes: 43a6a02eb355 ("drm/amd/display: Re-enable CRC capture following modeset") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a08820f86f0f..0eea6c80864e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5180,9 +5180,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) manage_dm_interrupts(adev, acrtc, true); +#ifdef CONFIG_DEBUG_FS /* The stream has changed so CRC capture needs to re-enabled. */ if (dm_new_crtc_state->crc_enabled) amdgpu_dm_crtc_set_crc_source(crtc, "auto"); +#endif } /* update planes when needed per crtc*/ From 12a8bd8862ebd7d6e0f764120e2f322ddc077a11 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 4 Feb 2019 14:24:25 +0530 Subject: [PATCH 451/539] drm/amd/display: Use context parameters to enable FBC [What] FBC fails to get enabled when switched between LINEAR(console/VT) and non-LINEAR(GUI) based rendering due to default value of tiling info stored in the current_state which is used for deciding whether or not to turn FBC on or off. [How] Use context structure's tiling information which is coherant with the screen updates. Signed-off-by: Shirish S Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index db0ef41eb91c..fd7cd5b5a17c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2535,7 +2535,7 @@ static void dce110_apply_ctx_for_surface( } if (dc->fbc_compressor) - enable_fbc(dc, dc->current_state); + enable_fbc(dc, context); } static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) From 90d647222a8f004bf1430ecea3099ebcc54bfc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 30 Jan 2019 14:12:51 +0100 Subject: [PATCH 452/539] drm/amdgpu: fix waiting for BO moves with CPU based PD/PT updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we open up the possibility to use uninitialized memory. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a404ac17e5ae..93b936f7de4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1781,13 +1781,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (pages_addr) params.src = ~0; - /* Wait for PT BOs to be free. PTs share the same resv. object + /* Wait for PT BOs to be idle. PTs share the same resv. object * as the root PD BO */ r = amdgpu_vm_wait_pd(adev, vm, owner); if (unlikely(r)) return r; + /* Wait for any BO move to be completed */ + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; return amdgpu_vm_update_ptes(¶ms, start, last + 1, From 1b52f2d5679db90166ea71382211f3c7319aab51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 30 Jan 2019 14:09:29 +0100 Subject: [PATCH 453/539] drm/amdgpu: cleanup VM dw estimation a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 93b936f7de4b..1e3a36c90d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1806,13 +1806,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* * reserve space for two commands every (1 << BLOCK_SIZE) * entries or 2k dwords (whatever is smaller) - * - * The second command is for the shadow pagetables. */ + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); + + /* The second command is for the shadow pagetables. */ if (vm->root.base.bo->shadow) - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; - else - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); + ncmds *= 2; /* padding, etc. */ ndw = 64; @@ -1831,10 +1830,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ndw += ncmds * 10; /* extra commands for begin/end fragments */ + ncmds = 2 * adev->vm_manager.fragment_size; if (vm->root.base.bo->shadow) - ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; - else - ndw += 2 * 10 * adev->vm_manager.fragment_size; + ncmds *= 2; + + ndw += 10 * ncmds; params.func = amdgpu_vm_do_set_ptes; } From 9ca089925f0c318307881eadc0535e7c7a7b5e2d Mon Sep 17 00:00:00 2001 From: John Barberiz Date: Wed, 16 Jan 2019 17:21:31 -0500 Subject: [PATCH 454/539] drm/amd/display: Use udelay when waiting between aux retries [Why] "IRQ_HPD Pulse Length Test" DP compliance test fails. Test complains that certain DPCD registers are not read within 100 ms. [How] msleep is inaccurate for small values. Used udelay instead for accuracy. Signed-off-by: John Barberiz Reviewed-by: Wenjing Liu Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index adbb22224e1a..4febf4ef7240 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -516,7 +516,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, } } - msleep(1); + udelay(1000); } return false; } From f3e3698d05454f2aeb9ef9969fc58a68090441ad Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 9 Jan 2019 09:14:54 -0500 Subject: [PATCH 455/539] drm/amd/display: Apply all surface updates onto surfaces [Why] Most surface updates weren't propagated onto the surface during dc_commit_updates_for_stream. This makes it more difficult for DC to determine the actual surface update type required. [How] Use copy_surface_update_to_plane to propagate the changes. The FreeSync surface timing information update for BTR has been moved out of amdgpu_dm.c into this function as well. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Sun peng Li Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 -- drivers/gpu/drm/amd/display/dc/core/dc.c | 104 ++++++++++++++++-- 2 files changed, 96 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0eea6c80864e..f0baf358fb4e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4760,15 +4760,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, surface, flip->flip_addrs[flip_count].flip_timestamp_in_us); - /* Update surface timing information. */ - surface->time.time_elapsed_in_us[surface->time.index] = - flip->flip_addrs[flip_count].flip_timestamp_in_us - - surface->time.prev_update_time_in_us; - surface->time.prev_update_time_in_us = flip->flip_addrs[flip_count].flip_timestamp_in_us; - surface->time.index++; - if (surface->time.index >= DC_PLANE_UPDATE_TIMES_MAX) - surface->time.index = 0; - DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x\n", __func__, flip->flip_addrs[flip_count].address.grph.addr.high_part, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e22be0aefd1b..db8252ec3671 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1440,6 +1440,101 @@ static struct dc_stream_status *stream_get_status( static const enum surface_update_type update_surface_trace_level = UPDATE_TYPE_FULL; +static void copy_surface_update_to_plane( + struct dc_plane_state *surface, + struct dc_surface_update *srf_update) +{ + if (srf_update->flip_addr) { + surface->address = srf_update->flip_addr->address; + surface->flip_immediate = + srf_update->flip_addr->flip_immediate; + surface->time.time_elapsed_in_us[surface->time.index] = + srf_update->flip_addr->flip_timestamp_in_us - + surface->time.prev_update_time_in_us; + surface->time.prev_update_time_in_us = + srf_update->flip_addr->flip_timestamp_in_us; + surface->time.index++; + if (surface->time.index >= DC_PLANE_UPDATE_TIMES_MAX) + surface->time.index = 0; + } + + if (srf_update->scaling_info) { + surface->scaling_quality = + srf_update->scaling_info->scaling_quality; + surface->dst_rect = + srf_update->scaling_info->dst_rect; + surface->src_rect = + srf_update->scaling_info->src_rect; + surface->clip_rect = + srf_update->scaling_info->clip_rect; + } + + if (srf_update->plane_info) { + surface->color_space = + srf_update->plane_info->color_space; + surface->format = + srf_update->plane_info->format; + surface->plane_size = + srf_update->plane_info->plane_size; + surface->rotation = + srf_update->plane_info->rotation; + surface->horizontal_mirror = + srf_update->plane_info->horizontal_mirror; + surface->stereo_format = + srf_update->plane_info->stereo_format; + surface->tiling_info = + srf_update->plane_info->tiling_info; + surface->visible = + srf_update->plane_info->visible; + surface->per_pixel_alpha = + srf_update->plane_info->per_pixel_alpha; + surface->global_alpha = + srf_update->plane_info->global_alpha; + surface->global_alpha_value = + srf_update->plane_info->global_alpha_value; + surface->dcc = + srf_update->plane_info->dcc; + surface->sdr_white_level = + srf_update->plane_info->sdr_white_level; + } + + if (srf_update->gamma && + (surface->gamma_correction != + srf_update->gamma)) { + memcpy(&surface->gamma_correction->entries, + &srf_update->gamma->entries, + sizeof(struct dc_gamma_entries)); + surface->gamma_correction->is_identity = + srf_update->gamma->is_identity; + surface->gamma_correction->num_entries = + srf_update->gamma->num_entries; + surface->gamma_correction->type = + srf_update->gamma->type; + } + + if (srf_update->in_transfer_func && + (surface->in_transfer_func != + srf_update->in_transfer_func)) { + surface->in_transfer_func->sdr_ref_white_level = + srf_update->in_transfer_func->sdr_ref_white_level; + surface->in_transfer_func->tf = + srf_update->in_transfer_func->tf; + surface->in_transfer_func->type = + srf_update->in_transfer_func->type; + memcpy(&surface->in_transfer_func->tf_pts, + &srf_update->in_transfer_func->tf_pts, + sizeof(struct dc_transfer_func_distributed_points)); + } + + if (srf_update->input_csc_color_matrix) + surface->input_csc_color_matrix = + *srf_update->input_csc_color_matrix; + + if (srf_update->coeff_reduction_factor) + surface->coeff_reduction_factor = + *srf_update->coeff_reduction_factor; +} + static void commit_planes_do_stream_update(struct dc *dc, struct dc_stream_state *stream, struct dc_stream_update *stream_update, @@ -1645,14 +1740,7 @@ void dc_commit_updates_for_stream(struct dc *dc, for (i = 0; i < surface_count; i++) { struct dc_plane_state *surface = srf_updates[i].surface; - /* TODO: On flip we don't build the state, so it still has the - * old address. Which is why we are updating the address here - */ - if (srf_updates[i].flip_addr) { - surface->address = srf_updates[i].flip_addr->address; - surface->flip_immediate = srf_updates[i].flip_addr->flip_immediate; - - } + copy_surface_update_to_plane(surface, &srf_updates[i]); if (update_type >= UPDATE_TYPE_MED) { for (j = 0; j < dc->res_pool->pipe_count; j++) { From c7af5f77aecdfe95633327cffc7f0c9dd1b64ab1 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 10 Jan 2019 09:51:54 -0500 Subject: [PATCH 456/539] drm/amd/display: Use the right surface for flip and FreeSync [Why] We were always passing the first surface on the stream status for flip updates when we should be using the surface associated with the plane. [How] Use the dc_plane_state from the plane that's being updated. FreeSync should also only keep track of updates from the primary plane, so the check needed to be updated. The acrtc->stream state doesn't need to be checked for NULL before updating FreeSync either since there needs to be a stream to be inside this function as a prerequisite. Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f0baf358fb4e..11c256c1dca7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4642,7 +4642,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, unsigned long flags; struct amdgpu_bo *abo; uint64_t tiling_flags, dcc_address; - struct dc_stream_status *stream_status; uint32_t target, target_vblank; struct { @@ -4673,7 +4672,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_framebuffer *fb = new_plane_state->fb; struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); bool pflip_needed; - struct dc_plane_state *surface, *dc_plane; + struct dc_plane_state *dc_plane; struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); if (plane->type == DRM_PLANE_TYPE_CURSOR) { @@ -4736,28 +4735,20 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, timestamp_ns = ktime_get_ns(); flip->flip_addrs[flip_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); flip->surface_updates[flip_count].flip_addr = &flip->flip_addrs[flip_count]; + flip->surface_updates[flip_count].surface = dc_plane; - stream_status = dc_stream_get_status(acrtc_state->stream); - if (!stream_status) { - DRM_ERROR("No stream status for CRTC: id=%d\n", - acrtc_attach->crtc_id); - continue; - } - - surface = stream_status->plane_states[0]; - flip->surface_updates[flip_count].surface = surface; if (!flip->surface_updates[flip_count].surface) { DRM_ERROR("No surface for CRTC: id=%d\n", acrtc_attach->crtc_id); continue; } - if (acrtc_state->stream) + if (plane == pcrtc->primary) update_freesync_state_on_stream( dm, acrtc_state, acrtc_state->stream, - surface, + dc_plane, flip->flip_addrs[flip_count].flip_timestamp_in_us); DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x\n", From c744e974a22b0cf78ba581afb8244af1618f2649 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 10 Jan 2019 09:14:49 -0500 Subject: [PATCH 457/539] drm/amd/display: Reformat dm_determine_update_type_for_commit [Why] The indenting for this function is a few levels too deep and can be simplified a fair bit. This patch is in preparation for functional changes that fix update type determination to occur less frequently and more accurately. [How] Place checks early and exit/continue when possible. This isn't a functional change. Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 126 +++++++++--------- 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 11c256c1dca7..387f1ba39de4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5791,75 +5791,79 @@ dm_determine_update_type_for_commit(struct dc *dc, old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); num_plane = 0; - if (new_dm_crtc_state->stream) { - - for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) { - new_plane_crtc = new_plane_state->crtc; - old_plane_crtc = old_plane_state->crtc; - new_dm_plane_state = to_dm_plane_state(new_plane_state); - old_dm_plane_state = to_dm_plane_state(old_plane_state); - - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; - - if (!state->allow_modeset) - continue; - - if (crtc == new_plane_crtc) { - updates[num_plane].surface = &surface[num_plane]; - - if (new_crtc_state->mode_changed) { - updates[num_plane].surface->src_rect = - new_dm_plane_state->dc_state->src_rect; - updates[num_plane].surface->dst_rect = - new_dm_plane_state->dc_state->dst_rect; - updates[num_plane].surface->rotation = - new_dm_plane_state->dc_state->rotation; - updates[num_plane].surface->in_transfer_func = - new_dm_plane_state->dc_state->in_transfer_func; - stream_update.dst = new_dm_crtc_state->stream->dst; - stream_update.src = new_dm_crtc_state->stream->src; - } - - if (new_crtc_state->color_mgmt_changed) { - updates[num_plane].gamma = - new_dm_plane_state->dc_state->gamma_correction; - updates[num_plane].in_transfer_func = - new_dm_plane_state->dc_state->in_transfer_func; - stream_update.gamut_remap = - &new_dm_crtc_state->stream->gamut_remap_matrix; - stream_update.out_transfer_func = - new_dm_crtc_state->stream->out_transfer_func; - } - - num_plane++; - } + if (!new_dm_crtc_state->stream) { + if (!new_dm_crtc_state->stream && old_dm_crtc_state->stream) { + update_type = UPDATE_TYPE_FULL; + goto cleanup; } - if (num_plane > 0) { - ret = dm_atomic_get_state(state, &dm_state); - if (ret) - goto cleanup; + continue; + } - old_dm_state = dm_atomic_get_old_state(state); - if (!old_dm_state) { - ret = -EINVAL; - goto cleanup; - } + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) { + new_plane_crtc = new_plane_state->crtc; + old_plane_crtc = old_plane_state->crtc; + new_dm_plane_state = to_dm_plane_state(new_plane_state); + old_dm_plane_state = to_dm_plane_state(old_plane_state); - status = dc_stream_get_status_from_state(old_dm_state->context, - new_dm_crtc_state->stream); + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; - update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, - &stream_update, status); + if (!state->allow_modeset) + continue; - if (update_type > UPDATE_TYPE_MED) { - update_type = UPDATE_TYPE_FULL; - goto cleanup; - } + if (crtc != new_plane_crtc) + continue; + + updates[num_plane].surface = &surface[num_plane]; + + if (new_crtc_state->mode_changed) { + updates[num_plane].surface->src_rect = + new_dm_plane_state->dc_state->src_rect; + updates[num_plane].surface->dst_rect = + new_dm_plane_state->dc_state->dst_rect; + updates[num_plane].surface->rotation = + new_dm_plane_state->dc_state->rotation; + updates[num_plane].surface->in_transfer_func = + new_dm_plane_state->dc_state->in_transfer_func; + stream_update.dst = new_dm_crtc_state->stream->dst; + stream_update.src = new_dm_crtc_state->stream->src; } - } else if (!new_dm_crtc_state->stream && old_dm_crtc_state->stream) { + if (new_crtc_state->color_mgmt_changed) { + updates[num_plane].gamma = + new_dm_plane_state->dc_state->gamma_correction; + updates[num_plane].in_transfer_func = + new_dm_plane_state->dc_state->in_transfer_func; + stream_update.gamut_remap = + &new_dm_crtc_state->stream->gamut_remap_matrix; + stream_update.out_transfer_func = + new_dm_crtc_state->stream->out_transfer_func; + } + + num_plane++; + } + + if (num_plane == 0) + continue; + + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto cleanup; + + old_dm_state = dm_atomic_get_old_state(state); + if (!old_dm_state) { + ret = -EINVAL; + goto cleanup; + } + + status = dc_stream_get_status_from_state(old_dm_state->context, + new_dm_crtc_state->stream); + + update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, + &stream_update, status); + + if (update_type > UPDATE_TYPE_MED) { update_type = UPDATE_TYPE_FULL; goto cleanup; } From c448a53aaf9ef69942a8a4d6a9b7b191839d90c1 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 10 Jan 2019 11:52:11 -0500 Subject: [PATCH 458/539] drm/amd/display: Initialize stream_update to zero [Why] The stream_update struct is left unitialized but DC will access its fields. This usually results in global state validation occur during any atomic commit with state->allow_modeset = true. [How] Initialize the struct to zero for every stream we check. Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 387f1ba39de4..824e177fba9b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5773,7 +5773,6 @@ dm_determine_update_type_for_commit(struct dc *dc, struct dc_surface_update *updates; struct dc_plane_state *surface; - struct dc_stream_update stream_update; enum surface_update_type update_type = UPDATE_TYPE_FAST; updates = kcalloc(MAX_SURFACES, sizeof(*updates), GFP_KERNEL); @@ -5787,6 +5786,8 @@ dm_determine_update_type_for_commit(struct dc *dc, } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct dc_stream_update stream_update = { 0 }; + new_dm_crtc_state = to_dm_crtc_state(new_crtc_state); old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); num_plane = 0; From 385d7eeaf14752b9d8180feb4bc52730b6dc99ce Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 10 Jan 2019 10:33:53 -0500 Subject: [PATCH 459/539] drm/amd/display: Remove FreeSync timing changed debug output [Why] This provides little debug value and creates a lot of dmesg noise. [How] Remove it. Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 824e177fba9b..41e3620bed4d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4613,12 +4613,6 @@ static void update_freesync_state_on_stream( new_crtc_state->base.crtc->base.id, (int)new_crtc_state->base.vrr_enabled, (int)vrr_params.state); - - if (new_crtc_state->freesync_timing_changed) - DRM_DEBUG_KMS("VRR timing update: crtc=%u min=%u max=%u\n", - new_crtc_state->base.crtc->base.id, - vrr_params.adjust.v_total_min, - vrr_params.adjust.v_total_max); } static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, From a4f1d2b8a543e136a0c945ca85d5d64ab471ac2a Mon Sep 17 00:00:00 2001 From: Xiaodong Yan Date: Thu, 17 Jan 2019 00:18:01 +0800 Subject: [PATCH 460/539] drm/amd/display: Add monitor patch for backlight off [Why] Different panel need different time from backlight disable to end of valid video data, if the time is too short, panel will flash when dpms off [How] Add monitor patch to control the time from backlight disable to end of valid video data, Signed-off-by: Xiaodong Yan Reviewed-by: Wenjing Liu Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 16d441d3af8a..f7f7515f65f4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -119,6 +119,10 @@ bool edp_receiver_ready_T9(struct dc_link *link) break; udelay(100); //MAx T9 } while (++tries < 50); + + if (link->local_sink->edid_caps.panel_patch.extra_delay_backlight_off > 0) + udelay(link->local_sink->edid_caps.panel_patch.extra_delay_backlight_off * 1000); + return result; } bool edp_receiver_ready_T7(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 56e7f3dab15a..da2009a108cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -201,6 +201,7 @@ union display_content_support { struct dc_panel_patch { unsigned int dppowerup_delay; unsigned int extra_t12_ms; + unsigned int extra_delay_backlight_off; }; struct dc_edid_caps { From b21e09d0558e4af7fa1a147be4e3da1a46ebbf77 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Thu, 17 Jan 2019 10:57:23 -0500 Subject: [PATCH 461/539] drm/amd/display: fix issue with DC brightness low with VB [Why] The problem is that we accidentally stopped loading some of the IRAM bytes used for the backlight ramping mechanism. This happened when we started reserving some region of IRAM as DMCU FW write only. [How] This change will define a start+end region for the IRAM read only region. So the parameters needed for the backlight operation will be loaded since it will be defined outside of the read only region. Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../amd/display/modules/power/power_helpers.c | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index baab6c4ae191..27b8cf5460b5 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -67,9 +67,14 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le #define NUM_AGGR_LEVEL 4 #define NUM_POWER_FN_SEGS 8 #define NUM_BL_CURVE_SEGS 16 -#define IRAM_RESERVE_AREA_START 0xF0 // reserve 0xF0~0xFF are write by DMCU only #define IRAM_SIZE 256 +#define IRAM_RESERVE_AREA_START_V2 0xF0 // reserve 0xF0~0xF6 are write by DMCU only +#define IRAM_RESERVE_AREA_END_V2 0xF6 // reserve 0xF0~0xF6 are write by DMCU only + +#define IRAM_RESERVE_AREA_START_V2_2 0xF0 // reserve 0xF0~0xFF are write by DMCU only +#define IRAM_RESERVE_AREA_END_V2_2 0xFF // reserve 0xF0~0xFF are write by DMCU only + #pragma pack(push, 1) /* NOTE: iRAM is 256B in size */ struct iram_table_v_2 { @@ -148,8 +153,10 @@ struct iram_table_v_2_2 { uint16_t dmcu_version; /* 0xf4 */ uint8_t dmcu_state; /* 0xf6 */ - uint16_t blRampReduction; /* 0xf7 */ - uint16_t blRampStart; /* 0xf9 */ + uint8_t dummy1; /* 0xf7 */ + uint8_t dummy2; /* 0xf8 */ + uint8_t dummy3; /* 0xf9 */ + uint8_t dummy4; /* 0xfa */ uint8_t dummy5; /* 0xfb */ uint8_t dummy6; /* 0xfc */ uint8_t dummy7; /* 0xfd */ @@ -420,11 +427,6 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame ram_table->deviation_gain[2] = 0xb3; ram_table->deviation_gain[3] = 0xb3; - ram_table->blRampReduction = - cpu_to_be16(params.backlight_ramping_reduction); - ram_table->blRampStart = - cpu_to_be16(params.backlight_ramping_start); - ram_table->min_reduction[0][0] = min_reduction_table_v_2_2[abm_config[set][0]]; ram_table->min_reduction[1][0] = min_reduction_table_v_2_2[abm_config[set][0]]; ram_table->min_reduction[2][0] = min_reduction_table_v_2_2[abm_config[set][0]]; @@ -561,6 +563,7 @@ bool dmcu_load_iram(struct dmcu *dmcu, struct dmcu_iram_parameters params) { unsigned char ram_table[IRAM_SIZE]; + bool result = false; if (dmcu == NULL) return false; @@ -572,10 +575,21 @@ bool dmcu_load_iram(struct dmcu *dmcu, if (dmcu->dmcu_version.abm_version == 0x22) { fill_iram_v_2_2((struct iram_table_v_2_2 *)ram_table, params); + + result = dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2_2); } else { fill_iram_v_2((struct iram_table_v_2 *)ram_table, params); + + result = dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START_V2); + + if (result) + result = dmcu->funcs->load_iram( + dmcu, IRAM_RESERVE_AREA_END_V2 + 1, + (char *)(&ram_table) + IRAM_RESERVE_AREA_END_V2 + 1, + sizeof(ram_table) - IRAM_RESERVE_AREA_END_V2 - 1); } - return dmcu->funcs->load_iram( - dmcu, 0, (char *)(&ram_table), IRAM_RESERVE_AREA_START); + return result; } From 22d7663f47cc36704c319c58bc1b1da8d42d53eb Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Thu, 17 Jan 2019 18:56:14 +0800 Subject: [PATCH 462/539] drm/amd/display: dmcu is blocking due to wrong disable ABM command [Why] Second screen to clone/extend mode, driver will send ABM pipe command to DMCU. Change mode from clone/extend to second screen only, driver send ABM level command to disable ABM but this command will not clear ABM pipe data. At this time, change second screen to PC screen only, driver will send first command "ABM_LEVEL", it will turn on ABM with incorrect ABM pile so that DMCU is blocking. [How] When driver try to disable ABM, change command from "ABM LEVEL" to "ABM PIPE" so that it will clear ABM pile data. Signed-off-by: Paul Hsieh Reviewed-by: Charlene Liu Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 2a342eae80fd..01e56f1a9f34 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -314,8 +314,8 @@ static bool dce_abm_immediate_disable(struct abm *abm) /* setDMCUParam_ABMLevel */ REG_UPDATE_2(MASTER_COMM_CMD_REG, - MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_LEVEL_SET, - MASTER_COMM_CMD_REG_BYTE2, MCP_DISABLE_ABM_IMMEDIATELY); + MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET, + MASTER_COMM_CMD_REG_BYTE1, MCP_DISABLE_ABM_IMMEDIATELY); /* notifyDMCUMsg */ REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); From a057ec460ab1f43a85e0fbb9938c2c3460cb7445 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Thu, 17 Jan 2019 13:40:34 -0500 Subject: [PATCH 463/539] drm/amd/display: Check that vrefresh is in freesync range [Why] Setting monitor refresh rate below freesync range would cause the monitor to go blank indefinitely with freesync enabled [How] Set vrr_supported and ignore_msa_timing_param according to whether the refresh rate is above or below the minimum freesync frequency. Signed-off-by: Ilya Bakoulin Reviewed-by: Nicholas Kazlauskas Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 41e3620bed4d..06fc7c58a7b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3030,9 +3030,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, update_stream_signal(stream, sink); - if (dm_state && dm_state->freesync_capable) - stream->ignore_msa_timing_param = true; - finish: if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON) dc_sink_release(sink); @@ -5356,10 +5353,13 @@ static void get_freesync_config_for_crtc( struct mod_freesync_config config = {0}; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(new_con_state->base.connector); + struct drm_display_mode *mode = &new_crtc_state->base.mode; - new_crtc_state->vrr_supported = new_con_state->freesync_capable; + new_crtc_state->vrr_supported = new_con_state->freesync_capable && + aconnector->min_vfreq <= drm_mode_vrefresh(mode); - if (new_con_state->freesync_capable) { + if (new_crtc_state->vrr_supported) { + new_crtc_state->stream->ignore_msa_timing_param = true; config.state = new_crtc_state->base.vrr_enabled ? VRR_STATE_ACTIVE_VARIABLE : VRR_STATE_INACTIVE; From 8f0159122714d38f89140993eb0a070f8eeb2e9d Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Thu, 17 Jan 2019 17:56:51 -0500 Subject: [PATCH 464/539] drm/amd/display: DGAM enabled for HDR [Why] On HW that doesn't have input LUT, we may combine degamma with OS ramp Problem here is that it assumes DGAM is inverse of PQ or SRGB. It doesn't handle linear case, it would default to sRGB and always enable DGAM.. [How] Add handling for linear case. Also check for null ramp and instead of blowing up, assume it's identity. Signed-off-by: Krunoslav Kovac Reviewed-by: Anthony Koo Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 89 +++++++++++-------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index eefb85928298..0fbc8fbc3541 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1765,68 +1765,85 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, { struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts; struct dividers dividers; - struct pwl_float_data *rgb_user = NULL; struct pwl_float_data_ex *curve = NULL; struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; + uint32_t i; bool ret = false; if (input_tf->type == TF_TYPE_BYPASS) return false; - /* we can use hardcoded curve for plain SRGB TF */ + /* we can use hardcoded curve for plain SRGB TF + * If linear, it's bypass if on user ramp + */ if (input_tf->type == TF_TYPE_PREDEFINED && - input_tf->tf == TRANSFER_FUNCTION_SRGB && - !mapUserRamp) + (input_tf->tf == TRANSFER_FUNCTION_SRGB || + input_tf->tf == TRANSFER_FUNCTION_LINEAR) && + !mapUserRamp) return true; input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, - sizeof(*rgb_user), - GFP_KERNEL); - if (!rgb_user) - goto rgb_user_alloc_fail; + if (mapUserRamp && ramp && ramp->type == GAMMA_RGB_256) { + rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS, + sizeof(*rgb_user), + GFP_KERNEL); + if (!rgb_user) + goto rgb_user_alloc_fail; + + axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x), + GFP_KERNEL); + if (!axis_x) + goto axis_x_alloc_fail; + + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); + + build_evenly_distributed_points( + axis_x, + ramp->num_entries, + dividers); + + scale_gamma(rgb_user, ramp, dividers); + } + curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve), - GFP_KERNEL); + GFP_KERNEL); if (!curve) goto curve_alloc_fail; - axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x), - GFP_KERNEL); - if (!axis_x) - goto axis_x_alloc_fail; + coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), - GFP_KERNEL); + GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; - dividers.divider1 = dc_fixpt_from_fraction(3, 2); - dividers.divider2 = dc_fixpt_from_int(2); - dividers.divider3 = dc_fixpt_from_fraction(5, 2); - tf = input_tf->tf; - build_evenly_distributed_points( - axis_x, - ramp->num_entries, - dividers); - - if (ramp->type == GAMMA_RGB_256 && mapUserRamp) - scale_gamma(rgb_user, ramp, dividers); - else if (ramp->type == GAMMA_RGB_FLOAT_1024) - scale_gamma_dx(rgb_user, ramp, dividers); - if (tf == TRANSFER_FUNCTION_PQ) build_de_pq(curve, MAX_HW_POINTS, coordinates_x); - else + else if (tf == TRANSFER_FUNCTION_SRGB || + tf == TRANSFER_FUNCTION_BT709) build_degamma(curve, MAX_HW_POINTS, coordinates_x, - tf == TRANSFER_FUNCTION_SRGB ? true:false); + tf == TRANSFER_FUNCTION_SRGB ? true : false); + else if (tf == TRANSFER_FUNCTION_LINEAR) { + // just copy coordinates_x into curve + i = 0; + while (i != MAX_HW_POINTS + 1) { + curve[i].r = coordinates_x[i].x; + curve[i].g = curve[i].r; + curve[i].b = curve[i].r; + i++; + } + } else + goto invalid_tf_fail; tf_pts->end_exponent = 0; tf_pts->x_point_at_y1_red = 1; @@ -1836,23 +1853,21 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, map_regamma_hw_to_x_user(ramp, coeff, rgb_user, coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, - mapUserRamp && ramp->type != GAMMA_CUSTOM); - if (ramp->type == GAMMA_CUSTOM) - apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); + mapUserRamp && ramp && ramp->type == GAMMA_RGB_256); ret = true; +invalid_tf_fail: kvfree(coeff); coeff_alloc_fail: - kvfree(axis_x); -axis_x_alloc_fail: kvfree(curve); curve_alloc_fail: + kvfree(axis_x); +axis_x_alloc_fail: kvfree(rgb_user); rgb_user_alloc_fail: return ret; - } From 5fc0cbfad4564856ee0f323d3f88a7cff19cc3f1 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 18 Jan 2019 18:19:51 -0500 Subject: [PATCH 465/539] drm/amd/display: determine if a pipe is synced by plane state [why] is_blanked is not a general indicator of if a pipe is synced for all asics. plane state is more accurate and applicable for all asics. [how] Remove is_blanked call and add checking plane_state against NULL instead. Signed-off-by: Wenjing Liu Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index db8252ec3671..e0ac009f00ab 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -907,11 +907,11 @@ static void program_timing_sync( } } - /* set first unblanked pipe as master */ + /* set first pipe with plane as master */ for (j = 0; j < group_size; j++) { struct pipe_ctx *temp; - if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { + if (pipe_set[j]->plane_state) { if (j == 0) break; @@ -922,9 +922,9 @@ static void program_timing_sync( } } - /* remove any other unblanked pipes as they have already been synced */ + /* remove any other pipes with plane as they have already been synced */ for (j = j + 1; j < group_size; j++) { - if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { + if (pipe_set[j]->plane_state) { group_size--; pipe_set[j] = pipe_set[group_size]; j--; From cf7d98d254e9ffe565fc736e101299cc02bc0aa1 Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Fri, 18 Jan 2019 15:29:38 -0500 Subject: [PATCH 466/539] drm/amd/display: 3.2.16 Signed-off-by: Steven Chiu Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8391bc39b7a9..0b2ffc7cfffd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.15" +#define DC_VER "3.2.16" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From ac627caf6b9275a3df4730be3b8637c3bc232979 Mon Sep 17 00:00:00 2001 From: Chiawen Huang Date: Fri, 18 Jan 2019 14:07:54 +0800 Subject: [PATCH 467/539] drm/amd/display: add gpio lock/unlock [Why] When querying HPD via GPIO flow, it will create a new gpio object then free in the end of query. There is a irql issue for HPD querying at ISR level. [How] Therefore, creating the HPD gpio object in dc_link and set it as unlcok in default. 1. reducing unnecessary malloc/free when HPD querying. 2. reducing init GPIO flow. 3. add lock/unlock to prevent multi gpio service running. Signed-off-by: Chiawen Huang Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 47 +++++++++---------- drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + .../gpu/drm/amd/display/dc/gpio/gpio_base.c | 12 +++++ .../drm/amd/display/dc/gpio/gpio_service.c | 28 +++++++++++ .../drm/amd/display/dc/gpio/gpio_service.h | 10 ++++ .../drm/amd/display/include/gpio_interface.h | 8 ++++ 6 files changed, 81 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 8ff5d42587c2..137d3c126632 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -76,6 +76,12 @@ static void destruct(struct dc_link *link) { int i; + if (link->hpd_gpio != NULL) { + dal_gpio_close(link->hpd_gpio); + dal_gpio_destroy_irq(&link->hpd_gpio); + link->hpd_gpio = NULL; + } + if (link->ddc) dal_ddc_service_destroy(&link->ddc); @@ -931,18 +937,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) bool dc_link_get_hpd_state(struct dc_link *dc_link) { - struct gpio *hpd_pin; uint32_t state; - hpd_pin = get_hpd_gpio(dc_link->ctx->dc_bios, - dc_link->link_id, dc_link->ctx->gpio_service); - if (hpd_pin == NULL) - ASSERT(false); - - dal_gpio_open(hpd_pin, GPIO_MODE_INTERRUPT); - dal_gpio_get_value(hpd_pin, &state); - dal_gpio_close(hpd_pin); - dal_gpio_destroy_irq(&hpd_pin); + dal_gpio_lock_pin(dc_link->hpd_gpio); + dal_gpio_get_value(dc_link->hpd_gpio, &state); + dal_gpio_unlock_pin(dc_link->hpd_gpio); return state; } @@ -1098,7 +1097,6 @@ static bool construct( const struct link_init_data *init_params) { uint8_t i; - struct gpio *hpd_gpio = NULL; struct ddc_service_init_data ddc_service_init_data = { { 0 } }; struct dc_context *dc_ctx = init_params->ctx; struct encoder_init_data enc_init_data = { 0 }; @@ -1128,10 +1126,11 @@ static bool construct( if (link->dc->res_pool->funcs->link_init) link->dc->res_pool->funcs->link_init(link); - hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service); - - if (hpd_gpio != NULL) - link->irq_source_hpd = dal_irq_get_source(hpd_gpio); + link->hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service); + dal_gpio_open(link->hpd_gpio, GPIO_MODE_INTERRUPT); + dal_gpio_unlock_pin(link->hpd_gpio); + if (link->hpd_gpio != NULL) + link->irq_source_hpd = dal_irq_get_source(link->hpd_gpio); switch (link->link_id.id) { case CONNECTOR_ID_HDMI_TYPE_A: @@ -1149,18 +1148,18 @@ static bool construct( case CONNECTOR_ID_DISPLAY_PORT: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; - if (hpd_gpio != NULL) + if (link->hpd_gpio != NULL) link->irq_source_hpd_rx = - dal_irq_get_rx_source(hpd_gpio); + dal_irq_get_rx_source(link->hpd_gpio); break; case CONNECTOR_ID_EDP: link->connector_signal = SIGNAL_TYPE_EDP; - if (hpd_gpio != NULL) { + if (link->hpd_gpio != NULL) { link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = - dal_irq_get_rx_source(hpd_gpio); + dal_irq_get_rx_source(link->hpd_gpio); } break; case CONNECTOR_ID_LVDS: @@ -1171,10 +1170,7 @@ static bool construct( goto create_fail; } - if (hpd_gpio != NULL) { - dal_gpio_destroy_irq(&hpd_gpio); - hpd_gpio = NULL; - } + /* TODO: #DAL3 Implement id to str function.*/ LINK_INFO("Connector[%d] description:" @@ -1277,8 +1273,9 @@ link_enc_create_fail: ddc_create_fail: create_fail: - if (hpd_gpio != NULL) { - dal_gpio_destroy_irq(&hpd_gpio); + if (link->hpd_gpio != NULL) { + dal_gpio_destroy_irq(&link->hpd_gpio); + link->hpd_gpio = NULL; } return false; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f249ff9be2a7..d26bbda61ad2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -125,6 +125,7 @@ struct dc_link { struct dc_link_status link_status; struct link_trace link_trace; + struct gpio *hpd_gpio; }; const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c index 1d1efd72b291..cf76ea2d9f5a 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c @@ -101,6 +101,18 @@ enum gpio_mode dal_gpio_get_mode( return gpio->mode; } +enum gpio_result dal_gpio_lock_pin( + struct gpio *gpio) +{ + return dal_gpio_service_lock(gpio->service, gpio->id, gpio->en); +} + +enum gpio_result dal_gpio_unlock_pin( + struct gpio *gpio) +{ + return dal_gpio_service_unlock(gpio->service, gpio->id, gpio->en); +} + enum gpio_result dal_gpio_change_mode( struct gpio *gpio, enum gpio_mode mode) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index dada04296025..3c63a3c04dbb 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -192,6 +192,34 @@ static void set_pin_free( service->busyness[id][en] = false; } +enum gpio_result dal_gpio_service_lock( + struct gpio_service *service, + enum gpio_id id, + uint32_t en) +{ + if (!service->busyness[id]) { + ASSERT_CRITICAL(false); + return GPIO_RESULT_OPEN_FAILED; + } + + set_pin_busy(service, id, en); + return GPIO_RESULT_OK; +} + +enum gpio_result dal_gpio_service_unlock( + struct gpio_service *service, + enum gpio_id id, + uint32_t en) +{ + if (!service->busyness[id]) { + ASSERT_CRITICAL(false); + return GPIO_RESULT_OPEN_FAILED; + } + + set_pin_free(service, id, en); + return GPIO_RESULT_OK; +} + enum gpio_result dal_gpio_service_open( struct gpio_service *service, enum gpio_id id, diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h index 1d501a43d13b..0c678af75331 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h @@ -52,4 +52,14 @@ void dal_gpio_service_close( struct gpio_service *service, struct hw_gpio_pin **ptr); +enum gpio_result dal_gpio_service_lock( + struct gpio_service *service, + enum gpio_id id, + uint32_t en); + +enum gpio_result dal_gpio_service_unlock( + struct gpio_service *service, + enum gpio_id id, + uint32_t en); + #endif diff --git a/drivers/gpu/drm/amd/display/include/gpio_interface.h b/drivers/gpu/drm/amd/display/include/gpio_interface.h index e4fd31024b92..7de64195dc33 100644 --- a/drivers/gpu/drm/amd/display/include/gpio_interface.h +++ b/drivers/gpu/drm/amd/display/include/gpio_interface.h @@ -59,6 +59,14 @@ enum gpio_result dal_gpio_change_mode( struct gpio *gpio, enum gpio_mode mode); +/* Lock Pin */ +enum gpio_result dal_gpio_lock_pin( + struct gpio *gpio); + +/* Unlock Pin */ +enum gpio_result dal_gpio_unlock_pin( + struct gpio *gpio); + /* Get the GPIO id */ enum gpio_id dal_gpio_get_id( const struct gpio *gpio); From 056f05f65bf42cb69c48bf1bb6e085c7a405a565 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Fri, 18 Jan 2019 19:44:33 -0500 Subject: [PATCH 468/539] drm/amd/display: pass vline_config parameter by reference. Signed-off-by: Yongqiang Sun Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e0ac009f00ab..d8579b207300 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1560,11 +1560,11 @@ static void commit_planes_do_stream_update(struct dc *dc, if (stream_update->vline0_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, VLINE0, stream->vline0_config); + pipe_ctx->stream_res.tg, VLINE0, &stream->vline0_config); if (stream_update->vline1_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, VLINE1, stream->vline1_config); + pipe_ctx->stream_res.tg, VLINE1, &stream->vline1_config); if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) || stream_update->vrr_infopacket || diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 1d4f9b48ed7d..cefa322df8a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -95,19 +95,19 @@ static void optc1_disable_stereo(struct timing_generator *optc) void optc1_program_vline_interrupt( struct timing_generator *optc, enum vline_select vline, - struct vline_config vline_config) + const struct vline_config *vline_config) { struct optc *optc1 = DCN10TG_FROM_TG(optc); switch (vline) { case VLINE0: REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, - OTG_VERTICAL_INTERRUPT0_LINE_START, vline_config.start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, vline_config.end_line); + OTG_VERTICAL_INTERRUPT0_LINE_START, vline_config->start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, vline_config->end_line); break; case VLINE1: REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, - OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config.start_line); + OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config->start_line); break; default: break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 8eb71c0160a7..b34c8a240598 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -485,7 +485,7 @@ void optc1_program_timing( void optc1_program_vline_interrupt(struct timing_generator *optc, enum vline_select vline, - struct vline_config vline_config); + const struct vline_config *vline_config); void optc1_program_global_sync( struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index df64cf73ceb9..d22a406c19c0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -151,7 +151,7 @@ struct timing_generator_funcs { bool use_vbios); void (*program_vline_interrupt)(struct timing_generator *optc, enum vline_select vline, - struct vline_config vline_config); + const struct vline_config *vline_config); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); From 4f69bc8c16f02478717ab5708d106d86e6f9db68 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 22 Jan 2019 13:50:45 -0500 Subject: [PATCH 469/539] drm/amd/display: Modify ABM 2.2 Max Reduction [Why] Reduced backlight for ABM 2.2 at levels 1 and 2 is desired for power savings. [How] Reduce the max reduction parameters for ABM 2.2 Signed-off-by: Josip Pavic Reviewed-by: Josip Pavic Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 27b8cf5460b5..3ba87b076287 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -47,10 +47,10 @@ static const unsigned char min_reduction_table_v_2_2[13] = { /* Possible ABM 2.2 Max Reduction configs from least aggressive to most aggressive * 0 1 2 3 4 5 6 7 8 9 10 11 12 - * 96.1 89.8 85.1 80.3 69.4 64.7 54.9 45.1 30.2 25.1 19.6 12.5 12.5 % + * 96.1 89.8 74.9 69.4 64.7 52.2 48.6 39.6 30.2 25.1 19.6 12.5 12.5 % */ static const unsigned char max_reduction_table_v_2_2[13] = { -0xf5, 0xe5, 0xd9, 0xcd, 0xb1, 0xa5, 0x8c, 0x73, 0x4d, 0x40, 0x32, 0x20, 0x20}; +0xf5, 0xe5, 0xbf, 0xb1, 0xa5, 0x85, 0x7c, 0x65, 0x4d, 0x40, 0x32, 0x20, 0x20}; /* Predefined ABM configuration sets. We may have different configuration sets * in order to satisfy different power/quality requirements. From 8dac4e7d89ea3b09695c0948d375a62df9443ca0 Mon Sep 17 00:00:00 2001 From: Su Sung Chung Date: Mon, 21 Jan 2019 12:01:53 -0500 Subject: [PATCH 470/539] drm/amd/display: store timing sync info in dc_stream_status in program_timing_sync, after all the pipes are grouped, store timing sync info in dc_stream_status Signed-off-by: Su Sung Chung Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 16 +++++++++++++++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 6 ++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d8579b207300..d9f62befd86a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -869,8 +869,9 @@ static void program_timing_sync( struct dc *dc, struct dc_state *ctx) { - int i, j; + int i, j, k; int group_index = 0; + int num_group = 0; int pipe_count = dc->res_pool->pipe_count; struct pipe_ctx *unsynced_pipes[MAX_PIPES] = { NULL }; @@ -922,6 +923,18 @@ static void program_timing_sync( } } + + for (k = 0; k < group_size; k++) { + struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream); + + status->timing_sync_info.group_id = num_group; + status->timing_sync_info.group_size = group_size; + if (k == 0) + status->timing_sync_info.master = true; + else + status->timing_sync_info.master = false; + + } /* remove any other pipes with plane as they have already been synced */ for (j = j + 1; j < group_size; j++) { if (pipe_set[j]->plane_state) { @@ -936,6 +949,7 @@ static void program_timing_sync( dc, group_index, group_size, pipe_set); group_index++; } + num_group++; } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 0de6d7f377a6..2d1f2825de09 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -32,11 +32,17 @@ /******************************************************************************* * Stream Interfaces ******************************************************************************/ +struct timing_sync_info { + int group_id; + int group_size; + bool master; +}; struct dc_stream_status { int primary_otg_inst; int stream_enc_inst; int plane_count; + struct timing_sync_info timing_sync_info; struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; }; From 49782c638f6a33992f5c1c69414a7ef5c08a0d31 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 5 Dec 2018 10:52:25 -0500 Subject: [PATCH 471/539] drm/amd/display: add a debug flag to force odm combine Adding a bit vector to allow forcing odm on specific otgs Signed-off-by: Dmytro Laktyushkin Reviewed-by: Charlene Liu Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0b2ffc7cfffd..ee2c7e5b6087 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -255,6 +255,7 @@ struct dc_debug_options { bool scl_reset_length10; bool hdmi20_disable; bool skip_detection_link_training; + unsigned int force_odm_combine; //bit vector based on otg inst unsigned int force_fclk_khz; }; From e3fa5c4cf1b93205d64e3d472d049fe5d8e88e94 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Wed, 23 Jan 2019 11:41:18 -0500 Subject: [PATCH 472/539] drm/amd/display: Apply fake sink back to MST sequence [Why] It fixes the failure to create stream for sink in the scenario when hotplug SST and MST in sequence, and disconnect MST. [How] Add the fake sink back after the majority of MST rework is done. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Nicholas Kazlauskas Acked-by: Bhawanpreet Lakha Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++----- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 06fc7c58a7b1..bcc906a4eb04 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2959,11 +2959,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_connector = &aconnector->base; if (!aconnector->dc_sink) { - if (!aconnector->mst_port) { - sink = create_fake_sink(aconnector); - if (!sink) - return stream; - } + sink = create_fake_sink(aconnector); + if (!sink) + return stream; } else { sink = aconnector->dc_sink; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 60da1222afaa..73f8e8fa6edb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -173,6 +173,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) aconnector->edid = edid; } + if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) + dc_sink_release(aconnector->dc_sink); + if (!aconnector->dc_sink) { struct dc_sink *dc_sink; struct dc_sink_init_data init_params = { From 77476360f173c127c191bfe8ca8113130ef283b8 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 23 Jan 2019 13:50:17 -0500 Subject: [PATCH 473/539] drm/amd/display: Disconnect mpcc when changing tg [Why] This fixes an mpc programming error for the following sequence of atomic commits when pipe split is enabled: Commit 1: CRTC0 (plane 4, plane 3) Pipe 0: old_plane_state = A0, new_plane_state = A1, new_tg = T0 Pipe 1: old_plane_state = B0, new_plane_state = B1, new_tg = T0 Pipe 2: old_plane_state = A0, new_plane_state = A1, new_tg = T0 Pipe 3: old_plane_state = B0, new_plane_state = B1, new_tg = T0 Commit 2: CRTC0 (plane 3), CRTC1 (plane 2) Pipe 0: old_plane_state = A1, new_plane_state = A2, new_tg = T0 Pipe 1: old_plane_state = B1, new_plane_state = B2, new_tg = T1 Pipe 2: old_plane_state = A1, new_plane_state = NULL, new_tg = NULL Pipe 3: old_plane_state = B1, new_plane_state = NULL, new_tg = NULL In the second commit the assertion for mpcc in use is hit because mpcc disconnect never occurs for pipe 1. This is because the stream changes for pipe 1 and the opp_list is empty. This sequence occurs when running the "igt@kms_plane_multiple@atomic-pipe-A-tiling-none" test with two displays connected. [How] Expand the reset condition to include: "old_pipe_ctx->stream_res.tg != new_pipe_ctx->stream_res.tg" ...but only when the plane state is non-NULL for both old and new. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 9cde24dbdac8..9f23ea283838 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2334,9 +2334,10 @@ static void dcn10_apply_ctx_for_surface( } } - if (!pipe_ctx->plane_state && - old_pipe_ctx->plane_state && - old_pipe_ctx->stream_res.tg == tg) { + if ((!pipe_ctx->plane_state || + pipe_ctx->stream_res.tg != old_pipe_ctx->stream_res.tg) && + old_pipe_ctx->plane_state && + old_pipe_ctx->stream_res.tg == tg) { dc->hwss.plane_atomic_disconnect(dc, old_pipe_ctx); removed_pipe[i] = true; From 26a11deea685b41a43edb513194718aa1f461c9a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 13:03:53 +0000 Subject: [PATCH 474/539] drm/i915/pmu: Fix enable count array size and bounds checking Enable count array is supposed to have one counter for each possible engine sampler. As such, array sizing and bounds checking is not correct and would blow up the asserts if more samplers were added. No ill-effect in the current code base but lets fix it for correctness. At the same time tidy the assert for readability and robustness. v2: * One check per assert. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: b46a33e271ed ("drm/i915/pmu: Expose a PMU interface for perf queries") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190205130353.21105-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/i915_pmu.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++---- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index b1cb2d3cae16..13d70b90dd0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -599,7 +599,8 @@ static void i915_pmu_enable(struct perf_event *event) * Update the bitmask of enabled events and increment * the event reference counter. */ - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); i915->pmu.enable |= BIT_ULL(bit); i915->pmu.enable_count[bit]++; @@ -620,11 +621,16 @@ static void i915_pmu_enable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(sample); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != + I915_ENGINE_SAMPLE_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != + I915_ENGINE_SAMPLE_COUNT); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + + engine->pmu.enable |= BIT(sample); engine->pmu.enable_count[sample]++; } @@ -654,9 +660,11 @@ static void i915_pmu_disable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. @@ -665,7 +673,7 @@ static void i915_pmu_disable(struct perf_event *event) engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 7f164ca3db12..b3728c5f13e7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -31,6 +31,8 @@ enum { ((1 << I915_PMU_SAMPLE_BITS) + \ (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) +#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample { u64 cur; }; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1398eb81dee6..4d4ea6963a72 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -403,16 +403,17 @@ struct intel_engine_cs { /** * @enable_count: Reference count for the enabled samplers. * - * Index number corresponds to the bit number from @enable. + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ - unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; /** * @sample: Counter values for sampling events. * * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ -#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; /* From c7e716b8617e5f468d6f8623532b9cc2cc7d3d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 22:50:55 +0200 Subject: [PATCH 475/539] drm/i915: Bump skl+ wm blocks to 11 bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On icl the plane watermark blocks field is 11 bits. Bump our define to match so that readout won't ignore the extra bit. We can safely do this for older platforms too since the unused bits are hardwired to zero. Cc: José Roberto de Souza Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205205056.30081-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 12964b0fbc54..2be34e13af78 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6000,7 +6000,7 @@ enum { #define PLANE_WM_EN (1 << 31) #define PLANE_WM_LINES_SHIFT 14 #define PLANE_WM_LINES_MASK 0x1f -#define PLANE_WM_BLOCKS_MASK 0x3ff +#define PLANE_WM_BLOCKS_MASK 0x7ff /* skl+: 10 bits, icl+ 11 bits */ #define _CUR_WM_0(pipe) _PIPE(pipe, _CUR_WM_A_0, _CUR_WM_B_0) #define CUR_WM(pipe, level) _MMIO(_CUR_WM_0(pipe) + ((4) * (level))) From d7e449a858ec280f3b90355246d8df680e40b6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 22:50:56 +0200 Subject: [PATCH 476/539] drm/i915: Just use icl+ definition for PLANE_WM blocks field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unused bits on PLANE_WM & co. are hardwired to zero. So no need to worry about reading the extra bit on pre-icl. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205205056.30081-2-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_reg.h | 3 +-- drivers/gpu/drm/i915/intel_pm.c | 9 ++------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2be34e13af78..638a586469f9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6784,8 +6784,7 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c -#define SKL_DDB_ENTRY_MASK 0x3FF -#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_MASK 0x7FF /* skl+: 10 bits, icl+ 11 bits */ #define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 737005bf6816..0f15685529a0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3927,14 +3927,9 @@ static unsigned int skl_cursor_allocation(int num_active) static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, struct skl_ddb_entry *entry, u32 reg) { - u16 mask; - if (INTEL_GEN(dev_priv) >= 11) - mask = ICL_DDB_ENTRY_MASK; - else - mask = SKL_DDB_ENTRY_MASK; - entry->start = reg & mask; - entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + entry->start = reg & DDB_ENTRY_MASK; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & DDB_ENTRY_MASK; if (entry->end) entry->end += 1; From ba345a0242d7ca7fe841d1926ebc12f2ec3890d8 Mon Sep 17 00:00:00 2001 From: Pratik Vishwakarma Date: Mon, 31 Dec 2018 13:42:53 +0530 Subject: [PATCH 477/539] drm/amdgpu/display: fix compiler errors [-Werror,-Wparentheses-equality] Remove extraneous parentheses around the comparison to silence this warning Signed-off-by: Pratik Vishwakarma Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c index 7d102ac0d61b..1ef0074302c5 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c @@ -63,7 +63,7 @@ void scaler_settings_calculation(struct dcn_bw_internal_vars *v) if (v->interlace_output[k] == 1.0) { v->v_ratio[k] = 2.0 * v->v_ratio[k]; } - if ((v->underscan_output[k] == 1.0)) { + if (v->underscan_output[k] == 1.0) { v->h_ratio[k] = v->h_ratio[k] * v->under_scan_factor; v->v_ratio[k] = v->v_ratio[k] * v->under_scan_factor; } From 5062b797db4103218fa00ee254417b8ecaab7401 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 23 Jan 2019 14:55:58 -0500 Subject: [PATCH 478/539] drm/amd/display: Don't re-program planes for DPMS changes [Why] There are opt1c lock warnings and CRTC read timeouts when running the "igt@kms_plane@plane-position-hole-dpms-pipe-*" tests. These are caused by trying to reprogram planes that are not in the current context. DPMS off removes the stream from the context. In this case: new_crtc_state->active_changed = true new_crtc_state->mode_changed = false The planes are reprogrammed before the stream is removed from the context because stream_state->mode_changed = false. For DPMS adds the stream and planes back to the context: new_crtc_state->active_changed = true new_crtc_state->mode_changed = false The planes are also reprogrammed here before the stream is added to the context because stream_state->mode_changed = true. They were not previously in the current context so warnings occur here. [How] Set stream_state->mode_changed = true when new_crtc_state->active_changed = true too. This prevents reprogramming before the context is applied in DC. The programming will be done after the context is applied. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Sun peng Li Acked-by: Bhawanpreet Lakha Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bcc906a4eb04..ad31d7b9912f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4876,7 +4876,8 @@ cleanup: static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_state, struct dc_stream_state *stream_state) { - stream_state->mode_changed = crtc_state->mode_changed; + stream_state->mode_changed = + crtc_state->mode_changed || crtc_state->active_changed; } static int amdgpu_dm_atomic_commit(struct drm_device *dev, From 3f01f098a4e2ef30ef628497c43a3d568e720376 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Thu, 24 Jan 2019 11:46:49 -0500 Subject: [PATCH 479/539] drm/amd/display: Clear dc_sink after it gets released [Why] The dc_sink was released but the pointer on the aconnector was not cleared. [How] Clear it. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Nicholas Kazlauskas Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 73f8e8fa6edb..44c1a02e6452 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -173,8 +173,10 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) aconnector->edid = edid; } - if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) + if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) { dc_sink_release(aconnector->dc_sink); + aconnector->dc_sink = NULL; + } if (!aconnector->dc_sink) { struct dc_sink *dc_sink; From 810ece19ee74c5705190ef18ccb292e7930a2377 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Thu, 24 Jan 2019 15:59:22 -0500 Subject: [PATCH 480/539] drm/amd/display: Calc vline position in dc. We need to calcualte vline position in DC for DCN. Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 +- drivers/gpu/drm/amd/display/dc/dc_stream.h | 15 ++-- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 80 ++++++++++++++++++- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 6 +- .../amd/display/dc/inc/hw/timing_generator.h | 8 +- 5 files changed, 97 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d9f62befd86a..409097cf58ed 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1572,13 +1572,13 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->adjust->v_total_min, stream_update->adjust->v_total_max); - if (stream_update->vline0_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + if (stream_update->periodic_vsync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, VLINE0, &stream->vline0_config); + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE0, &stream->periodic_vsync_config); - if (stream_update->vline1_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + if (stream_update->enhanced_sync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, VLINE1, &stream->vline1_config); + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE1, &stream->enhanced_sync_config); if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) || stream_update->vrr_infopacket || diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 2d1f2825de09..90f019eb54b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -51,11 +51,12 @@ struct freesync_context { bool dummy; }; -struct vline_config { - unsigned int start_line; - unsigned int end_line; +union vline_config { + unsigned int line_number; + unsigned long long delta_in_ns; }; + struct dc_stream_state { // sink is deprecated, new code should not reference // this pointer @@ -105,8 +106,8 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; - struct vline_config vline0_config; - struct vline_config vline1_config; + union vline_config periodic_vsync_config; + union vline_config enhanced_sync_config; /* from core_stream struct */ struct dc_context *ctx; @@ -155,8 +156,8 @@ struct dc_stream_update { struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; - struct vline_config *vline0_config; - struct vline_config *vline1_config; + union vline_config *periodic_vsync_config; + union vline_config *enhanced_sync_config; struct dc_crtc_timing_adjust *adjust; struct dc_info_packet *vrr_infopacket; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index cefa322df8a6..0355dcb8554a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -92,22 +92,94 @@ static void optc1_disable_stereo(struct timing_generator *optc) OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } +static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) +{ + struct dc_crtc_timing patched_crtc_timing; + int vesa_sync_start; + int asic_blank_end; + int interlace_factor; + int vertical_line_start; + + patched_crtc_timing = *dc_crtc_timing; + optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + + vesa_sync_start = patched_crtc_timing.h_addressable + + patched_crtc_timing.h_border_right + + patched_crtc_timing.h_front_porch; + + asic_blank_end = patched_crtc_timing.h_total - + vesa_sync_start - + patched_crtc_timing.h_border_left; + + interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1; + + vesa_sync_start = patched_crtc_timing.v_addressable + + patched_crtc_timing.v_border_bottom + + patched_crtc_timing.v_front_porch; + + asic_blank_end = (patched_crtc_timing.v_total - + vesa_sync_start - + patched_crtc_timing.v_border_top) + * interlace_factor; + + vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; + if (vertical_line_start < 0) { + ASSERT(0); + vertical_line_start = 0; + } + + return vertical_line_start; +} + +static void calc_vline_position( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta, + uint32_t *start_line, + uint32_t *end_line) +{ + unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); + unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_100hz + 999), 1000); + uint32_t req_delta_lines = (uint32_t) div64_u64( + (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), + dc_crtc_timing->h_total); + + uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); + + if (req_delta_lines != 0) + req_delta_lines--; + + if (req_delta_lines > vsync_line) + *start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1; + else + *start_line = vsync_line - req_delta_lines; + + *end_line = *start_line + 2; + + if (*end_line >= dc_crtc_timing->v_total) + *end_line = 2; +} + void optc1_program_vline_interrupt( struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, enum vline_select vline, - const struct vline_config *vline_config) + const union vline_config *vline_config) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t start_line = 0; + uint32_t end_line = 0; switch (vline) { case VLINE0: + calc_vline_position(optc, dc_crtc_timing, vline_config->delta_in_ns, &start_line, &end_line); REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, - OTG_VERTICAL_INTERRUPT0_LINE_START, vline_config->start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, vline_config->end_line); + OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); break; case VLINE1: REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, - OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config->start_line); + OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config->line_number); break; default: break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index b34c8a240598..8a4e3e37e894 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -483,9 +483,11 @@ void optc1_program_timing( const struct dc_crtc_timing *dc_crtc_timing, bool use_vbios); -void optc1_program_vline_interrupt(struct timing_generator *optc, +void optc1_program_vline_interrupt( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, enum vline_select vline, - const struct vline_config *vline_config); + const union vline_config *vline_config); void optc1_program_global_sync( struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index d22a406c19c0..39fec0186c10 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -134,7 +134,7 @@ struct dc_crtc_timing; struct drr_params; -struct vline_config; +union vline_config; enum vline_select { @@ -149,9 +149,11 @@ struct timing_generator_funcs { void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing, bool use_vbios); - void (*program_vline_interrupt)(struct timing_generator *optc, + void (*program_vline_interrupt)( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, enum vline_select vline, - const struct vline_config *vline_config); + const union vline_config *vline_config); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); From b9d4b33059b86147bcde8f53ad19b97792837f75 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Tue, 22 Jan 2019 18:57:06 -0500 Subject: [PATCH 481/539] drm/amd/display: Disable Stutter for Stereo 3D [WHY] Bandwidth calculation formulas currently do not take Stereo 3D + Stutter properly into account. Disable stutter feature when we detect a Stereo 3D mode as a temporary workaround. Signed-off-by: Wesley Chalmers Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Acked-by: Martin Leung Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_hubbub.c | 19 ++++++++++-------- .../drm/amd/display/dc/dcn10/dcn10_hubbub.h | 2 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 20 ++++++++++++++++++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 5a4614c371bc..e161ad836812 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -88,11 +88,18 @@ void hubbub1_wm_read_state(struct hubbub *hubbub, s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D); } -void hubbub1_disable_allow_self_refresh(struct hubbub *hubbub) +void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow) { struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - REG_UPDATE(DCHUBBUB_ARB_DRAM_STATE_CNTL, - DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, 0); + + /* + * DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 1 means do not allow stutter + * DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 0 means allow stutter + */ + + REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL, + DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, 0, + DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, !allow); } bool hububu1_is_allow_self_refresh_enabled(struct hubbub *hubbub) @@ -262,8 +269,6 @@ void hubbub1_program_watermarks( bool safe_to_lower) { struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - - uint32_t force_en = hubbub1->base.ctx->dc->debug.disable_stutter ? 1 : 0; /* * Need to clamp to max of the register values (i.e. no wrap) * for dcn1, all wm registers are 21-bit wide @@ -537,9 +542,7 @@ void hubbub1_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 68); - REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL, - DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, 0, - DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, force_en); + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); #if 0 REG_UPDATE_2(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index c681e1cc9290..9cd4a5194154 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -204,7 +204,7 @@ void hubbub1_program_watermarks( unsigned int refclk_mhz, bool safe_to_lower); -void hubbub1_disable_allow_self_refresh(struct hubbub *hubbub); +void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow); bool hububu1_is_allow_self_refresh_enabled(struct hubbub *hubub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 9f23ea283838..7cead0398c15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1011,7 +1011,7 @@ static void dcn10_init_hw(struct dc *dc) */ if (allow_self_fresh_force_enable == false && hububu1_is_allow_self_refresh_enabled(dc->res_pool->hubbub)) - hubbub1_disable_allow_self_refresh(dc->res_pool->hubbub); + hubbub1_allow_self_refresh_control(dc->res_pool->hubbub, true); disable_vga(dc->hwseq); } @@ -2384,6 +2384,22 @@ static void dcn10_apply_ctx_for_surface( hubbub1_wm_change_req_wa(dc->res_pool->hubbub); } +static void dcn10_stereo_hw_frame_pack_wa(struct dc *dc, struct dc_state *context) +{ + uint8_t i; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->timing.timing_3d_format + == TIMING_3D_FORMAT_HW_FRAME_PACKING) { + /* + * Disable stutter + */ + hubbub1_allow_self_refresh_control(dc->res_pool->hubbub, false); + break; + } + } +} + static void dcn10_prepare_bandwidth( struct dc *dc, struct dc_state *context) @@ -2405,6 +2421,7 @@ static void dcn10_prepare_bandwidth( &context->bw.dcn.watermarks, dc->res_pool->ref_clock_inKhz / 1000, true); + dcn10_stereo_hw_frame_pack_wa(dc, context); if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) dcn_bw_notify_pplib_of_wm_ranges(dc); @@ -2434,6 +2451,7 @@ static void dcn10_optimize_bandwidth( &context->bw.dcn.watermarks, dc->res_pool->ref_clock_inKhz / 1000, true); + dcn10_stereo_hw_frame_pack_wa(dc, context); if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) dcn_bw_notify_pplib_of_wm_ranges(dc); From 00fbeb4e2f9eda0ebd57eefdcf25e41958f98fa5 Mon Sep 17 00:00:00 2001 From: Eryk Brol Date: Mon, 21 Jan 2019 14:40:16 -0500 Subject: [PATCH 482/539] drm/amd/display: DC VM Fixes [Why] VM_helper needs to be intialized with the dc struct in order to fix an unallocated memory issue. System aperture settings should be initialized to 0 and guarded with a check to make sure vm_config is valid. [How] Allocate and free memory for vm_helper with other dc members. Check whether the vm_config valid bit is set before initializing aperture settings. Signed-off-by: Eryk Brol Reviewed-by: Jun Lei Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 - .../gpu/drm/amd/display/dc/core/dc_vm_helper.c | 16 ++++++---------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/inc/vm_helper.h | 7 ++++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 409097cf58ed..af5f486d24e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -621,7 +621,6 @@ static bool construct(struct dc *dc, #endif enum dce_version dc_version = DCE_VERSION_UNKNOWN; - dc_dceip = kzalloc(sizeof(*dc_dceip), GFP_KERNEL); if (!dc_dceip) { dm_error("%s: failed to create dceip\n", __func__); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c index e54b8ac339b2..6ce87b682a32 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c @@ -112,16 +112,12 @@ uint8_t get_vmid_for_ptb(struct vm_helper *vm_helper, int64_t ptb, uint8_t hubp_ return vmid; } -struct vm_helper init_vm_helper(unsigned int num_vmid, unsigned int num_hubp) +void init_vm_helper(struct vm_helper *vm_helper, unsigned int num_vmid, unsigned int num_hubp) { - static uint64_t ptb_assigned_to_vmid[MAX_VMID]; - static struct vmid_usage hubp_vmid_usage[MAX_HUBP]; + vm_helper->num_vmid = num_vmid; + vm_helper->num_hubp = num_hubp; + vm_helper->num_vmids_available = num_vmid - 1; - return (struct vm_helper){ - .num_vmid = num_vmid, - .num_hubp = num_hubp, - .num_vmids_available = num_vmid - 1, - .ptb_assigned_to_vmid = ptb_assigned_to_vmid, - .hubp_vmid_usage = hubp_vmid_usage - }; + memset(vm_helper->hubp_vmid_usage, 0, sizeof(vm_helper->hubp_vmid_usage[0]) * MAX_HUBP); + memset(vm_helper->ptb_assigned_to_vmid, 0, sizeof(vm_helper->ptb_assigned_to_vmid[0]) * MAX_VMID); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ee2c7e5b6087..2224bba914ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -265,7 +265,6 @@ struct dc_debug_data { uint32_t auxErrorCount; }; - struct dc_state; struct resource_pool; struct dce_hwseq; diff --git a/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h b/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h index a202206e22a3..193407f76a80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/vm_helper.h @@ -39,8 +39,8 @@ struct vm_helper { unsigned int num_vmid; unsigned int num_hubp; unsigned int num_vmids_available; - uint64_t *ptb_assigned_to_vmid; - struct vmid_usage *hubp_vmid_usage; + uint64_t ptb_assigned_to_vmid[MAX_VMID]; + struct vmid_usage hubp_vmid_usage[MAX_HUBP]; }; uint8_t get_vmid_for_ptb( @@ -48,7 +48,8 @@ uint8_t get_vmid_for_ptb( int64_t ptb, uint8_t pipe_idx); -struct vm_helper init_vm_helper( +void init_vm_helper( + struct vm_helper *vm_helper, unsigned int num_vmid, unsigned int num_hubp); From de00d253bc85978c1a7d3be888d675488d18a5dd Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Fri, 25 Jan 2019 11:50:31 -0500 Subject: [PATCH 483/539] drm/amd/display: link_rate_set should index into table [Why] Current implementation that maps link_rate_set value to actual link rate is incorrect. [How] Fix this implementation, such that link_rate_set indexes into the supported_link_rate table. Signed-off-by: Anthony Koo Reviewed-by: Harry Wentland Acked-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 35 +------------------ 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5ee36d6e0512..09d301216076 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2540,7 +2540,6 @@ void detect_edp_sink_caps(struct dc_link *link) uint32_t entry; uint32_t link_rate_in_khz; enum dc_link_rate link_rate = LINK_RATE_UNKNOWN; - uint8_t link_rate_set = 0; retrieve_link_cap(link); @@ -2560,39 +2559,7 @@ void detect_edp_sink_caps(struct dc_link *link) link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz); if (link->reported_link_cap.link_rate < link_rate) { link->reported_link_cap.link_rate = link_rate; - - switch (link_rate) { - case LINK_RATE_LOW: - link_rate_set = 1; - break; - case LINK_RATE_RATE_2: - link_rate_set = 2; - break; - case LINK_RATE_RATE_3: - link_rate_set = 3; - break; - case LINK_RATE_HIGH: - link_rate_set = 4; - break; - case LINK_RATE_RBR2: - link_rate_set = 5; - break; - case LINK_RATE_RATE_6: - link_rate_set = 6; - break; - case LINK_RATE_HIGH2: - link_rate_set = 7; - break; - case LINK_RATE_HIGH3: - link_rate_set = 8; - break; - default: - link_rate_set = 0; - break; - } - - if (link->dpcd_caps.link_rate_set < link_rate_set) - link->dpcd_caps.link_rate_set = link_rate_set; + link->dpcd_caps.link_rate_set = entry; } } } From e6d2421343a78a3ef83d7839a8704dd2eb3c9a69 Mon Sep 17 00:00:00 2001 From: Murton Liu Date: Wed, 23 Jan 2019 17:37:57 -0500 Subject: [PATCH 484/539] drm/amd/display: PIP overlay corruption [Why] When moving mouse onto or off of pip plane, screen would flash briefly due to garbage negative pos values being programmed for cursor. Also, text flashes due to PIP flips taking too long. [How] When negative pos value seen, default to 0 and adjust by modifying cursor hotspot. For flip issue, only do post update when optimize required vs all the time. Signed-off-by: Murton Liu Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Acked-by: Sivapiriyan Kumarasamy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 23 +++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index af5f486d24e0..77e1bd18b1dd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1125,6 +1125,9 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) int i; struct dc_state *context = dc->current_state; + if (dc->optimized_required == false) + return true; + post_surface_trace(dc); for (i = 0; i < dc->res_pool->pipe_count; i++) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 0ba68d41b9c3..683829466a44 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1150,9 +1150,28 @@ void hubp1_cursor_set_position( REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, + //account for cases where we see negative offset relative to overlay plane + if (src_x_offset < 0 && src_y_offset < 0) { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, 0, + CURSOR_Y_POSITION, 0); + x_hotspot -= src_x_offset; + y_hotspot -= src_y_offset; + } else if (src_x_offset < 0) { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, 0, CURSOR_Y_POSITION, pos->y); + x_hotspot -= src_x_offset; + } else if (src_y_offset < 0) { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, pos->x, + CURSOR_Y_POSITION, 0); + y_hotspot -= src_y_offset; + } else { + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, pos->x, + CURSOR_Y_POSITION, pos->y); + } REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, x_hotspot, From c00800c46e832ca27f4db489b8c28a56603f8d2c Mon Sep 17 00:00:00 2001 From: mark mcgarrity Date: Fri, 25 Jan 2019 14:38:34 -0500 Subject: [PATCH 485/539] drm/amd/display: 3.2.17 Signed-off-by: mark mcgarrity Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 2224bba914ec..bade219d1268 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.16" +#define DC_VER "3.2.17" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From 68f1a00c23d443c9d940fbd512a195e9e6c08b11 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:08:02 -0500 Subject: [PATCH 486/539] drm/amd/display: interface to check if timing can be seamless [Why] Need to figure out whether a timing we want to commit matches something that GOP already programmed, in which case we can decide to some optimizations [How] 1. Add way to check for DIG FE 2. Add way to check for matching OTG timing 3. Add way to check for matching pixel clock (if possible) - Currently only support DP for pixel clock, since it is easy to calc Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 46 +++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 4 ++ .../drm/amd/display/dc/dce/dce_clock_source.c | 28 ++++++++- .../amd/display/dc/dcn10/dcn10_link_encoder.c | 10 ++++ .../amd/display/dc/dcn10/dcn10_link_encoder.h | 2 + .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 59 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/clock_source.h | 4 ++ .../drm/amd/display/dc/inc/hw/link_encoder.h | 1 + .../amd/display/dc/inc/hw/timing_generator.h | 2 + 9 files changed, 154 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 77e1bd18b1dd..b620520771a7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -969,6 +969,52 @@ static bool context_changed( return false; } +bool dc_validate_seamless_boot_timing(struct dc *dc, + const struct dc_sink *sink, + struct dc_crtc_timing *crtc_timing) +{ + struct timing_generator *tg; + struct dc_link *link = sink->link; + unsigned int inst; + + /* Check for enabled DIG to identify enabled display */ + if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) + return false; + + /* Check for which front end is used by this encoder. + * Note the inst is 1 indexed, where 0 is undefined. + * Note that DIG_FE can source from different OTG but our + * current implementation always map 1-to-1, so this code makes + * the same assumption and doesn't check OTG source. + */ + inst = link->link_enc->funcs->get_dig_frontend(link->link_enc) - 1; + + /* Instance should be within the range of the pool */ + if (inst >= dc->res_pool->pipe_count) + return false; + + tg = dc->res_pool->timing_generators[inst]; + + if (!tg->funcs->is_matching_timing) + return false; + + if (!tg->funcs->is_matching_timing(tg, crtc_timing)) + return false; + + if (dc_is_dp_signal(link->connector_signal)) { + unsigned int pix_clk_100hz; + + dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz( + dc->res_pool->dp_clock_source, + inst, &pix_clk_100hz); + + if (crtc_timing->pix_clk_100hz != pix_clk_100hz) + return false; + } + + return true; +} + bool dc_enable_stereo( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index bade219d1268..1a7fd6aa77eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -594,6 +594,10 @@ struct dc_validation_set { uint8_t plane_count; }; +bool dc_validate_seamless_boot_timing(struct dc *dc, + const struct dc_sink *sink, + struct dc_crtc_timing *crtc_timing); + enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *plane_state); void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index c67e90e5c339..71d5777de961 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -977,6 +977,28 @@ static bool dce110_clock_source_power_down( return bp_result == BP_RESULT_OK; } +static bool get_pixel_clk_frequency_100hz( + struct clock_source *clock_source, + unsigned int inst, + unsigned int *pixel_clk_khz) +{ + struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); + unsigned int clock_hz = 0; + + if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) { + clock_hz = REG_READ(PHASE[inst]); + + /* NOTE: There is agreement with VBIOS here that MODULO is + * programmed equal to DPREFCLK, in which case PHASE will be + * equivalent to pixel clock. + */ + *pixel_clk_khz = clock_hz / 100; + return true; + } + + return false; +} + /*****************************************/ /* Constructor */ /*****************************************/ @@ -984,12 +1006,14 @@ static bool dce110_clock_source_power_down( static const struct clock_source_funcs dce112_clk_src_funcs = { .cs_power_down = dce110_clock_source_power_down, .program_pix_clk = dce112_program_pix_clk, - .get_pix_clk_dividers = dce112_get_pix_clk_dividers + .get_pix_clk_dividers = dce112_get_pix_clk_dividers, + .get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz }; static const struct clock_source_funcs dce110_clk_src_funcs = { .cs_power_down = dce110_clock_source_power_down, .program_pix_clk = dce110_program_pix_clk, - .get_pix_clk_dividers = dce110_get_pix_clk_dividers + .get_pix_clk_dividers = dce110_get_pix_clk_dividers, + .get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 771449f8984f..a9db372688ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -85,6 +85,7 @@ static const struct link_encoder_funcs dcn10_lnk_enc_funcs = { .enable_hpd = dcn10_link_encoder_enable_hpd, .disable_hpd = dcn10_link_encoder_disable_hpd, .is_dig_enabled = dcn10_is_dig_enabled, + .get_dig_frontend = dcn10_get_dig_frontend, .destroy = dcn10_link_encoder_destroy }; @@ -495,6 +496,15 @@ bool dcn10_is_dig_enabled(struct link_encoder *enc) return value; } +unsigned int dcn10_get_dig_frontend(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t value; + + REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &value); + return value; +} + static void link_encoder_disable(struct dcn10_link_encoder *enc10) { /* reset training pattern */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index 670b46e887ed..b74b80a247ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -336,6 +336,8 @@ void dcn10_psr_program_secondary_packet(struct link_encoder *enc, bool dcn10_is_dig_enabled(struct link_encoder *enc); +unsigned int dcn10_get_dig_frontend(struct link_encoder *enc); + void dcn10_aux_initialize(struct dcn10_link_encoder *enc10); #endif /* __DC_LINK_ENCODER__DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0355dcb8554a..51c98e99237e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1208,6 +1208,64 @@ bool optc1_is_stereo_left_eye(struct timing_generator *optc) return ret; } +bool optc1_is_matching_timing(struct timing_generator *tg, + const struct dc_crtc_timing *otg_timing) +{ + struct dc_crtc_timing hw_crtc_timing = {0}; + struct dcn_otg_state s = {0}; + + if (tg == NULL || otg_timing == NULL) + return false; + + optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); + + hw_crtc_timing.h_total = s.h_total + 1; + hw_crtc_timing.h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end); + hw_crtc_timing.h_front_porch = s.h_total + 1 - s.h_blank_start; + hw_crtc_timing.h_sync_width = s.h_sync_a_end - s.h_sync_a_start; + + hw_crtc_timing.v_total = s.v_total + 1; + hw_crtc_timing.v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end); + hw_crtc_timing.v_front_porch = s.v_total + 1 - s.v_blank_start; + hw_crtc_timing.v_sync_width = s.v_sync_a_end - s.v_sync_a_start; + + if (otg_timing->h_total != hw_crtc_timing.h_total) + return false; + + if (otg_timing->h_border_left != hw_crtc_timing.h_border_left) + return false; + + if (otg_timing->h_addressable != hw_crtc_timing.h_addressable) + return false; + + if (otg_timing->h_border_right != hw_crtc_timing.h_border_right) + return false; + + if (otg_timing->h_front_porch != hw_crtc_timing.h_front_porch) + return false; + + if (otg_timing->h_sync_width != hw_crtc_timing.h_sync_width) + return false; + + if (otg_timing->v_total != hw_crtc_timing.v_total) + return false; + + if (otg_timing->v_border_top != hw_crtc_timing.v_border_top) + return false; + + if (otg_timing->v_addressable != hw_crtc_timing.v_addressable) + return false; + + if (otg_timing->v_border_bottom != hw_crtc_timing.v_border_bottom) + return false; + + if (otg_timing->v_sync_width != hw_crtc_timing.v_sync_width) + return false; + + return true; +} + + void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s) { @@ -1404,6 +1462,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .get_frame_count = optc1_get_vblank_counter, .get_scanoutpos = optc1_get_crtc_scanoutpos, .get_otg_active_size = optc1_get_otg_active_size, + .is_matching_timing = optc1_is_matching_timing, .set_early_control = optc1_set_early_control, /* used by enable_timing_synchronization. Not need for FPGA */ .wait_for_state = optc1_wait_for_state, diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index 43d1fbd8ace5..fe6301cb8681 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -166,6 +166,10 @@ struct clock_source_funcs { struct clock_source *, struct pixel_clk_params *, struct pll_settings *); + bool (*get_pixel_clk_frequency_100hz)( + struct clock_source *clock_source, + unsigned int inst, + unsigned int *pixel_clk_khz); }; struct clock_source { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index c20fdcaac53b..c9d3e37e9531 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -153,6 +153,7 @@ struct link_encoder_funcs { void (*enable_hpd)(struct link_encoder *enc); void (*disable_hpd)(struct link_encoder *enc); bool (*is_dig_enabled)(struct link_encoder *enc); + unsigned int (*get_dig_frontend)(struct link_encoder *enc); void (*destroy)(struct link_encoder **enc); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 39fec0186c10..5d6cca7826f3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -170,6 +170,8 @@ struct timing_generator_funcs { bool (*get_otg_active_size)(struct timing_generator *optc, uint32_t *otg_active_width, uint32_t *otg_active_height); + bool (*is_matching_timing)(struct timing_generator *tg, + const struct dc_crtc_timing *otg_timing); void (*set_early_control)(struct timing_generator *tg, uint32_t early_cntl); void (*wait_for_state)(struct timing_generator *tg, From a122b62d8ac484091c536ad04e1d0ea156ae6de7 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:13:42 -0500 Subject: [PATCH 487/539] drm/amd/display: refactor out programming of vupdate interrupt [Why] More clearly isolate the code that is involved in programming of vupdate interrupt [How] Add function for programming of vupdate interrupt. Call it after timing is programmed. Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 5 ++ .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 51 +++++++++++++++---- .../amd/display/dc/inc/hw/timing_generator.h | 3 ++ 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index fd7cd5b5a17c..e2d8362dd5b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1341,6 +1341,11 @@ static enum dc_status apply_single_controller_ctx_to_hw( /* */ dc->hwss.enable_stream_timing(pipe_ctx, context, dc); + if (pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt) + pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt( + pipe_ctx->stream_res.tg, + &stream->timing); + if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 51c98e99237e..1dbd1d3999e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -186,6 +186,42 @@ void optc1_program_vline_interrupt( } } +void optc1_program_vupdate_interrupt( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + int32_t vertical_line_start; + uint32_t asic_blank_end; + uint32_t vesa_sync_start; + struct dc_crtc_timing patched_crtc_timing; + + patched_crtc_timing = *dc_crtc_timing; + optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + + /* asic_h_blank_end = HsyncWidth + HbackPorch = + * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart - + * vesa.h_left_border + */ + vesa_sync_start = patched_crtc_timing.h_addressable + + patched_crtc_timing.h_border_right + + patched_crtc_timing.h_front_porch; + + asic_blank_end = patched_crtc_timing.h_total - + vesa_sync_start - + patched_crtc_timing.h_border_left; + + /* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt, + * program the reg for interrupt postition. + */ + vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; + if (vertical_line_start < 0) + vertical_line_start = 0; + + REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, + OTG_VERTICAL_INTERRUPT2_LINE_START, vertical_line_start); +} + /** * program_timing_generator used by mode timing set * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. @@ -288,22 +324,14 @@ void optc1_program_timing( patched_crtc_timing.v_addressable + patched_crtc_timing.v_border_bottom); - REG_UPDATE_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, asic_blank_start, - OTG_V_BLANK_END, asic_blank_end); - - /* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt, - * program the reg for interrupt postition. - */ vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; v_fp2 = 0; if (vertical_line_start < 0) v_fp2 = -vertical_line_start; - if (vertical_line_start < 0) - vertical_line_start = 0; - REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, - OTG_VERTICAL_INTERRUPT2_LINE_START, vertical_line_start); + REG_UPDATE_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, asic_blank_start, + OTG_V_BLANK_END, asic_blank_end); /* v_sync polarity */ v_sync_polarity = patched_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? @@ -1453,6 +1481,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, .program_vline_interrupt = optc1_program_vline_interrupt, + .program_vupdate_interrupt = optc1_program_vupdate_interrupt, .program_global_sync = optc1_program_global_sync, .enable_crtc = optc1_enable_crtc, .disable_crtc = optc1_disable_crtc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 5d6cca7826f3..03ae941895f3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -154,6 +154,9 @@ struct timing_generator_funcs { const struct dc_crtc_timing *dc_crtc_timing, enum vline_select vline, const union vline_config *vline_config); + + void (*program_vupdate_interrupt)(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); From ff582b61466f14d61f3a2a54c3d673d85bcb3c76 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:32:55 -0500 Subject: [PATCH 488/539] drm/amd/display: add way to determine if link is active [Why] Need to understand whether link is active aside from stream state. This could be used to check what links are enabled by GOP. [How] Store link_active state in link status and initialize it by checking if the DIG is enabled. Keep it updated on every link enable and disable Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 137d3c126632..e2e7bf2ba84b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2612,6 +2612,8 @@ void core_link_enable_stream( } } + stream->link->link_status.link_active = true; + core_dc->hwss.enable_audio_stream(pipe_ctx); /* turn off otg test pattern if enable */ @@ -2646,6 +2648,8 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); + + pipe_ctx->stream->link->link_status.link_active = false; } void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index d26bbda61ad2..8fc223defed4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -30,6 +30,7 @@ #include "grph_object_defs.h" struct dc_link_status { + bool link_active; struct dpcd_caps *dpcd_caps; }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7cead0398c15..7117144d48bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1027,6 +1027,11 @@ static void dcn10_init_hw(struct dc *dc) dc->hwss.edp_power_control(link, true); link->link_enc->funcs->hw_init(link->link_enc); + + /* Check for enabled DIG to identify enabled display */ + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_status.link_active = true; } } From d2d7885f75b614a982a73383956570d95d79c23e Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:41:44 -0500 Subject: [PATCH 489/539] drm/amd/display: add seamless boot flag to stream [Why] If we determine the stream we are trying to commit matches HW, we want to try to optimize. [How] Try to acquire the HW resources that are already enabled and optimize. Also skip backend reprogramming Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 14 ++++- .../gpu/drm/amd/display/dc/core/dc_resource.c | 56 ++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc_stream.h | 4 +- .../display/dc/dce110/dce110_hw_sequencer.c | 4 +- 5 files changed, 75 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b620520771a7..52f838442e21 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1096,6 +1096,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c const struct dc_link *link = context->streams[i]->link; struct dc_stream_status *status; + if (context->streams[i]->apply_seamless_boot_optimization) + context->streams[i]->apply_seamless_boot_optimization = false; + if (!context->streams[i]->mode_changed) continue; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e2e7bf2ba84b..6d2cef05b4d7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2579,13 +2579,23 @@ void core_link_enable_stream( &stream->timing); if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + bool apply_edp_fast_boot_optimization = + pipe_ctx->stream->apply_edp_fast_boot_optimization; + + pipe_ctx->stream->apply_edp_fast_boot_optimization = false; + resource_build_info_frame(pipe_ctx); core_dc->hwss.update_info_frame(pipe_ctx); + /* Do not touch link on seamless boot optimization. */ + if (pipe_ctx->stream->apply_seamless_boot_optimization) { + pipe_ctx->stream->dpms_off = false; + return; + } + /* eDP lit up by bios already, no need to enable again. */ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && - pipe_ctx->stream->apply_edp_fast_boot_optimization) { - pipe_ctx->stream->apply_edp_fast_boot_optimization = false; + apply_edp_fast_boot_optimization) { pipe_ctx->stream->dpms_off = false; return; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 9888bc7659f3..349ab8017776 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1800,6 +1800,51 @@ static void calculate_phy_pix_clks(struct dc_stream_state *stream) stream->phy_pix_clk *= 2; } +static int acquire_resource_from_hw_enabled_state( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + struct dc_link *link = stream->link; + unsigned int inst; + + /* Check for enabled DIG to identify enabled display */ + if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) + return -1; + + /* Check for which front end is used by this encoder. + * Note the inst is 1 indexed, where 0 is undefined. + * Note that DIG_FE can source from different OTG but our + * current implementation always map 1-to-1, so this code makes + * the same assumption and doesn't check OTG source. + */ + inst = link->link_enc->funcs->get_dig_frontend(link->link_enc) - 1; + + /* Instance should be within the range of the pool */ + if (inst >= pool->pipe_count) + return -1; + + if (!res_ctx->pipe_ctx[inst].stream) { + struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[inst]; + + pipe_ctx->stream_res.tg = pool->timing_generators[inst]; + pipe_ctx->plane_res.mi = pool->mis[inst]; + pipe_ctx->plane_res.hubp = pool->hubps[inst]; + pipe_ctx->plane_res.ipp = pool->ipps[inst]; + pipe_ctx->plane_res.xfm = pool->transforms[inst]; + pipe_ctx->plane_res.dpp = pool->dpps[inst]; + pipe_ctx->stream_res.opp = pool->opps[inst]; + if (pool->dpps[inst]) + pipe_ctx->plane_res.mpcc_inst = pool->dpps[inst]->inst; + pipe_ctx->pipe_idx = inst; + + pipe_ctx->stream = stream; + return inst; + } + + return -1; +} + enum dc_status resource_map_pool_resources( const struct dc *dc, struct dc_state *context, @@ -1824,8 +1869,15 @@ enum dc_status resource_map_pool_resources( calculate_phy_pix_clks(stream); - /* acquire new resources */ - pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream); + if (stream->apply_seamless_boot_optimization) + pipe_idx = acquire_resource_from_hw_enabled_state( + &context->res_ctx, + pool, + stream); + + if (pipe_idx < 0) + /* acquire new resources */ + pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream); #ifdef CONFIG_DRM_AMD_DC_DCN1_0 if (pipe_idx < 0) diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 90f019eb54b3..a798694992b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -119,7 +119,6 @@ struct dc_stream_state { int phy_pix_clk; enum signal_type signal; bool dpms_off; - bool apply_edp_fast_boot_optimization; void *dm_stream_context; @@ -146,6 +145,9 @@ struct dc_stream_state { uint8_t otg_offset; } out; + bool apply_edp_fast_boot_optimization; + bool apply_seamless_boot_optimization; + uint32_t stream_id; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e2d8362dd5b1..21ceda410244 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1339,7 +1339,9 @@ static enum dc_status apply_single_controller_ctx_to_hw( } /* */ - dc->hwss.enable_stream_timing(pipe_ctx, context, dc); + /* Do not touch stream timing on seamless boot optimization. */ + if (!pipe_ctx->stream->apply_seamless_boot_optimization) + dc->hwss.enable_stream_timing(pipe_ctx, context, dc); if (pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt) pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt( From 9c0fb8d45bfcf2e8f080489913db443c1fd241ed Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:45:36 -0500 Subject: [PATCH 490/539] drm/amd/display: refactor programming of DRR [Why] Keep enable_stream_timing programming only timing related stuff. [How] Move DRR and static screen mask programming from enable_stream_timing to outside in apply_single_controller_ctx_to_hw Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 31 +++++++++---------- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 15 --------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 21ceda410244..a4386348a981 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1250,8 +1250,6 @@ static enum dc_status dce110_enable_stream_timing( struct pipe_ctx *pipe_ctx_old = &dc->current_state->res_ctx. pipe_ctx[pipe_ctx->pipe_idx]; struct tg_color black_color = {0}; - struct drr_params params = {0}; - unsigned int event_triggers = 0; if (!pipe_ctx_old->stream) { @@ -1280,20 +1278,6 @@ static enum dc_status dce110_enable_stream_timing( pipe_ctx->stream_res.tg, &stream->timing, true); - - params.vertical_total_min = stream->adjust.v_total_min; - params.vertical_total_max = stream->adjust.v_total_max; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); - - // DRR should set trigger event to monitor surface update event - if (stream->adjust.v_total_min != 0 && - stream->adjust.v_total_max != 0) - event_triggers = 0x80; - if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); } if (!pipe_ctx_old->stream) { @@ -1313,6 +1297,8 @@ static enum dc_status apply_single_controller_ctx_to_hw( struct dc *dc) { struct dc_stream_state *stream = pipe_ctx->stream; + struct drr_params params = {0}; + unsigned int event_triggers = 0; if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output; @@ -1348,6 +1334,19 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg, &stream->timing); + params.vertical_total_min = stream->adjust.v_total_min; + params.vertical_total_max = stream->adjust.v_total_max; + if (pipe_ctx->stream_res.tg->funcs->set_drr) + pipe_ctx->stream_res.tg->funcs->set_drr( + pipe_ctx->stream_res.tg, ¶ms); + + // DRR should set trigger event to monitor surface update event + if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) + event_triggers = 0x80; + if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx->stream_res.tg, event_triggers); + if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7117144d48bd..a03bbbf94d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -636,8 +636,6 @@ static enum dc_status dcn10_enable_stream_timing( struct dc_stream_state *stream = pipe_ctx->stream; enum dc_color_space color_space; struct tg_color black_color = {0}; - struct drr_params params = {0}; - unsigned int event_triggers = 0; /* by upper caller loop, pipe0 is parent pipe and be called first. * back end is set up by for pipe0. Other children pipe share back end @@ -705,19 +703,6 @@ static enum dc_status dcn10_enable_stream_timing( return DC_ERROR_UNEXPECTED; } - params.vertical_total_min = stream->adjust.v_total_min; - params.vertical_total_max = stream->adjust.v_total_max; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); - - // DRR should set trigger event to monitor surface update event - if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) - event_triggers = 0x80; - if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx->stream_res.tg, event_triggers); - /* TODO program crtc source select for non-virtual signal*/ /* TODO program FMT */ /* TODO setup link_enc */ From fb55546ea4b50f04ef41c196b8ac1824e26bc48c Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:23:07 -0500 Subject: [PATCH 491/539] drm/amd/display: refactor init_hw to isolate pipe related init [Why] Pipe related init is possible to optimized if we know what we intend to program, and if we can determine it matches what is already programmed for the pipe. [How] First step is to isolate the pipe related init code Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 6 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 159 +++++++++--------- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 2 + 3 files changed, 85 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index a4386348a981..4df8a43b5018 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2275,6 +2275,11 @@ static void dce110_enable_per_frame_crtc_position_reset( } +static void init_pipes(struct dc *dc, struct dc_state *context) +{ + // Do nothing +} + static void init_hw(struct dc *dc) { int i; @@ -2642,6 +2647,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, .init_hw = init_hw, + .init_pipes = init_pipes, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, .update_plane_addr = update_plane_addr, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index a03bbbf94d5d..117d9d8227f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -956,97 +956,34 @@ static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->pipe_idx); } -static void dcn10_init_hw(struct dc *dc) +static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) { int i; - struct abm *abm = dc->res_pool->abm; - struct dmcu *dmcu = dc->res_pool->dmcu; - struct dce_hwseq *hws = dc->hwseq; - struct dc_bios *dcb = dc->ctx->dc_bios; - struct dc_state *context = dc->current_state; - - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - REG_WRITE(REFCLK_CNTL, 0); - REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1); - REG_WRITE(DIO_MEM_PWR_CTRL, 0); - - if (!dc->debug.disable_clock_gate) { - /* enable all DCN clock gating */ - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); - - REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); - } - - enable_power_gating_plane(dc->hwseq, true); - } else { - - if (!dcb->funcs->is_accelerated_mode(dcb)) { - bool allow_self_fresh_force_enable = - hububu1_is_allow_self_refresh_enabled(dc->res_pool->hubbub); - - bios_golden_init(dc); - - /* WA for making DF sleep when idle after resume from S0i3. - * DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE is set to 1 by - * command table, if DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 0 - * before calling command table and it changed to 1 after, - * it should be set back to 0. - */ - if (allow_self_fresh_force_enable == false && - hububu1_is_allow_self_refresh_enabled(dc->res_pool->hubbub)) - hubbub1_allow_self_refresh_control(dc->res_pool->hubbub, true); - - disable_vga(dc->hwseq); - } - - for (i = 0; i < dc->link_count; i++) { - /* Power up AND update implementation according to the - * required signal (which may be different from the - * default signal on connector). - */ - struct dc_link *link = dc->links[i]; - - if (link->link_enc->connector.id == CONNECTOR_ID_EDP) - dc->hwss.edp_power_control(link, true); - - link->link_enc->funcs->hw_init(link->link_enc); - - /* Check for enabled DIG to identify enabled display */ - if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) - link->link_status.link_active = true; - } - } for (i = 0; i < dc->res_pool->pipe_count; i++) { struct timing_generator *tg = dc->res_pool->timing_generators[i]; if (tg->funcs->is_tg_enabled(tg)) tg->funcs->lock(tg); - } - - /* Blank controller using driver code instead of - * command table. - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct timing_generator *tg = dc->res_pool->timing_generators[i]; + /* Blank controller using driver code instead of + * command table. + */ if (tg->funcs->is_tg_enabled(tg)) { tg->funcs->set_blank(tg, true); hwss_wait_for_blank_complete(tg); } } - /* Reset all MPCC muxes */ dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc); - for (i = 0; i < dc->res_pool->timing_generator_count; i++) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct timing_generator *tg = dc->res_pool->timing_generators[i]; - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = dc->res_pool->hubps[i]; struct dpp *dpp = dc->res_pool->dpps[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; pipe_ctx->pipe_idx = i; @@ -1064,18 +1001,9 @@ static void dcn10_init_hw(struct dc *dc) pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; hwss1_plane_atomic_disconnect(dc, pipe_ctx); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct timing_generator *tg = dc->res_pool->timing_generators[i]; if (tg->funcs->is_tg_enabled(tg)) tg->funcs->unlock(tg); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct timing_generator *tg = dc->res_pool->timing_generators[i]; - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; dcn10_disable_plane(dc, pipe_ctx); @@ -1084,10 +1012,73 @@ static void dcn10_init_hw(struct dc *dc) tg->funcs->tg_init(tg); } +} - /* end of FPGA. Below if real ASIC */ - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) +static void dcn10_init_hw(struct dc *dc) +{ + int i; + struct abm *abm = dc->res_pool->abm; + struct dmcu *dmcu = dc->res_pool->dmcu; + struct dce_hwseq *hws = dc->hwseq; + struct dc_bios *dcb = dc->ctx->dc_bios; + + if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + REG_WRITE(REFCLK_CNTL, 0); + REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1); + REG_WRITE(DIO_MEM_PWR_CTRL, 0); + + if (!dc->debug.disable_clock_gate) { + /* enable all DCN clock gating */ + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); + } + + enable_power_gating_plane(dc->hwseq, true); + + /* end of FPGA. Below if real ASIC */ return; + } + + if (!dcb->funcs->is_accelerated_mode(dcb)) { + bool allow_self_fresh_force_enable = + hububu1_is_allow_self_refresh_enabled( + dc->res_pool->hubbub); + + bios_golden_init(dc); + + /* WA for making DF sleep when idle after resume from S0i3. + * DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE is set to 1 by + * command table, if DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE = 0 + * before calling command table and it changed to 1 after, + * it should be set back to 0. + */ + if (allow_self_fresh_force_enable == false && + hububu1_is_allow_self_refresh_enabled(dc->res_pool->hubbub)) + hubbub1_allow_self_refresh_control(dc->res_pool->hubbub, true); + + disable_vga(dc->hwseq); + } + + for (i = 0; i < dc->link_count; i++) { + /* Power up AND update implementation according to the + * required signal (which may be different from the + * default signal on connector). + */ + struct dc_link *link = dc->links[i]; + + if (link->link_enc->connector.id == CONNECTOR_ID_EDP) + dc->hwss.edp_power_control(link, true); + + link->link_enc->funcs->hw_init(link->link_enc); + + /* Check for enabled DIG to identify enabled display */ + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_status.link_active = true; + } for (i = 0; i < dc->res_pool->audio_count; i++) { struct audio *audio = dc->res_pool->audios[i]; @@ -1118,6 +1109,9 @@ static void dcn10_init_hw(struct dc *dc) enable_power_gating_plane(dc->hwseq, true); memset(&dc->res_pool->clk_mgr->clks, 0, sizeof(dc->res_pool->clk_mgr->clks)); + + if (dc->hwss.init_pipes) + dc->hwss.init_pipes(dc, dc->current_state); } static void reset_hw_ctx_wrap( @@ -2718,6 +2712,7 @@ static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx) static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, .init_hw = dcn10_init_hw, + .init_pipes = dcn10_init_pipes, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dcn10_apply_ctx_for_surface, .update_plane_addr = dcn10_update_plane_addr, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index d6a85f48b6d1..341b4810288c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -70,6 +70,8 @@ struct hw_sequencer_funcs { void (*init_hw)(struct dc *dc); + void (*init_pipes)(struct dc *dc, struct dc_state *context); + enum dc_status (*apply_ctx_to_hw)( struct dc *dc, struct dc_state *context); From 2c850b7b9e7d0fb36e0bd1ad1238ff55e5c4dcdc Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 14 Jan 2019 15:19:44 -0500 Subject: [PATCH 492/539] drm/amd/display: add n_vid_mul and half pix_rate for odm Dp needs half container rate to properly support odm Signed-off-by: Dmytro Laktyushkin Reviewed-by: Nikola Cornij Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4df8a43b5018..e1b285ea01ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1032,8 +1032,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link *link = stream->link; /* only 3 items below are used by unblank */ - params.pixel_clk_khz = - pipe_ctx->stream->timing.pix_clk_100hz / 10; + params.pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; params.link_settings.link_rate = link_settings->link_rate; if (dc_is_dp_signal(pipe_ctx->stream->signal)) @@ -1043,6 +1042,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, link->dc->hwss.edp_backlight_control(link, true); } } + void dce110_blank_stream(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 1dbd1d3999e6..2f78a84f0dcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -390,7 +390,7 @@ void optc1_program_timing( h_div_2 = optc1_is_two_pixels_per_containter(&patched_crtc_timing); REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_BY2, h_div_2); + OTG_H_TIMING_DIV_BY2, h_div_2 || optc1->comb_opp_id != 0xf); } @@ -1531,10 +1531,13 @@ void dcn10_timing_generator_init(struct optc *optc1) optc1->min_v_blank_interlace = 5; optc1->min_h_sync_width = 8; optc1->min_v_sync_width = 1; + optc1->comb_opp_id = 0xf; } bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) { - return timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; + bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; + + return two_pix; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 8a4e3e37e894..24452f11c598 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -435,7 +435,7 @@ struct optc { const struct dcn_optc_shift *tg_shift; const struct dcn_optc_mask *tg_mask; - enum controller_id controller_id; + int comb_opp_id; uint32_t max_h_total; uint32_t max_v_total; From fe5ec65668cdaa4348631d8ce1766eed43b33c10 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 29 Jan 2019 14:09:59 -0500 Subject: [PATCH 493/539] drm/i915: Block fbdev HPD processing during suspend When resuming, we check whether or not any previously connected MST topologies are still present and if so, attempt to resume them. If this fails, we disable said MST topologies and fire off a hotplug event so that userspace knows to reprobe. However, sending a hotplug event involves calling drm_fb_helper_hotplug_event(), which in turn results in fbcon doing a connector reprobe in the caller's thread - something we can't do at the point in which i915 calls drm_dp_mst_topology_mgr_resume() since hotplugging hasn't been fully initialized yet. This currently causes some rather subtle but fatal issues. For example, on my T480s the laptop dock connected to it usually disappears during a suspend cycle, and comes back up a short while after the system has been resumed. This guarantees pretty much every suspend and resume cycle, drm_dp_mst_topology_mgr_set_mst(mgr, false); will be caused and in turn, a connector hotplug will occur. Now it's Rute Goldberg time: when the connector hotplug occurs, i915 reprobes /all/ of the connectors, including eDP. However, eDP probing requires that we power on the panel VDD which in turn, grabs a wakeref to the appropriate power domain on the GPU (on my T480s, this is the PORT_DDI_A_IO domain). This is where things start breaking, since this all happens before intel_power_domains_enable() is called we end up leaking the wakeref that was acquired and never releasing it later. Come next suspend/resume cycle, this causes us to fail to shut down the GPU properly, which causes it not to resume properly and die a horrible complicated death. (as a note: this only happens when there's both an eDP panel and MST topology connected which is removed mid-suspend. One or the other seems to always be OK). We could try to fix the VDD wakeref leak, but this doesn't seem like it's worth it at all since we aren't able to handle hotplug detection while resuming anyway. So, let's go with a more robust solution inspired by nouveau: block fbdev from handling hotplug events until we resume fbdev. This allows us to still send sysfs hotplug events to be handled later by user space while we're resuming, while also preventing us from actually processing any hotplug events we receive until it's safe. This fixes the wakeref leak observed on the T480s and as such, also fixes suspend/resume with MST topologies connected on this machine. Changes since v2: * Don't call drm_fb_helper_hotplug_event() under lock, do it after lock (Chris Wilson) * Don't call drm_fb_helper_hotplug_event() in intel_fbdev_output_poll_changed() under lock (Chris Wilson) * Always set ifbdev->hpd_waiting (Chris Wilson) Signed-off-by: Lyude Paul Fixes: 0e32b39ceed6 ("drm/i915: add DP 1.2 MST support (v0.7)") Cc: Todd Previte Cc: Dave Airlie Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Imre Deak Cc: intel-gfx@lists.freedesktop.org Cc: # v3.17+ Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190129191001.442-2-lyude@redhat.com --- drivers/gpu/drm/i915/intel_drv.h | 10 +++++++++ drivers/gpu/drm/i915/intel_fbdev.c | 33 +++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 35fbc527c19e..79f102dfe37c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -212,6 +212,16 @@ struct intel_fbdev { unsigned long vma_flags; async_cookie_t cookie; int preferred_bpp; + + /* Whether or not fbdev hpd processing is temporarily suspended */ + bool hpd_suspended : 1; + /* Set when a hotplug was received while HPD processing was + * suspended + */ + bool hpd_waiting : 1; + + /* Protects hpd_suspended */ + struct mutex hpd_lock; }; struct intel_encoder { diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 3036d835bc2b..39b314272fe9 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -679,6 +679,7 @@ int intel_fbdev_init(struct drm_device *dev) if (ifbdev == NULL) return -ENOMEM; + mutex_init(&ifbdev->hpd_lock); drm_fb_helper_prepare(dev, &ifbdev->helper, &intel_fb_helper_funcs); if (!intel_fbdev_init_bios(dev, ifbdev)) @@ -752,6 +753,26 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv) intel_fbdev_destroy(ifbdev); } +/* Suspends/resumes fbdev processing of incoming HPD events. When resuming HPD + * processing, fbdev will perform a full connector reprobe if a hotplug event + * was received while HPD was suspended. + */ +static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state) +{ + bool send_hpd = false; + + mutex_lock(&ifbdev->hpd_lock); + ifbdev->hpd_suspended = state == FBINFO_STATE_SUSPENDED; + send_hpd = !ifbdev->hpd_suspended && ifbdev->hpd_waiting; + ifbdev->hpd_waiting = false; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd) { + DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n"); + drm_fb_helper_hotplug_event(&ifbdev->helper); + } +} + void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -773,6 +794,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous */ if (state != FBINFO_STATE_RUNNING) flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); } else { /* @@ -800,17 +822,26 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous drm_fb_helper_set_suspend(&ifbdev->helper, state); console_unlock(); + + intel_fbdev_hpd_set_suspend(ifbdev, state); } void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + bool send_hpd; if (!ifbdev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma || ifbdev->helper.deferred_setup) + + mutex_lock(&ifbdev->hpd_lock); + send_hpd = !ifbdev->hpd_suspended; + ifbdev->hpd_waiting = true; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd && (ifbdev->vma || ifbdev->helper.deferred_setup)) drm_fb_helper_hotplug_event(&ifbdev->helper); } From 6be1cf96bb79e3150b27a490d9709eec6b523224 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 29 Jan 2019 14:10:00 -0500 Subject: [PATCH 494/539] drm/i915: Don't send MST hotplugs during resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a bad habit of calling drm_fb_helper_hotplug_event() far more then we actually need to. MST appears to be one of these cases, where we call drm_fb_helper_hotplug_event() if we fail to resume a connected MST topology in intel_dp_mst_resume(). We don't actually need to do this at all though since hotplug events are already sent from drm_dp_connector_destroy_work() every time connectors are unregistered from userspace's PoV. Additionally, extra calls to drm_fb_helper_hotplug_event() also just mean more of a chance of doing a connector probe somewhere we shouldn't. So, don't send any hotplug events during resume if the MST topology fails to come up. Just rely on the DP MST helpers to send them for us. Signed-off-by: Lyude Paul Cc: Imre Deak Cc: Daniel Vetter Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129191001.442-3-lyude@redhat.com --- drivers/gpu/drm/i915/intel_dp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a15d9fd80641..26c8c0c747e7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -7102,7 +7102,10 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) continue; ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr); - if (ret) - intel_dp_check_mst_status(intel_dp); + if (ret) { + intel_dp->is_mst = false; + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + false); + } } } From 6cbb55c086b4f23ad100f34cdeab496c4e99e0a6 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 29 Jan 2019 14:10:01 -0500 Subject: [PATCH 495/539] drm/i915: Don't send hotplug in intel_dp_check_mst_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This hotplug also isn't needed: drm_dp_mst_topology_mgr_set_mst() already sends a hotplug on its own from drm_dp_destroy_connector_work() after destroying connectors in the MST topology. Signed-off-by: Lyude Paul Cc: Imre Deak Cc: Daniel Vetter Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129191001.442-4-lyude@redhat.com --- drivers/gpu/drm/i915/intel_dp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 26c8c0c747e7..8f38d932f2d1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4614,12 +4614,10 @@ go_again: return ret; } else { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); DRM_DEBUG_KMS("failed to get ESI - device may have failed\n"); intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - /* send a hotplug event */ - drm_kms_helper_hotplug_event(intel_dig_port->base.base.dev); + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); } } return -EINVAL; From a6151792a37523a059763116c9cc442768e7d513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Wed, 19 Dec 2018 16:57:02 +0100 Subject: [PATCH 496/539] drm/exynos: rotator: Add support for s5pv210 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds support for s5pv210. Currently only NV12 and XRGB8888 formats are supported. It was tested by using tool from https://www.spinics.net/lists/linux-samsung-soc/msg60498.html Signed-off-by: Paweł Chmiel Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 8d67b2a54be3..05abfed6f7f8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -356,6 +356,11 @@ static int rotator_runtime_resume(struct device *dev) } #endif +static const struct drm_exynos_ipp_limit rotator_s5pv210_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, +}; + static const struct drm_exynos_ipp_limit rotator_4210_rbg888_limits[] = { { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) }, { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, @@ -371,6 +376,11 @@ static const struct drm_exynos_ipp_limit rotator_5250_rbg888_limits[] = { { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, }; +static const struct drm_exynos_ipp_limit rotator_s5pv210_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = { { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) }, { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, @@ -381,6 +391,11 @@ static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = { { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, }; +static const struct exynos_drm_ipp_formats rotator_s5pv210_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_s5pv210_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_s5pv210_yuv_limits) }, +}; + static const struct exynos_drm_ipp_formats rotator_4210_formats[] = { { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rbg888_limits) }, { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) }, @@ -396,6 +411,11 @@ static const struct exynos_drm_ipp_formats rotator_5250_formats[] = { { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, }; +static const struct rot_variant rotator_s5pv210_data = { + .formats = rotator_s5pv210_formats, + .num_formats = ARRAY_SIZE(rotator_s5pv210_formats), +}; + static const struct rot_variant rotator_4210_data = { .formats = rotator_4210_formats, .num_formats = ARRAY_SIZE(rotator_4210_formats), @@ -413,6 +433,9 @@ static const struct rot_variant rotator_5250_data = { static const struct of_device_id exynos_rotator_match[] = { { + .compatible = "samsung,s5pv210-rotator", + .data = &rotator_s5pv210_data, + }, { .compatible = "samsung,exynos4210-rotator", .data = &rotator_4210_data, }, { From 684c1b1457a2d83bd3138e244af220f943caef9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Fri, 28 Dec 2018 16:19:03 +0100 Subject: [PATCH 497/539] dt-bindings: gpu: samsung-rotator: Document s5pv210 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit documents new compatible for s5pv210 soc, which will be also supported by this driver. Signed-off-by: Paweł Chmiel Reviewed-by: Rob Herring Acked-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- Documentation/devicetree/bindings/gpu/samsung-rotator.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/gpu/samsung-rotator.txt b/Documentation/devicetree/bindings/gpu/samsung-rotator.txt index 82cd1ed0be93..3aca2578da0b 100644 --- a/Documentation/devicetree/bindings/gpu/samsung-rotator.txt +++ b/Documentation/devicetree/bindings/gpu/samsung-rotator.txt @@ -2,9 +2,10 @@ Required properties: - compatible : value should be one of the following: - (a) "samsung,exynos4210-rotator" for Rotator IP in Exynos4210 - (b) "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412 - (c) "samsung,exynos5250-rotator" for Rotator IP in Exynos5250 + * "samsung,s5pv210-rotator" for Rotator IP in S5PV210 + * "samsung,exynos4210-rotator" for Rotator IP in Exynos4210 + * "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412 + * "samsung,exynos5250-rotator" for Rotator IP in Exynos5250 - reg : Physical base address of the IP registers and length of memory mapped region. From 9c0c4997b8a80ec4ba77c4ebe3e74ec0db2d547d Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Thu, 13 Dec 2018 14:19:46 +0100 Subject: [PATCH 498/539] drm/exynos: Change Andrzej Pietrasiewicz's e-mail address My @samusung.com address is going to cease existing soon, so change it to an address which can actually be used to contact me. Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 2 +- drivers/gpu/drm/exynos/regs-scaler.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index 71270efa64f3..ed1dd1aec902 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2017 Samsung Electronics Co.Ltd * Author: - * Andrzej Pietrasiewicz + * Andrzej Pietrasiewicz * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h index fc7ccad75e74..512a2baced11 100644 --- a/drivers/gpu/drm/exynos/regs-scaler.h +++ b/drivers/gpu/drm/exynos/regs-scaler.h @@ -2,7 +2,7 @@ * * Copyright (c) 2017 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * Author: Andrzej Pietrasiewicz + * Author: Andrzej Pietrasiewicz * * Register definition file for Samsung scaler driver * From 5c4604e757ba9b193b09768d75a7d2105a5b883f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 7 Feb 2019 10:54:53 +0200 Subject: [PATCH 499/539] drm/i915: Prevent a race during I915_GEM_MMAP ioctl with WC set Make sure the underlying VMA in the process address space is the same as it was during vm_mmap to avoid applying WC to wrong VMA. A more long-term solution would be to have vm_mmap_locked variant in linux/mmap.h for when caller wants to hold mmap_sem for an extended duration. v2: - Refactor the compare function Fixes: 1816f9236303 ("drm/i915: Support creation of unbound wc user mappings for objects") Reported-by: Adam Zabrocki Suggested-by: Linus Torvalds Signed-off-by: Joonas Lahtinen Cc: # v4.0+ Cc: Akash Goel Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Adam Zabrocki Reviewed-by: Chris Wilson Reviewed-by: Tvrtko Ursulin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190207085454.10598-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 52b5a24be42b..2ea9b423de49 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1679,6 +1679,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} + /** * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address * it is mapped to. @@ -1737,7 +1747,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINTR; } vma = find_vma(mm, addr); - if (vma) + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); else From ebfb6977801da521d8d5d752d373a187e2a2b9b3 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 7 Feb 2019 10:54:54 +0200 Subject: [PATCH 500/539] drm/i915: Handle vm_mmap error during I915_GEM_MMAP ioctl with WC set Add err goto label and use it when VMA can't be established or changes underneath. v2: - Dropping Fixes: as it's indeed impossible to race an object to the error address. (Chris) v3: - Use IS_ERR_VALUE (Chris) Reported-by: Adam Zabrocki Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Adam Zabrocki Reviewed-by: Tvrtko Ursulin #v2 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190207085454.10598-2-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ea9b423de49..1d94bb61a779 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1738,6 +1738,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); + if (IS_ERR_VALUE(addr)) + goto err; + if (args->flags & I915_MMAP_WC) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -1753,17 +1756,22 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, else addr = -ENOMEM; up_write(&mm->mmap_sem); + if (IS_ERR_VALUE(addr)) + goto err; /* This may race, but that's ok, it only gets set */ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put(obj); - if (IS_ERR((void *)addr)) - return addr; args->addr_ptr = (u64)addr; return 0; + +err: + i915_gem_object_put(obj); + + return addr; } static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) From d6f328bfeb0b0a9304166c52ec15c08076ca5246 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Feb 2019 07:18:22 +0000 Subject: [PATCH 501/539] drm/i915: Hack and slash, throttle execbuffer hogs Apply backpressure to hogs that emit requests faster than the GPU can process them by waiting for their ring to be less than half-full before proceeding with taking the struct_mutex. This is a gross hack to apply throttling backpressure, the long term goal is to remove the struct_mutex contention so that each client naturally waits, preferably in an asynchronous, nonblocking fashion (pipelined operations for the win), for their own resources and never blocks another client within the driver at least. (Realtime priority goals would extend to ensuring that resource contention favours high priority clients as well.) This patch only limits excessive request production and does not attempt to throttle clients that block waiting for eviction (either global GTT or system memory) or any other global resources, see above for the long term goal. No microbenchmarks are harmed (to the best of my knowledge). Testcase: igt/gem_exec_schedule/pi-ringfull-* Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: John Harrison Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190207071829.5574-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 13 ----- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 ++++ 3 files changed, 79 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8eedf7cac493..02adcaf6ebea 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -753,6 +753,68 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) +{ + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &ring->request_list, ring_link) { + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->ring_link == &ring->request_list) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int eb_wait_for_ring(const struct i915_execbuffer *eb) +{ + const struct intel_context *ce; + struct i915_request *rq; + int ret = 0; + + /* + * Apply a light amount of backpressure to prevent excessive hogs + * from blocking waiting for space whilst holding struct_mutex and + * keeping all of their resources pinned. + */ + + ce = to_intel_context(eb->ctx, eb->engine); + if (!ce->ring) /* first use, assume empty! */ + return 0; + + rq = __eb_wait_for_ring(ce->ring); + if (rq) { + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + ret = -EINTR; + + i915_request_put(rq); + + mutex_lock(&eb->i915->drm.struct_mutex); + } + + return ret; +} + static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; @@ -2291,6 +2353,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (err) goto err_rpm; + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_unlock; + err = eb_relocate(&eb); if (err) { /* @@ -2435,6 +2501,7 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); +err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: intel_runtime_pm_put(eb.i915, wakeref); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b889b27f8aeb..7f841dba87b3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,19 +49,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) I915_GEM_HWS_INDEX_ADDR); } -static unsigned int __intel_ring_space(unsigned int head, - unsigned int tail, - unsigned int size) -{ - /* - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the - * same cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ - GEM_BUG_ON(!is_power_of_2(size)); - return (head - tail - CACHELINE_BYTES) & (size - 1); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4d4ea6963a72..710ffb221775 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -832,6 +832,18 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) return tail; } +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); int intel_engine_setup_common(struct intel_engine_cs *engine); From 440e84a52ae965c3ceb3153ad59e8ed6e8df9ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Feb 2019 20:54:33 +0200 Subject: [PATCH 502/539] drm/i915: Don't set update_wm_post on g4x+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update_wm_post is meant for pre-g4x only. Don't ever set it on g4x+. The only effect of a bogus update_wm_post on g4x+ could be that we clear the legacy_cursor_update flag in intel_atomic_commit(). Since legacy_cursor_update is only set for legacy cursor updates (as the name suggests) and we only set update_wm_post for a modeset the two cases should never occur at the same time. But let's be consistent in setting update_wm_post so we don't end up confusing so many people. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190206185433.8116-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9db931a450f6..bf564a638dca 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11063,7 +11063,8 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, int ret; bool mode_changed = needs_modeset(crtc_state); - if (mode_changed && !crtc_state->active) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && + mode_changed && !crtc_state->active) pipe_config->update_wm_post = true; if (mode_changed && crtc_state->enable && From de5469c21ff9c3e5ba6162dae58379ed51443164 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:23 +0100 Subject: [PATCH 503/539] gpu: host1x: Program the channel stream ID When processing command streams, make sure the host1x's stream ID is programmed for the channel so that addresses are properly translated through the SMMU. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 13 +++++++++++++ drivers/gpu/host1x/hw/host1x06_hardware.h | 1 + drivers/gpu/host1x/hw/host1x07_hardware.h | 1 + drivers/gpu/host1x/hw/hw_host1x06_channel.h | 11 +++++++++++ drivers/gpu/host1x/hw/hw_host1x07_channel.h | 11 +++++++++++ 5 files changed, 37 insertions(+) create mode 100644 drivers/gpu/host1x/hw/hw_host1x06_channel.h create mode 100644 drivers/gpu/host1x/hw/hw_host1x07_channel.h diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 95ea81172a83..3067af4452cd 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -17,6 +17,7 @@ */ #include +#include #include #include @@ -89,6 +90,16 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value)); } +static void host1x_channel_set_streamid(struct host1x_channel *channel) +{ +#if HOST1X_HW >= 6 + struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); + u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f; + + host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID); +#endif +} + static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; @@ -120,6 +131,8 @@ static int channel_submit(struct host1x_job *job) goto error; } + host1x_channel_set_streamid(ch); + /* begin a CDMA submit */ err = host1x_cdma_begin(&ch->cdma, job); if (err) { diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h index 3039c92ea605..eab753b91f24 100644 --- a/drivers/gpu/host1x/hw/host1x06_hardware.h +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -22,6 +22,7 @@ #include #include +#include "hw_host1x06_channel.h" #include "hw_host1x06_uclass.h" #include "hw_host1x06_vm.h" #include "hw_host1x06_hypervisor.h" diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h index 1353e7ab71dd..a79f57dc87bb 100644 --- a/drivers/gpu/host1x/hw/host1x07_hardware.h +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -22,6 +22,7 @@ #include #include +#include "hw_host1x07_channel.h" #include "hw_host1x07_uclass.h" #include "hw_host1x07_vm.h" #include "hw_host1x07_hypervisor.h" diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h new file mode 100644 index 000000000000..18ae1c57bbea --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X06_CHANNEL_H +#define HOST1X_HW_HOST1X06_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h new file mode 100644 index 000000000000..96fa72bbd7ab --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X07_CHANNEL_H +#define HOST1X_HW_HOST1X07_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif From 5a5fccbd8c315c08db01e585e1cbe88e30b70691 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:24 +0100 Subject: [PATCH 504/539] gpu: host1x: Introduce support for wide opcodes The CDMA push buffer can currently only handle opcodes that take a single word parameter. However, the host1x implementation on Tegra186 and later supports opcodes that require multiple words as parameters. Unfortunately the way the push buffer is structured, these wide opcodes cannot simply be composed of two regular opcodes because that could result in the wide opcode being split across the end of the push buffer and the final RESTART opcode required to wrap the push buffer around would break the wide opcode. One way to fix this would be to remove the concept of slots to simplify push buffer operations. However, that's not entirely trivial and should be done in a separate patch. For now, simply use a different function to push four-word opcodes into the push buffer. Technically only three words are pushed, with the fourth word used as padding to preserve the 2-word alignment required by the slots abstraction. The fourth word is always a NOP opcode. Additional care must be taken when the end of the push buffer is reached. If a four-word opcode doesn't fit into the push buffer without being split by the boundary, NOP opcodes will be introduced and the new wide opcode placed at the beginning of the push buffer. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 92 +++++++++++++++++++++++++++++++++++ drivers/gpu/host1x/cdma.h | 2 + include/trace/events/host1x.h | 26 ++++++++++ 3 files changed, 120 insertions(+) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 15bdeb836e3d..64099cc1964b 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -217,6 +217,45 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, return 0; } +/* + * Sleep (if necessary) until the push buffer has enough free space. + * + * Must be called with the cdma lock held. + */ +int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) +{ + while (true) { + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space; + + space = host1x_pushbuffer_space(pb); + if (space >= needed) + break; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + CDMA_EVENT_PUSH_BUFFER_SPACE); + + host1x_hw_cdma_flush(host1x, cdma); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + + cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; + + mutex_unlock(&cdma->lock); + wait_for_completion(&cdma->complete); + mutex_lock(&cdma->lock); + } + + return 0; +} /* * Start timer that tracks the time spent by the job. * Must be called with the cdma lock held. @@ -509,6 +548,59 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) host1x_pushbuffer_push(pb, op1, op2); } +/* + * Push four words into two consecutive push buffer slots. Note that extra + * care needs to be taken not to split the two slots across the end of the + * push buffer. Otherwise the RESTART opcode at the end of the push buffer + * that ensures processing will restart at the beginning will break up the + * four words. + * + * Blocks as necessary if the push buffer is full. + */ +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4) +{ + struct host1x_channel *channel = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + unsigned int needed = 2, extra = 0, i; + unsigned int space = cdma->slots_free; + + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, + op3, op4); + + /* compute number of extra slots needed for padding */ + if (pb->pos + 16 > pb->size) { + extra = (pb->size - pb->pos) / 8; + needed += extra; + } + + host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); + space = host1x_pushbuffer_space(pb); + + cdma->slots_free = space - needed; + cdma->slots_used += needed; + + /* + * Note that we rely on the fact that this is only used to submit wide + * gather opcodes, which consist of 3 words, and they are padded with + * a NOP to avoid having to deal with fractional slots (a slot always + * represents 2 words). The fourth opcode passed to this function will + * therefore always be a NOP. + * + * This works around a slight ambiguity when it comes to opcodes. For + * all current host1x incarnations the NOP opcode uses the exact same + * encoding (0x20000000), so we could hard-code the value here, but a + * new incarnation may change it and break that assumption. + */ + for (i = 0; i < extra; i++) + host1x_pushbuffer_push(pb, op4, op4); + + host1x_pushbuffer_push(pb, op1, op2); + host1x_pushbuffer_push(pb, op3, op4); +} + /* * End a cdma submit * Kick off DMA, add job to the sync queue, and a number of slots to be freed diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 71078359c626..3a5e0408b8d1 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -90,6 +90,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma); int host1x_cdma_deinit(struct host1x_cdma *cdma); int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4); void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); void host1x_cdma_update(struct host1x_cdma *cdma); void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot, diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h index a37ef73092e5..3d340b6f1ea3 100644 --- a/include/trace/events/host1x.h +++ b/include/trace/events/host1x.h @@ -80,6 +80,32 @@ TRACE_EVENT(host1x_cdma_push, __entry->name, __entry->op1, __entry->op2) ); +TRACE_EVENT(host1x_cdma_push_wide, + TP_PROTO(const char *name, u32 op1, u32 op2, u32 op3, u32 op4), + + TP_ARGS(name, op1, op2, op3, op4), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, op1) + __field(u32, op2) + __field(u32, op3) + __field(u32, op4) + ), + + TP_fast_assign( + __entry->name = name; + __entry->op1 = op1; + __entry->op2 = op2; + __entry->op3 = op3; + __entry->op4 = op4; + ), + + TP_printk("name=%s, op1=%08x, op2=%08x, op3=%08x op4=%08x", + __entry->name, __entry->op1, __entry->op2, __entry->op3, + __entry->op4) +); + TRACE_EVENT(host1x_cdma_push_gather, TP_PROTO(const char *name, struct host1x_bo *bo, u32 words, u32 offset, void *cmdbuf), From 67a82dbc0a374df7a348cc8fb28982945035bd25 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:25 +0100 Subject: [PATCH 505/539] gpu: host1x: Support 40-bit addressing Tegra186 and later support 40 bits of address space. Additional registers need to be programmed to store the full 40 bits of push buffer addresses. Since command stream gathers can also reside in buffers in a 40-bit address space, a new variant of the GATHER opcode is also introduced. It takes two parameters: the first parameter contains the lower 32 bits of the address and the second parameter contains bits 32 to 39. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/cdma_hw.c | 32 ++++++++++++++++++----- drivers/gpu/host1x/hw/channel_hw.c | 30 ++++++++++++++++++--- drivers/gpu/host1x/hw/host1x06_hardware.h | 5 ++++ drivers/gpu/host1x/hw/host1x07_hardware.h | 5 ++++ 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index ce320534cbed..485aef5761af 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -68,20 +68,31 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, static void cdma_start(struct host1x_cdma *cdma) { struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; if (cdma->running) return; cdma->last_pos = cdma->push_buffer.pos; + start = cdma->push_buffer.dma; + end = start + cdma->push_buffer.size + 4; host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); /* set base, put and end pointer */ - host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); + host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); - host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4, - HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif /* reset GET */ host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | @@ -104,6 +115,7 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) { struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; if (cdma->running) return; @@ -113,10 +125,18 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); + start = cdma->push_buffer.dma; + end = start + cdma->push_buffer.size + 4; + /* set base, end pointer (all of memory) */ - host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); - host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size, - HOST1X_CHANNEL_DMAEND); + host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif /* set GET, by loading the value in PUT (then reset GET) */ host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT); diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 3067af4452cd..27101c04a827 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -61,15 +61,37 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, static void submit_gathers(struct host1x_job *job) { struct host1x_cdma *cdma = &job->channel->cdma; +#if HOST1X_HW < 6 + struct device *dev = job->channel->dev; +#endif unsigned int i; for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; - u32 op1 = host1x_opcode_gather(g->words); - u32 op2 = g->base + g->offset; + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; - trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff); - host1x_cdma_push(cdma, op1, op2); + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); + + trace_write_gather(cdma, g->bo, g->offset, g->words); + + if (op3 != 0) { +#if HOST1X_HW >= 6 + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; + + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); +#else + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; +#endif + } else { + u32 op1 = host1x_opcode_gather(g->words); + + host1x_cdma_push(cdma, op1, op2); + } } } diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h index eab753b91f24..dd37b10c8d04 100644 --- a/drivers/gpu/host1x/hw/host1x06_hardware.h +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -138,6 +138,11 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; } +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + #define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) #endif diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h index a79f57dc87bb..9f6da4ee5443 100644 --- a/drivers/gpu/host1x/hw/host1x07_hardware.h +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -138,6 +138,11 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; } +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + #define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) #endif From 38fabcc953883741313d707a919326f77c2d7214 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:27 +0100 Subject: [PATCH 506/539] gpu: host1x: Restrict IOVA space to DMA mask On Tegra186 and later, the ARM SMMU provides an input address space that is 48 bits wide. However, memory clients can only address up to 40 bits. If the geometry is used as-is, allocations of IOVA space can end up in a region that is not addressable by the memory clients. To fix this, restrict the IOVA space to the DMA mask of the host1x device. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4c044ee54fe6..544b67f2b3ff 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -283,6 +283,8 @@ static int host1x_probe(struct platform_device *pdev) host->group = iommu_group_get(&pdev->dev); if (host->group) { struct iommu_domain_geometry *geometry; + u64 mask = dma_get_mask(host->dev); + dma_addr_t start, end; unsigned long order; err = iova_cache_get(); @@ -310,11 +312,12 @@ static int host1x_probe(struct platform_device *pdev) } geometry = &host->domain->geometry; + start = geometry->aperture_start & mask; + end = geometry->aperture_end & mask; order = __ffs(host->domain->pgsize_bitmap); - init_iova_domain(&host->iova, 1UL << order, - geometry->aperture_start >> order); - host->iova_end = geometry->aperture_end; + init_iova_domain(&host->iova, 1UL << order, start >> order); + host->iova_end = end; } skip_iommu: From 8de896eb206fea3caa26b5fc8e637934d8486f0f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:28 +0100 Subject: [PATCH 507/539] gpu: host1x: Support 40-bit addressing on Tegra186 The host1x and clients instantiated on Tegra186 support addressing 40 bits of memory. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 544b67f2b3ff..ee3c7b81a29d 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -136,7 +136,7 @@ static const struct host1x_info host1x06_info = { .nb_bases = 16, .init = host1x06_init, .sync_offset = 0x0, - .dma_mask = DMA_BIT_MASK(34), + .dma_mask = DMA_BIT_MASK(40), .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, From 0e43b8da154a95f4369da4068a43ad9d700f4cea Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:29 +0100 Subject: [PATCH 508/539] gpu: host1x: Use correct semantics for HOST1X_CHANNEL_DMAEND The HOST1X_CHANNEL_DMAEND is an offset relative to the value written to the HOST1X_CHANNEL_DMASTART register, but it is currently treated as an absolute address. This can cause SMMU faults if the CDMA fetches past a pushbuffer's IOMMU mapping. Properly setting the DMAEND prevents the CDMA from fetching beyond that address and avoid such issues. This is currently not observed because a whole (almost) page of essentially scratch space absorbs any excessive prefetching by CDMA. However, changing the number of slots in the push buffer can trigger these SMMU faults. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/cdma_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 485aef5761af..a24c090ac96f 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -75,7 +75,7 @@ static void cdma_start(struct host1x_cdma *cdma) cdma->last_pos = cdma->push_buffer.pos; start = cdma->push_buffer.dma; - end = start + cdma->push_buffer.size + 4; + end = cdma->push_buffer.size + 4; host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); @@ -126,7 +126,7 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) HOST1X_CHANNEL_DMACTRL); start = cdma->push_buffer.dma; - end = start + cdma->push_buffer.size + 4; + end = cdma->push_buffer.size + 4; /* set base, end pointer (all of memory) */ host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); From e1f338c0f8a9aacf351e42e5cfd0639fc73dc5b9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:30 +0100 Subject: [PATCH 509/539] gpu: host1x: Optimize CDMA push buffer memory usage The host1x CDMA push buffer is terminated by a special opcode (RESTART) that tells the CDMA to wrap around to the beginning of the push buffer. To accomodate the RESTART opcode, an extra 4 bytes are allocated on top of the 512 * 8 = 4096 bytes needed for the 512 slots (1 slot = 2 words) that are used for other commands passed to CDMA. This requires that two memory pages are allocated, but most of the second page (4092 bytes) is never used. Decrease the number of slots to 511 so that the RESTART opcode fits within the page. Adjust the push buffer wraparound code to take into account push buffer sizes that are not a power of two. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 64099cc1964b..07df85b92ebf 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -41,7 +41,17 @@ * means that the push buffer is full, not empty. */ -#define HOST1X_PUSHBUFFER_SLOTS 512 +/* + * Typically the commands written into the push buffer are a pair of words. We + * use slots to represent each of these pairs and to simplify things. Note the + * strange number of slots allocated here. 512 slots will fit exactly within a + * single memory page. We also need one additional word at the end of the push + * buffer for the RESTART opcode that will instruct the CDMA to jump back to + * the beginning of the push buffer. With 512 slots, this means that we'll use + * 2 memory pages and waste 4092 bytes of the second page that will never be + * used. + */ +#define HOST1X_PUSHBUFFER_SLOTS 511 /* * Clean up push buffer resources @@ -143,7 +153,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) WARN_ON(pb->pos == pb->fence); *(p++) = op1; *(p++) = op2; - pb->pos = (pb->pos + 8) & (pb->size - 1); + pb->pos += 8; + + if (pb->pos >= pb->size) + pb->pos -= pb->size; } /* @@ -153,7 +166,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) { /* Advance the next write position */ - pb->fence = (pb->fence + slots * 8) & (pb->size - 1); + pb->fence += slots * 8; + + if (pb->fence >= pb->size) + pb->fence -= pb->size; } /* @@ -161,7 +177,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) */ static u32 host1x_pushbuffer_space(struct push_buffer *pb) { - return ((pb->fence - pb->pos) & (pb->size - 1)) / 8; + unsigned int fence = pb->fence; + + if (pb->fence < pb->pos) + fence += pb->size; + + return (fence - pb->pos) / 8; } /* From 8e5d19c625f94e91d42aaf808e1231f636cf1365 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:31 +0100 Subject: [PATCH 510/539] drm/tegra: Store parent pointer in Tegra DRM clients Tegra DRM clients need access to their parent, so store a pointer to it upon registration. It's technically possible to get at this by going via the host1x client's parent and getting the driver data, but that's quite complicated and not very transparent. It's much more straightforward and natural to let the children know about their parent. Signed-off-by: Thierry Reding Reviewed-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/drm.c | 2 ++ drivers/gpu/drm/tegra/drm.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 4b70ce664c41..61dcbd218ffc 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1041,6 +1041,7 @@ int tegra_drm_register_client(struct tegra_drm *tegra, { mutex_lock(&tegra->clients_lock); list_add_tail(&client->list, &tegra->clients); + client->drm = tegra; mutex_unlock(&tegra->clients_lock); return 0; @@ -1051,6 +1052,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, { mutex_lock(&tegra->clients_lock); list_del_init(&client->list); + client->drm = NULL; mutex_unlock(&tegra->clients_lock); return 0; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index f1763b4d5b5f..2c809755bca7 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -88,6 +88,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct tegra_drm_client { struct host1x_client base; struct list_head list; + struct tegra_drm *drm; unsigned int version; const struct tegra_drm_client_ops *ops; From 77a0b09dd993c83ee7c770cc704e9bec18fd19c7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:32 +0100 Subject: [PATCH 511/539] drm/tegra: vic: Load firmware on demand Loading the firmware requires an allocation of IOVA space to make sure that the VIC's Falcon microcontroller can read the firmware if address translation via the SMMU is enabled. However, the allocation currently happens at a time where the geometry of an IOMMU domain may not have been initialized yet. This happens for example on Tegra186 and later where an ARM SMMU is used. Domains which are created by the ARM SMMU driver postpone the geometry setup until a device is attached to the domain. This is because IOMMU domains aren't attached to a specific IOMMU instance at allocation time and hence the input address space, which defines the geometry, is not known yet. Work around this by postponing the firmware load until it is needed at the time where a channel is opened to the VIC. At this time the shared IOMMU domain's geometry has been properly initialized. As a byproduct this allows the Tegra DRM to be created in the absence of VIC firmware, since the VIC initialization no longer fails if the firmware can't be found. Based on an earlier patch by Dmitry Osipenko . Signed-off-by: Thierry Reding Reviewed-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/vic.c | 53 +++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index d47983deb1cf..739b894661ab 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -181,13 +181,6 @@ static int vic_init(struct host1x_client *client) vic->domain = tegra->domain; } - if (!vic->falcon.data) { - vic->falcon.data = tegra; - err = falcon_load_firmware(&vic->falcon); - if (err < 0) - goto detach; - } - vic->channel = host1x_channel_request(client->dev); if (!vic->channel) { err = -ENOMEM; @@ -246,6 +239,30 @@ static const struct host1x_client_ops vic_client_ops = { .exit = vic_exit, }; +static int vic_load_firmware(struct vic *vic) +{ + int err; + + if (vic->falcon.data) + return 0; + + vic->falcon.data = vic->client.drm; + + err = falcon_read_firmware(&vic->falcon, vic->config->firmware); + if (err < 0) + goto cleanup; + + err = falcon_load_firmware(&vic->falcon); + if (err < 0) + goto cleanup; + + return 0; + +cleanup: + vic->falcon.data = NULL; + return err; +} + static int vic_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { @@ -256,19 +273,25 @@ static int vic_open_channel(struct tegra_drm_client *client, if (err < 0) return err; + err = vic_load_firmware(vic); + if (err < 0) + goto rpm_put; + err = vic_boot(vic); - if (err < 0) { - pm_runtime_put(vic->dev); - return err; - } + if (err < 0) + goto rpm_put; context->channel = host1x_channel_get(vic->channel); if (!context->channel) { - pm_runtime_put(vic->dev); - return -ENOMEM; + err = -ENOMEM; + goto rpm_put; } return 0; + +rpm_put: + pm_runtime_put(vic->dev); + return err; } static void vic_close_channel(struct tegra_drm_context *context) @@ -372,10 +395,6 @@ static int vic_probe(struct platform_device *pdev) if (err < 0) return err; - err = falcon_read_firmware(&vic->falcon, vic->config->firmware); - if (err < 0) - goto exit_falcon; - platform_set_drvdata(pdev, vic); INIT_LIST_HEAD(&vic->client.base.list); From b9f8b09ce256e2e197b9a38984dea64e3404728c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:33 +0100 Subject: [PATCH 512/539] drm/tegra: Setup shared IOMMU domain after initialization Move initialization of the shared IOMMU domain after the host1x device has been initialized. At this point all the Tegra DRM clients have been attached to the shared IOMMU domain. This is important because Tegra186 and later use an ARM SMMU, for which the driver defers setting up the geometry for a domain until a device is attached to it. This is to ensure that the domain is properly set up for a specific ARM SMMU instance, which is unknown at allocation time. Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 54 ++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 61dcbd218ffc..271c7a5fc954 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -92,10 +92,6 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) return -ENOMEM; if (iommu_present(&platform_bus_type)) { - u64 carveout_start, carveout_end, gem_start, gem_end; - struct iommu_domain_geometry *geometry; - unsigned long order; - tegra->domain = iommu_domain_alloc(&platform_bus_type); if (!tegra->domain) { err = -ENOMEM; @@ -105,27 +101,6 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) err = iova_cache_get(); if (err < 0) goto domain; - - geometry = &tegra->domain->geometry; - gem_start = geometry->aperture_start; - gem_end = geometry->aperture_end - CARVEOUT_SZ; - carveout_start = gem_end + 1; - carveout_end = geometry->aperture_end; - - order = __ffs(tegra->domain->pgsize_bitmap); - init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order); - - tegra->carveout.shift = iova_shift(&tegra->carveout.domain); - tegra->carveout.limit = carveout_end >> tegra->carveout.shift; - - drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); - mutex_init(&tegra->mm_lock); - - DRM_DEBUG("IOMMU apertures:\n"); - DRM_DEBUG(" GEM: %#llx-%#llx\n", gem_start, gem_end); - DRM_DEBUG(" Carveout: %#llx-%#llx\n", carveout_start, - carveout_end); } mutex_init(&tegra->clients_lock); @@ -159,6 +134,35 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) if (err < 0) goto fbdev; + if (tegra->domain) { + u64 carveout_start, carveout_end, gem_start, gem_end; + dma_addr_t start, end; + unsigned long order; + + start = tegra->domain->geometry.aperture_start; + end = tegra->domain->geometry.aperture_end; + + gem_start = start; + gem_end = end - CARVEOUT_SZ; + carveout_start = gem_end + 1; + carveout_end = end; + + order = __ffs(tegra->domain->pgsize_bitmap); + init_iova_domain(&tegra->carveout.domain, 1UL << order, + carveout_start >> order); + + tegra->carveout.shift = iova_shift(&tegra->carveout.domain); + tegra->carveout.limit = carveout_end >> tegra->carveout.shift; + + drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); + mutex_init(&tegra->mm_lock); + + DRM_DEBUG("IOMMU apertures:\n"); + DRM_DEBUG(" GEM: %#llx-%#llx\n", gem_start, gem_end); + DRM_DEBUG(" Carveout: %#llx-%#llx\n", carveout_start, + carveout_end); + } + if (tegra->hub) { err = tegra_display_hub_prepare(tegra->hub); if (err < 0) From 02be8e4fbb1f352f2ed6251db9fddd646d531b6e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:34 +0100 Subject: [PATCH 513/539] drm/tegra: Restrict IOVA space to DMA mask On Tegra186 and later, the ARM SMMU provides an input address space that is 48 bits wide. However, memory clients can only address up to 40 bits. If the geometry is used as-is, allocations of IOVA space can end up in a region that cannot be addressed by the memory clients. To fix this, restrict the IOVA space to the DMA mask of the host1x device. Note that, technically, the IOVA space needs to be restricted to the intersection of the DMA masks for all clients that are attached to the IOMMU domain. In practice using the DMA mask of the host1x device is sufficient because all host1x clients share the same DMA mask. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 271c7a5fc954..0c5f1e6a0446 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -136,11 +136,12 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) if (tegra->domain) { u64 carveout_start, carveout_end, gem_start, gem_end; + u64 dma_mask = dma_get_mask(&device->dev); dma_addr_t start, end; unsigned long order; - start = tegra->domain->geometry.aperture_start; - end = tegra->domain->geometry.aperture_end; + start = tegra->domain->geometry.aperture_start & dma_mask; + end = tegra->domain->geometry.aperture_end & dma_mask; gem_start = start; gem_end = end - CARVEOUT_SZ; From 3ff41673d5c6842e6668f95b0a14e5f6a74d043f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:35 +0100 Subject: [PATCH 514/539] drm/tegra: vic: Do not clear driver data Upon driver failure, the driver core will take care of clearing the driver data, so there's no need to do so explicitly in the driver. Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/vic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 739b894661ab..55a8cc162e9d 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -412,7 +412,6 @@ static int vic_probe(struct platform_device *pdev) err = host1x_client_register(&vic->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - platform_set_drvdata(pdev, NULL); goto exit_falcon; } From f3779cb190a5a12d2e26fd5af724fb1384a9144f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:36 +0100 Subject: [PATCH 515/539] drm/tegra: vic: Support stream ID register programming The version of VIC found in Tegra186 and later incorporates improvements with regards to context isolation. As part of those improvements, stream ID registers were added that allow to specify separate stream IDs for the Falcon microcontroller and the VIC memory interface. While it is possible to also set the stream ID dynamically at runtime to allow userspace contexts to be completely separated, this commit doesn't implement that yet. Instead, the static VIC stream ID is programmed when the Falcon is booted. This ensures that memory accesses by the Falcon or the VIC are properly translated via the SMMU. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/vic.c | 21 +++++++++++++++++++++ drivers/gpu/drm/tegra/vic.h | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 55a8cc162e9d..39bfed9623de 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -26,6 +26,7 @@ struct vic_config { const char *firmware; unsigned int version; + bool supports_sid; }; struct vic { @@ -105,6 +106,22 @@ static int vic_boot(struct vic *vic) if (vic->booted) return 0; + if (vic->config->supports_sid) { + struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); + u32 value; + + value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | + TRANSCFG_ATT(0, TRANSCFG_SID_HW); + vic_writel(vic, value, VIC_TFBIF_TRANSCFG); + + if (spec && spec->num_ids > 0) { + value = spec->ids[0] & 0xffff; + + vic_writel(vic, value, VIC_THI_STREAMID0); + vic_writel(vic, value, VIC_THI_STREAMID1); + } + } + /* setup clockgating registers */ vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) | CG_IDLE_CG_EN | @@ -314,6 +331,7 @@ static const struct tegra_drm_client_ops vic_ops = { static const struct vic_config vic_t124_config = { .firmware = NVIDIA_TEGRA_124_VIC_FIRMWARE, .version = 0x40, + .supports_sid = false, }; #define NVIDIA_TEGRA_210_VIC_FIRMWARE "nvidia/tegra210/vic04_ucode.bin" @@ -321,6 +339,7 @@ static const struct vic_config vic_t124_config = { static const struct vic_config vic_t210_config = { .firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE, .version = 0x21, + .supports_sid = false, }; #define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin" @@ -328,6 +347,7 @@ static const struct vic_config vic_t210_config = { static const struct vic_config vic_t186_config = { .firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE, .version = 0x18, + .supports_sid = true, }; #define NVIDIA_TEGRA_194_VIC_FIRMWARE "nvidia/tegra194/vic.bin" @@ -335,6 +355,7 @@ static const struct vic_config vic_t186_config = { static const struct vic_config vic_t194_config = { .firmware = NVIDIA_TEGRA_194_VIC_FIRMWARE, .version = 0x19, + .supports_sid = true, }; static const struct of_device_id vic_match[] = { diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h index 21844817a7e1..017584340dd6 100644 --- a/drivers/gpu/drm/tegra/vic.h +++ b/drivers/gpu/drm/tegra/vic.h @@ -17,11 +17,20 @@ /* VIC registers */ +#define VIC_THI_STREAMID0 0x00000030 +#define VIC_THI_STREAMID1 0x00000034 + #define NV_PVIC_MISC_PRI_VIC_CG 0x000016d0 #define CG_IDLE_CG_DLY_CNT(val) ((val & 0x3f) << 0) #define CG_IDLE_CG_EN (1 << 6) #define CG_WAKEUP_DLY_CNT(val) ((val & 0xf) << 16) +#define VIC_TFBIF_TRANSCFG 0x00002044 +#define TRANSCFG_ATT(i, v) (((v) & 0x3) << (i * 4)) +#define TRANSCFG_SID_HW 0 +#define TRANSCFG_SID_PHY 1 +#define TRANSCFG_SID_FALCON 2 + /* Firmware offsets */ #define VIC_UCODE_FCE_HEADER_OFFSET (6*4) From 6c2b3881d0df85fed7e3f43dcc9cbc3e5124bc12 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 25 Jan 2019 11:00:57 +0100 Subject: [PATCH 516/539] dt-bindings: display: tegra: Support SOR crossbar configuration The SOR has a crossbar that can map each lane of the SOR to each of the SOR pads. The mapping is usually the same across designs for a specific SoC generation, but every now and then there's a design that doesn't. Allow the crossbar configuration to be specified in device tree to make it possible to support these designs. Signed-off-by: Thierry Reding --- .../bindings/display/tegra/nvidia,tegra20-host1x.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 593be44a53c9..9999255ac5b6 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -238,6 +238,9 @@ of the following host1x client modules: - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection - nvidia,edid: supplies a binary EDID blob - nvidia,panel: phandle of a display panel + - nvidia,xbar-cfg: 5 cells containing the crossbar configuration. Each lane + of the SOR, identified by the cell's index, is mapped via the crossbar to + the pad specified by the cell's value. Optional properties when driving an eDP output: - nvidia,dpaux: phandle to a DispayPort AUX interface From 6d6c815daad8ec3469135c310ed39849cbe7752c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 25 Jan 2019 11:00:58 +0100 Subject: [PATCH 517/539] drm/tegra: sor: Support device tree crossbar configuration The crossbar configuration is usually the same across all designs for a given SoC generation. But sometimes there are designs that require some other configuration. Implement support for parsing the crossbar configuration from a device tree. If the crossbar configuration is not present in the device tree, fall back to the default crossbar configuration. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 79602debf231..beb8fda25826 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -410,6 +410,8 @@ struct tegra_sor { struct clk *clk_dp; struct clk *clk; + u8 xbar_cfg[5]; + struct drm_dp_aux *aux; struct drm_info_list *debugfs_files; @@ -1814,7 +1816,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder) /* XXX not in TRM */ for (value = 0, i = 0; i < 5; i++) - value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) | + value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) | SOR_XBAR_CTRL_LINK1_XSEL(i, i); tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL); @@ -2550,7 +2552,7 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) /* XXX not in TRM */ for (value = 0, i = 0; i < 5; i++) - value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) | + value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) | SOR_XBAR_CTRL_LINK1_XSEL(i, i); tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL); @@ -3171,6 +3173,8 @@ MODULE_DEVICE_TABLE(of, tegra_sor_of_match); static int tegra_sor_parse_dt(struct tegra_sor *sor) { struct device_node *np = sor->dev->of_node; + u32 xbar_cfg[5]; + unsigned int i; u32 value; int err; @@ -3188,6 +3192,17 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor) sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index; } + err = of_property_read_u32_array(np, "nvidia,xbar-cfg", xbar_cfg, 5); + if (err < 0) { + /* fall back to default per-SoC XBAR configuration */ + for (i = 0; i < 5; i++) + sor->xbar_cfg[i] = sor->soc->xbar_cfg[i]; + } else { + /* copy cells to SOR XBAR configuration */ + for (i = 0; i < 5; i++) + sor->xbar_cfg[i] = xbar_cfg[i]; + } + return 0; } From e8bad659381e9d63164f61e3bdf37e1c12274016 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 7 Aug 2018 16:07:11 +0300 Subject: [PATCH 518/539] gpu: host1x: Cancel only job that actually got stuck Host1x doesn't have information about jobs inter-dependency, that is something that will become available once host1x will get a proper jobs scheduler implementation. Currently a hang job causes other unrelated jobs to be canceled, that is a relic from downstream driver which is irrelevant to upstream. Let's cancel only the hanging job and not to touch other jobs in queue. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 07df85b92ebf..2314f37cb735 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -408,13 +408,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, } /* - * Walk the sync_queue, first incrementing with the CPU syncpts that - * are partially executed (the first buffer) or fully skipped while - * still in the current context (slots are also NOP-ed). + * Increment with CPU the remaining syncpts of a partially executed job. * - * At the point contexts are interleaved, syncpt increments must be - * done inline with the pushbuffer from a GATHER buffer to maintain - * the order (slots are modified to be a GATHER of syncpt incrs). + * Syncpt increments must be done inline with the pushbuffer from a + * GATHER buffer to maintain the order (slots are modified to be a + * GATHER of syncpt incrs). * * Note: save in restart_addr the location where the timed out buffer * started in the PB, so we can start the refetch from there (with the @@ -422,20 +420,15 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, * properly for this buffer and resources are freed. */ - dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", - __func__); + dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__); if (!list_empty(&cdma->sync_queue)) restart_addr = job->first_get; else restart_addr = cdma->last_pos; - /* do CPU increments as long as this context continues */ - list_for_each_entry_from(job, &cdma->sync_queue, list) { - /* different context, gets us out of this loop */ - if (job->client != cdma->timeout.client) - break; - + /* do CPU increments for the remaining syncpts */ + if (job) { /* won't need a timeout when replayed */ job->timeout = 0; @@ -448,20 +441,8 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, syncpt_incrs, job->syncpt_end, job->num_slots); - - syncpt_val += syncpt_incrs; } - /* - * The following sumbits from the same client may be dependent on the - * failed submit and therefore they may fail. Force a small timeout - * to make the queue cleanup faster. - */ - - list_for_each_entry_from(job, &cdma->sync_queue, list) - if (job->client == cdma->timeout.client) - job->timeout = min_t(unsigned int, job->timeout, 500); - dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); /* roll back DMAGET and start up channel again */ From 5d6f043685fed86153ea6aeadd098a11267f58ef Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 7 Aug 2018 16:07:12 +0300 Subject: [PATCH 519/539] gpu: host1x: Don't complete a completed job There is a chance that the last job has been completed at the time of CDMA timeout handler invocation. In this case there is no need to complete the completed job. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 2314f37cb735..7556e0efd35a 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -383,7 +383,7 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, { struct host1x *host1x = cdma_to_host1x(cdma); u32 restart_addr, syncpt_incrs, syncpt_val; - struct host1x_job *job = NULL; + struct host1x_job *job; syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); @@ -402,11 +402,16 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, list_for_each_entry(job, &cdma->sync_queue, list) { if (syncpt_val < job->syncpt_end) - break; + goto syncpt_incr; host1x_job_dump(dev, job); } + /* all jobs have been completed */ + job = NULL; + +syncpt_incr: + /* * Increment with CPU the remaining syncpts of a partially executed job. * @@ -419,16 +424,16 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, * modified NOP-ed PB slots). This lets things appear to have completed * properly for this buffer and resources are freed. */ - - dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__); - - if (!list_empty(&cdma->sync_queue)) + if (job) restart_addr = job->first_get; else restart_addr = cdma->last_pos; /* do CPU increments for the remaining syncpts */ if (job) { + dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", + __func__); + /* won't need a timeout when replayed */ job->timeout = 0; @@ -441,9 +446,10 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, syncpt_incrs, job->syncpt_end, job->num_slots); - } - dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + dev_dbg(dev, "%s: finished sync_queue modification\n", + __func__); + } /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); From 79930bafe2802c3a67a70ad4904032d9154bf3fa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 7 Aug 2018 16:07:13 +0300 Subject: [PATCH 520/539] gpu: host1x: Continue CDMA execution starting with a next job Currently gathers of a hung job are getting NOP'ed and a restarted CDMA executes the NOP'ed gathers. There shouldn't be a reason to not restart CDMA execution starting with a next job, avoiding the unnecessary churning with gathers NOP'ing. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/cdma.c | 23 +++++++++++------------ drivers/gpu/host1x/hw/cdma_hw.c | 14 -------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 7556e0efd35a..f45b7c69b694 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -383,7 +383,7 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, { struct host1x *host1x = cdma_to_host1x(cdma); u32 restart_addr, syncpt_incrs, syncpt_val; - struct host1x_job *job; + struct host1x_job *job, *next_job = NULL; syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); @@ -401,8 +401,13 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, __func__); list_for_each_entry(job, &cdma->sync_queue, list) { - if (syncpt_val < job->syncpt_end) + if (syncpt_val < job->syncpt_end) { + + if (!list_is_last(&job->list, &cdma->sync_queue)) + next_job = list_next_entry(job, list); + goto syncpt_incr; + } host1x_job_dump(dev, job); } @@ -415,17 +420,11 @@ syncpt_incr: /* * Increment with CPU the remaining syncpts of a partially executed job. * - * Syncpt increments must be done inline with the pushbuffer from a - * GATHER buffer to maintain the order (slots are modified to be a - * GATHER of syncpt incrs). - * - * Note: save in restart_addr the location where the timed out buffer - * started in the PB, so we can start the refetch from there (with the - * modified NOP-ed PB slots). This lets things appear to have completed - * properly for this buffer and resources are freed. + * CDMA will continue execution starting with the next job or will get + * into idle state. */ - if (job) - restart_addr = job->first_get; + if (next_job) + restart_addr = next_job->first_get; else restart_addr = cdma->last_pos; diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index a24c090ac96f..5d61088db2bb 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -39,8 +39,6 @@ static void push_buffer_init(struct push_buffer *pb) static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, u32 syncpt_incrs, u32 syncval, u32 nr_slots) { - struct host1x *host1x = cdma_to_host1x(cdma); - struct push_buffer *pb = &cdma->push_buffer; unsigned int i; for (i = 0; i < syncpt_incrs; i++) @@ -48,18 +46,6 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, /* after CPU incr, ensure shadow is up to date */ host1x_syncpt_load(cdma->timeout.syncpt); - - /* NOP all the PB slots */ - while (nr_slots--) { - u32 *p = (u32 *)(pb->mapped + getptr); - *(p++) = HOST1X_OPCODE_NOP; - *(p++) = HOST1X_OPCODE_NOP; - dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, - &pb->dma, getptr); - getptr = (getptr + 8) & (pb->size - 1); - } - - wmb(); } /* From c53134577c185533ca7e0b958bafc77539d41fd9 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 1 Feb 2019 17:57:48 -0500 Subject: [PATCH 521/539] drm/amdgpu: Fix pci platform speed and width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new Vega series GPU cards have in-built bridges. To get the pcie speed and width supported by the platform walk the hierarchy and get the slowest link. Signed-off-by: Harish Kasiviswanathan Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++----- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d7dddb936f84..fcab1fe9bb68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3618,6 +3618,38 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ return r; } +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *pdev = adev->pdev; + enum pci_bus_speed cur_speed; + enum pcie_link_width cur_width; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while (pdev) { + cur_speed = pcie_get_speed_cap(pdev); + cur_width = pcie_get_width_cap(pdev); + + if (cur_speed != PCI_SPEED_UNKNOWN) { + if (*speed == PCI_SPEED_UNKNOWN) + *speed = cur_speed; + else if (cur_speed < *speed) + *speed = cur_speed; + } + + if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { + if (*width == PCIE_LNK_WIDTH_UNKNOWN) + *width = cur_width; + else if (cur_width < *width) + *width = cur_width; + } + pdev = pci_upstream_bridge(pdev); + } +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3630,8 +3662,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { struct pci_dev *pdev; - enum pci_bus_speed speed_cap; - enum pcie_link_width link_width; + enum pci_bus_speed speed_cap, platform_speed_cap; + enum pcie_link_width platform_link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) return; } + if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) + return; + + amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, + &platform_link_width); + if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ pdev = adev->pdev; @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; } /* platform caps */ - pdev = adev->ddev->pdev->bus->self; - speed_cap = pcie_get_speed_cap(pdev); - if (speed_cap == PCI_SPEED_UNKNOWN) { + if (platform_speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); } else { - if (speed_cap == PCIE_SPEED_16_0GT) + if (platform_speed_cap == PCIE_SPEED_16_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); - else if (speed_cap == PCIE_SPEED_8_0GT) + else if (platform_speed_cap == PCIE_SPEED_8_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); - else if (speed_cap == PCIE_SPEED_5_0GT) + else if (platform_speed_cap == PCIE_SPEED_5_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); else @@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } if (adev->pm.pcie_mlw_mask == 0) { - pdev = adev->ddev->pdev->bus->self; - link_width = pcie_get_width_cap(pdev); - if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; } else { - switch (link_width) { + switch (platform_link_width) { case PCIE_LNK_X32: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | From 9d18c131ee67d005ffe7e9d2ac192b4218ba6476 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 5 Feb 2019 14:05:11 -0500 Subject: [PATCH 522/539] drm/amd/powerplay: add override pcie parameters for Vega20 (v2) v2: Fix SMU message format Send override message after SMU enable features Signed-off-by: Harish Kasiviswanathan Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index da022ca79b56..0769b1ec562b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -771,40 +771,47 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +/* + * Override PCIe link speed and link width for DPM Level 1. PPTable entries + * reflect the ASIC capabilities and not the system capabilities. For e.g. + * Vega20 board in a PCI Gen3 system. In this case, when SMU's tries to switch + * to DPM1, it fails as system doesn't support Gen4. + */ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); - uint32_t pcie_speed = 0, pcie_width = 0, pcie_arg; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; int ret; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) - pcie_speed = 16; + pcie_gen = 3; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - pcie_speed = 8; + pcie_gen = 2; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - pcie_speed = 5; + pcie_gen = 1; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) - pcie_speed = 2; + pcie_gen = 0; - if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32) - pcie_width = 32; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) - pcie_width = 16; + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) - pcie_width = 12; + pcie_width = 5; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) - pcie_width = 8; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) pcie_width = 2; else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) pcie_width = 1; - pcie_arg = pcie_width | (pcie_speed << 8); - + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 + */ + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_OverridePcieParameters, pcie_arg); + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg); PP_ASSERT_WITH_CODE(!ret, "[OverridePcieParameters] Attempt to override pcie params failed!", return ret); @@ -1611,11 +1618,6 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to initialize SMC table!", return result); - result = vega20_override_pcie_parameters(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "[EnableDPMTasks] Failed to override pcie parameters!", - return result); - result = vega20_run_btc(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to run btc!", @@ -1631,6 +1633,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to enable all smu features!", return result); + result = vega20_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + result = vega20_notify_smc_display_change(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to notify smc display change!", From 7fbd31cceac0596c46394ea8745fe09b98d6ed79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Feb 2019 11:41:59 +0100 Subject: [PATCH 523/539] drm/amdgpu: fix NULL ptr dref in the VM code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exclusive fence is of course perfectly optional here. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1e3a36c90d38..75481cf3348f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1789,9 +1789,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, return r; /* Wait for any BO move to be completed */ - r = dma_fence_wait(exclusive, true); - if (unlikely(r)) - return r; + if (exclusive) { + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + } params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; From 7eb31a0bb2c1df7328692c875a6423b406b09ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:36 +0200 Subject: [PATCH 524/539] drm/i915: Split the gamma/csc enable bits from the plane_ctl() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On g4x+ the pipe gamma enable bit for the primary plane affects the pipe bottom color as well. The same for the pipe csc enable bit on ilk+. Thus we must configure those bits correctly even when the primary plane is disabled. To make the feasible let's split those settings from the plane_ctl() function into a seprate funciton that we can call from the ->disable_plane() hook as well. For consistency we'll do that on all the plane types. While that has no real benefits at this time, it'll become useful when we start to control the pipe gamma/csc enable bits dynamically when we overhaul the color management code. On pre-g4x there doesn't appear to be any way to gamma correct the pipe bottom color, but sticking to the same pattern doesn't hurt. And it'll still help us to do crtc state readout correctly for the pipe gamma enable bit for the color management overhaul. An alternative apporach would be to still precompute these bits into plane_state->ctl, but that would require that we run through the plane check even when the plane isn't logically enabled on any crtc. Currently that condition causes us to short circuit the entire thing and not call ->check_plane(). There would also be some chicken and egg problems with ->check_plane() vs. crtc color state check that would requite splitting certain things into multiple steps. So all in all this seems like the easier route. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 136 +++++++++++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_sprite.c | 54 ++++++++--- 3 files changed, 143 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bf564a638dca..be5a73b6805a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3215,21 +3215,13 @@ i9xx_plane_max_stride(struct intel_plane *plane, } } -static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - u32 dspcntr; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 dspcntr = 0; - dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; - - if (IS_G4X(dev_priv) || IS_GEN(dev_priv, 5) || - IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + dspcntr |= DISPPLANE_GAMMA_ENABLE; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; @@ -3237,6 +3229,24 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, if (INTEL_GEN(dev_priv) < 5) dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); + return dspcntr; +} + +static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + u32 dspcntr; + + dspcntr = DISPLAY_PLANE_ENABLE; + + if (IS_G4X(dev_priv) || IS_GEN(dev_priv, 5) || + IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + switch (fb->format->format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -3364,11 +3374,13 @@ static void i9xx_update_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; u32 linear_offset; - u32 dspcntr = plane_state->ctl; int x = plane_state->color_plane[0].x; int y = plane_state->color_plane[0].y; unsigned long irqflags; u32 dspaddr_offset; + u32 dspcntr; + + dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -3428,10 +3440,23 @@ static void i9xx_disable_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; unsigned long irqflags; + u32 dspcntr; + + /* + * DSPCNTR pipe gamma enable on g4x+ and pipe csc + * enable on ilk+ affect the pipe bottom color as + * well, so we must configure them even if the plane + * is disabled. + * + * On pre-g4x there is no way to gamma correct the + * pipe bottom color but we'll keep on doing this + * anyway. + */ + dspcntr = i9xx_plane_ctl_crtc(crtc_state); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE_FW(DSPCNTR(i9xx_plane), 0); + I915_WRITE_FW(DSPCNTR(i9xx_plane), dspcntr); if (INTEL_GEN(dev_priv) >= 4) I915_WRITE_FW(DSPSURF(i9xx_plane), 0); else @@ -3668,6 +3693,20 @@ static u32 cnl_plane_ctl_flip(unsigned int reflect) return 0; } +u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 plane_ctl = 0; + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + return plane_ctl; + + plane_ctl |= PLANE_CTL_PIPE_GAMMA_ENABLE; + plane_ctl |= PLANE_CTL_PIPE_CSC_ENABLE; + + return plane_ctl; +} + u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -3682,10 +3721,7 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, if (INTEL_GEN(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { plane_ctl |= skl_plane_ctl_alpha(plane_state); - plane_ctl |= - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE | - PLANE_CTL_PLANE_GAMMA_DISABLE; + plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) plane_ctl |= PLANE_CTL_YUV_TO_RGB_CSC_FORMAT_BT709; @@ -3710,19 +3746,27 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } +u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 plane_color_ctl = 0; + + if (INTEL_GEN(dev_priv) >= 11) + return plane_color_ctl; + + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + + return plane_color_ctl; +} + u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; struct intel_plane *plane = to_intel_plane(plane_state->base.plane); u32 plane_color_ctl = 0; - if (INTEL_GEN(dev_priv) < 11) { - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; - } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(plane_state); @@ -9945,11 +9989,15 @@ i845_cursor_max_stride(struct intel_plane *plane, return 2048; } +static u32 i845_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return CURSOR_GAMMA_ENABLE; +} + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { return CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB | CURSOR_STRIDE(plane_state->color_plane[0].stride); } @@ -10019,7 +10067,9 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; - cntl = plane_state->ctl; + cntl = plane_state->ctl | + i845_cursor_ctl_crtc(crtc_state); + size = (height << 12) | width; base = intel_cursor_base(plane_state); @@ -10086,27 +10136,36 @@ i9xx_cursor_max_stride(struct intel_plane *plane, return plane->base.dev->mode_config.cursor_width * 4; } +static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 cntl = 0; + + if (INTEL_GEN(dev_priv) >= 11) + return cntl; + + cntl |= MCURSOR_GAMMA_ENABLE; + + if (HAS_DDI(dev_priv)) + cntl |= MCURSOR_PIPE_CSC_ENABLE; + + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) + cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); + + return cntl; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); u32 cntl = 0; if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) cntl |= MCURSOR_TRICKLE_FEED_DISABLE; - if (INTEL_GEN(dev_priv) <= 10) { - cntl |= MCURSOR_GAMMA_ENABLE; - - if (HAS_DDI(dev_priv)) - cntl |= MCURSOR_PIPE_CSC_ENABLE; - } - - if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) - cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); - switch (plane_state->base.crtc_w) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; @@ -10231,7 +10290,8 @@ static void i9xx_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - cntl = plane_state->ctl; + cntl = plane_state->ctl | + i9xx_cursor_ctl_crtc(crtc_state); if (plane_state->base.crtc_h != plane_state->base.crtc_w) fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79f102dfe37c..9717b1250e6f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1765,9 +1765,10 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); +u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); -u32 glk_color_ctl(const struct intel_plane_state *plane_state); +u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state); u32 skl_plane_stride(const struct intel_plane_state *plane_state, int plane); int skl_check_plane_surface(struct intel_plane_state *plane_state); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index cd42e81f8a90..b56a1a9ad01d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -484,9 +484,16 @@ skl_program_plane(struct intel_plane *plane, struct intel_plane *linked = plane_state->linked_plane; const struct drm_framebuffer *fb = plane_state->base.fb; u8 alpha = plane_state->base.alpha >> 8; + u32 plane_color_ctl = 0; unsigned long irqflags; u32 keymsk, keymax; + plane_ctl |= skl_plane_ctl_crtc(crtc_state); + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + plane_color_ctl = plane_state->color_ctl | + glk_plane_color_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; @@ -533,8 +540,7 @@ skl_program_plane(struct intel_plane *plane, } if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) - I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - plane_state->color_ctl); + I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), plane_color_ctl); if (fb->format->is_yuv && icl_is_hdr_plane(plane)) icl_program_input_csc(plane, crtc_state, plane_state); @@ -733,6 +739,11 @@ vlv_update_clrc(const struct intel_plane_state *plane_state) SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } +static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return SP_GAMMA_ENABLE; +} + static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -741,7 +752,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; - sprctl = SP_ENABLE | SP_GAMMA_ENABLE; + sprctl = SP_ENABLE; switch (fb->format->format) { case DRM_FORMAT_YUYV: @@ -808,7 +819,6 @@ vlv_update_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; - u32 sprctl = plane_state->ctl; u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -819,6 +829,9 @@ vlv_update_plane(struct intel_plane *plane, u32 x = plane_state->color_plane[0].x; u32 y = plane_state->color_plane[0].y; unsigned long irqflags; + u32 sprctl; + + sprctl = plane_state->ctl | vlv_sprite_ctl_crtc(crtc_state); /* Sizes are 0 based */ crtc_w--; @@ -901,6 +914,19 @@ vlv_plane_get_hw_state(struct intel_plane *plane, return ret; } +static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 sprctl = 0; + + sprctl |= SPRITE_GAMMA_ENABLE; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + sprctl |= SPRITE_PIPE_CSC_ENABLE; + + return sprctl; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -911,14 +937,11 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 sprctl; - sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE; + sprctl = SPRITE_ENABLE; if (IS_IVYBRIDGE(dev_priv)) sprctl |= SPRITE_TRICKLE_FEED_DISABLE; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl |= SPRITE_PIPE_CSC_ENABLE; - switch (fb->format->format) { case DRM_FORMAT_XBGR8888: sprctl |= SPRITE_FORMAT_RGBX888 | SPRITE_RGB_ORDER_RGBX; @@ -970,7 +993,6 @@ ivb_update_plane(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - u32 sprctl = plane_state->ctl, sprscale = 0; u32 sprsurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -982,8 +1004,11 @@ ivb_update_plane(struct intel_plane *plane, u32 y = plane_state->color_plane[0].y; u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 sprctl, sprscale = 0; unsigned long irqflags; + sprctl = plane_state->ctl | ivb_sprite_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; @@ -1080,6 +1105,11 @@ g4x_sprite_max_stride(struct intel_plane *plane, return 16384; } +static u32 g4x_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) +{ + return DVS_GAMMA_ENABLE; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1090,7 +1120,7 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 dvscntr; - dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE; + dvscntr = DVS_ENABLE; if (IS_GEN(dev_priv, 6)) dvscntr |= DVS_TRICKLE_FEED_DISABLE; @@ -1146,7 +1176,6 @@ g4x_update_plane(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - u32 dvscntr = plane_state->ctl, dvsscale = 0; u32 dvssurf_offset = plane_state->color_plane[0].offset; u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -1158,8 +1187,11 @@ g4x_update_plane(struct intel_plane *plane, u32 y = plane_state->color_plane[0].y; u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; + u32 dvscntr, dvsscale = 0; unsigned long irqflags; + dvscntr = plane_state->ctl | g4x_sprite_ctl_crtc(crtc_state); + /* Sizes are 0 based */ src_w--; src_h--; From 5f4f3e386b36952b2fa3d3935342a25d6e1c6d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:37 +0200 Subject: [PATCH 525/539] drm/i915: Precompute gamma_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't be computing gamma mode during the commit phase. Move it to the check phase. v2: Reword comments a bit (Matt) Rebase Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_color.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 31fa599b0d61..057f0600840a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -375,8 +375,7 @@ static void haswell_load_luts(struct intel_crtc_state *crtc_state) reenable_ips = true; } - crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; - I915_WRITE(GAMMA_MODE(crtc->pipe), GAMMA_MODE_MODE_8BIT); + I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode); i9xx_load_luts(crtc_state); @@ -476,9 +475,7 @@ static void broadwell_load_luts(struct intel_crtc_state *crtc_state) bdw_load_gamma_lut(crtc_state, INTEL_INFO(dev_priv)->color.degamma_lut_size); - crtc_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; - I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); - POSTING_READ(GAMMA_MODE(pipe)); + I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); /* * Reset the index, otherwise it prevents the legacy palette to be @@ -532,9 +529,7 @@ static void glk_load_luts(struct intel_crtc_state *crtc_state) bdw_load_gamma_lut(crtc_state, 0); - crtc_state->gamma_mode = GAMMA_MODE_MODE_10BIT; - I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); - POSTING_READ(GAMMA_MODE(pipe)); + I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); } /* Loads the palette/gamma unit for the CRTC on CherryView. */ @@ -634,8 +629,10 @@ int intel_color_check(struct intel_crtc_state *crtc_state) gamma_tests = INTEL_INFO(dev_priv)->color.gamma_lut_tests; /* Always allow legacy gamma LUT with no further checking. */ - if (crtc_state_is_legacy_gamma(crtc_state)) + if (crtc_state_is_legacy_gamma(crtc_state)) { + crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; return 0; + } if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || check_lut_size(crtc_state->base.gamma_lut, gamma_length)) @@ -645,6 +642,12 @@ int intel_color_check(struct intel_crtc_state *crtc_state) drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) return -EINVAL; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + crtc_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + else if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + crtc_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; + else + crtc_state->gamma_mode = GAMMA_MODE_MODE_8BIT; return 0; } From 23b03a272c2beed43fa7468971b94a4b267cebbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:38 +0200 Subject: [PATCH 526/539] drm/i915: Constify the state arguments to the color management stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the crtc state etc. as const to the color management commit functions. And while at it polish some of the local variables. v2: Rebase Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-4-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_color.c | 140 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_drv.h | 4 +- 3 files changed, 80 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a2293152cb6a..1ca96376651b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -323,8 +323,8 @@ struct drm_i915_display_funcs { /* display clock increase/decrease */ /* pll clock increase/decrease */ - void (*load_csc_matrix)(struct intel_crtc_state *crtc_state); - void (*load_luts)(struct intel_crtc_state *crtc_state); + void (*load_csc_matrix)(const struct intel_crtc_state *crtc_state); + void (*load_luts)(const struct intel_crtc_state *crtc_state); }; #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 057f0600840a..96d4ece4870c 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,12 +74,12 @@ #define ILK_CSC_COEFF_1_0 \ ((7 << 12) | ILK_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool lut_is_legacy(struct drm_property_blob *lut) +static bool lut_is_legacy(const struct drm_property_blob *lut) { return drm_color_lut_size(lut) == LEGACY_LUT_LENGTH; } -static bool crtc_state_is_legacy_gamma(struct intel_crtc_state *crtc_state) +static bool crtc_state_is_legacy_gamma(const struct intel_crtc_state *crtc_state) { return !crtc_state->base.degamma_lut && !crtc_state->base.ctm && @@ -115,8 +115,8 @@ static u64 *ctm_mult_by_limited(u64 *result, const u64 *input) static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *crtc) { - int pipe = crtc->pipe; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; I915_WRITE(PIPE_CSC_PREOFF_HI(pipe), 0); I915_WRITE(PIPE_CSC_PREOFF_ME(pipe), 0); @@ -137,13 +137,14 @@ static void ilk_load_ycbcr_conversion_matrix(struct intel_crtc *crtc) I915_WRITE(PIPE_CSC_MODE(pipe), 0); } -static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) +static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int i, pipe = crtc->pipe; - u16 coeffs[9] = { 0, }; bool limited_color_range = false; + enum pipe pipe = crtc->pipe; + u16 coeffs[9] = {}; + int i; /* * FIXME if there's a gamma LUT after the CSC, we should @@ -256,16 +257,16 @@ static void ilk_load_csc_matrix(struct intel_crtc_state *crtc_state) /* * Set up the pipe CSC unit on CherryView. */ -static void cherryview_load_csc_matrix(struct intel_crtc_state *crtc_state) +static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; u32 mode; if (crtc_state->base.ctm) { - struct drm_color_ctm *ctm = crtc_state->base.ctm->data; - u16 coeffs[9] = { 0, }; + const struct drm_color_ctm *ctm = crtc_state->base.ctm->data; + u16 coeffs[9] = {}; int i; for (i = 0; i < ARRAY_SIZE(coeffs); i++) { @@ -303,18 +304,17 @@ static void cherryview_load_csc_matrix(struct intel_crtc_state *crtc_state) I915_WRITE(CGM_PIPE_MODE(pipe), mode); } -void intel_color_set_csc(struct intel_crtc_state *crtc_state) +void intel_color_set_csc(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); if (dev_priv->display.load_csc_matrix) dev_priv->display.load_csc_matrix(crtc_state); } /* Loads the legacy palette/gamma unit for the CRTC. */ -static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, - struct drm_property_blob *blob) +static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, + const struct drm_property_blob *blob) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -329,7 +329,8 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, } if (blob) { - struct drm_color_lut *lut = blob->data; + const struct drm_color_lut *lut = blob->data; + for (i = 0; i < 256; i++) { u32 word = (drm_color_lut_extract(lut[i].red, 8) << 16) | @@ -353,13 +354,13 @@ static void i9xx_load_luts_internal(struct intel_crtc_state *crtc_state, } } -static void i9xx_load_luts(struct intel_crtc_state *crtc_state) +static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) { i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); } /* Loads the legacy palette/gamma unit for the CRTC on Haswell. */ -static void haswell_load_luts(struct intel_crtc_state *crtc_state) +static void haswell_load_luts(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -383,17 +384,19 @@ static void haswell_load_luts(struct intel_crtc_state *crtc_state) hsw_enable_ips(crtc_state); } -static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) +static void bdw_load_degamma_lut(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + enum pipe pipe = crtc->pipe; I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); - if (crtc_state->base.degamma_lut) { - struct drm_color_lut *lut = crtc_state->base.degamma_lut->data; + if (degamma_lut) { + const struct drm_color_lut *lut = degamma_lut->data; for (i = 0; i < lut_size; i++) { u32 word = @@ -413,11 +416,13 @@ static void bdw_load_degamma_lut(struct intel_crtc_state *crtc_state) } } -static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) +static void bdw_load_gamma_lut(const struct intel_crtc_state *crtc_state, u32 offset) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); @@ -426,8 +431,8 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) PAL_PREC_AUTO_INCREMENT | offset); - if (crtc_state->base.gamma_lut) { - struct drm_color_lut *lut = crtc_state->base.gamma_lut->data; + if (gamma_lut) { + const struct drm_color_lut *lut = gamma_lut->data; for (i = 0; i < lut_size; i++) { u32 word = @@ -461,10 +466,11 @@ static void bdw_load_gamma_lut(struct intel_crtc_state *crtc_state, u32 offset) } /* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct intel_crtc_state *crtc_state) +static void broadwell_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; if (crtc_state_is_legacy_gamma(crtc_state)) { haswell_load_luts(crtc_state); @@ -484,10 +490,11 @@ static void broadwell_load_luts(struct intel_crtc_state *crtc_state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } -static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) +static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; const u32 lut_size = 33; u32 i; @@ -514,11 +521,11 @@ static void glk_load_degamma_lut(struct intel_crtc_state *crtc_state) I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16)); } -static void glk_load_luts(struct intel_crtc_state *crtc_state) +static void glk_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; glk_load_degamma_lut(crtc_state); @@ -532,28 +539,29 @@ static void glk_load_luts(struct intel_crtc_state *crtc_state) I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); } -/* Loads the palette/gamma unit for the CRTC on CherryView. */ -static void cherryview_load_luts(struct intel_crtc_state *crtc_state) +static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_crtc *crtc = crtc_state->base.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - enum pipe pipe = to_intel_crtc(crtc)->pipe; - struct drm_color_lut *lut; - u32 i, lut_size; - u32 word0, word1; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + enum pipe pipe = crtc->pipe; if (crtc_state_is_legacy_gamma(crtc_state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0)); - i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); + i9xx_load_luts_internal(crtc_state, gamma_lut); return; } - if (crtc_state->base.degamma_lut) { - lut = crtc_state->base.degamma_lut->data; - lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + if (degamma_lut) { + const struct drm_color_lut *lut = degamma_lut->data; + int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + for (i = 0; i < lut_size; i++) { + u32 word0, word1; + /* Write LUT in U0.14 format. */ word0 = (drm_color_lut_extract(lut[i].green, 14) << 16) | @@ -565,10 +573,13 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) } } - if (crtc_state->base.gamma_lut) { - lut = crtc_state->base.gamma_lut->data; - lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + if (gamma_lut) { + const struct drm_color_lut *lut = gamma_lut->data; + int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + for (i = 0; i < lut_size; i++) { + u32 word0, word1; + /* Write LUT in U0.10 format. */ word0 = (drm_color_lut_extract(lut[i].green, 10) << 16) | @@ -582,8 +593,8 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) I915_WRITE(CGM_PIPE_MODE(pipe), (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0) | - (crtc_state->base.degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | - (crtc_state->base.gamma_lut ? CGM_PIPE_MODE_GAMMA : 0)); + (degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | + (gamma_lut ? CGM_PIPE_MODE_GAMMA : 0)); /* * Also program a linear LUT in the legacy block (behind the @@ -592,10 +603,9 @@ static void cherryview_load_luts(struct intel_crtc_state *crtc_state) i9xx_load_luts_internal(crtc_state, NULL); } -void intel_color_load_luts(struct intel_crtc_state *crtc_state) +void intel_color_load_luts(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); dev_priv->display.load_luts(crtc_state); } @@ -620,6 +630,8 @@ static int check_lut_size(const struct drm_property_blob *lut, int expected) int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; int gamma_length, degamma_length; u32 gamma_tests, degamma_tests; @@ -634,12 +646,12 @@ int intel_color_check(struct intel_crtc_state *crtc_state) return 0; } - if (check_lut_size(crtc_state->base.degamma_lut, degamma_length) || - check_lut_size(crtc_state->base.gamma_lut, gamma_length)) + if (check_lut_size(degamma_lut, degamma_length) || + check_lut_size(gamma_lut, gamma_length)) return -EINVAL; - if (drm_color_lut_check(crtc_state->base.degamma_lut, degamma_tests) || - drm_color_lut_check(crtc_state->base.gamma_lut, gamma_tests)) + if (drm_color_lut_check(degamma_lut, degamma_tests) || + drm_color_lut_check(gamma_lut, gamma_tests)) return -EINVAL; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9717b1250e6f..2c299456fdbd 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2389,8 +2389,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ /* intel_color.c */ void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); -void intel_color_set_csc(struct intel_crtc_state *crtc_state); -void intel_color_load_luts(struct intel_crtc_state *crtc_state); +void intel_color_set_csc(const struct intel_crtc_state *crtc_state); +void intel_color_load_luts(const struct intel_crtc_state *crtc_state); /* intel_lspcon.c */ bool lspcon_init(struct intel_digital_port *intel_dig_port); From 87cefd57c88aecc877b35c89375fe688a8eb0868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:39 +0200 Subject: [PATCH 527/539] drm/i915: Pull GAMMA_MODE write out from haswell_load_luts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For bdw+ let's move the GAMMA_MODE write for the legacy LUT mode into the .load_luts() funciton directly, rather than relying on haswell_load_luts(). We'll be getting rid of haswell_load_luts() entirely soon, and it's anyway cleaner to have the GAMMA_MODE write in a single place. Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-5-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_color.c | 36 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 96d4ece4870c..171dff60d62f 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -473,21 +473,20 @@ static void broadwell_load_luts(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; if (crtc_state_is_legacy_gamma(crtc_state)) { - haswell_load_luts(crtc_state); - return; + i9xx_load_luts(crtc_state); + } else { + bdw_load_degamma_lut(crtc_state); + bdw_load_gamma_lut(crtc_state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); + + /* + * Reset the index, otherwise it prevents the legacy palette to be + * written properly. + */ + I915_WRITE(PREC_PAL_INDEX(pipe), 0); } - bdw_load_degamma_lut(crtc_state); - bdw_load_gamma_lut(crtc_state, - INTEL_INFO(dev_priv)->color.degamma_lut_size); - I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); - - /* - * Reset the index, otherwise it prevents the legacy palette to be - * written properly. - */ - I915_WRITE(PREC_PAL_INDEX(pipe), 0); } static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) @@ -530,11 +529,16 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) glk_load_degamma_lut(crtc_state); if (crtc_state_is_legacy_gamma(crtc_state)) { - haswell_load_luts(crtc_state); - return; - } + i9xx_load_luts(crtc_state); + } else { + bdw_load_gamma_lut(crtc_state, 0); - bdw_load_gamma_lut(crtc_state, 0); + /* + * Reset the index, otherwise it prevents the legacy palette to be + * written properly. + */ + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + } I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); } From 4d8ed54c044703f96b1df9ef7ac689b18899a470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:40 +0200 Subject: [PATCH 528/539] drm/i915: Split color mgmt based on single vs. double buffered registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the color management hooks along the single vs. double buffered registers line. Of the currently programmed registers GAMMA_MODE and the ilk+ pipe CSC are double buffered, the LUTS and CHV CGM block are single buffered. The double buffered register will be programmed during the normal pipe update with evasion, and also during pipe enable so that the settings will already be correct when the pipe starts up before the planes are enabled. The single buffered registers are currently programmed before the vblank evade. Which is totally wrong, but we'll correct that later. v2: Add some docs to explain the two vfuncs (Matt,Uma) Rebase Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-6-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.h | 14 +++++++- drivers/gpu/drm/i915/intel_color.c | 49 +++++++++++++--------------- drivers/gpu/drm/i915/intel_display.c | 16 +++++---- drivers/gpu/drm/i915/intel_drv.h | 2 +- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1ca96376651b..08d75d57b34e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -323,7 +323,19 @@ struct drm_i915_display_funcs { /* display clock increase/decrease */ /* pll clock increase/decrease */ - void (*load_csc_matrix)(const struct intel_crtc_state *crtc_state); + /* + * Program double buffered color management registers during + * vblank evasion. The registers should then latch during the + * next vblank start, alongside any other double buffered registers + * involved with the same commit. + */ + void (*color_commit)(const struct intel_crtc_state *crtc_state); + /* + * Load LUTs (and other single buffered color management + * registers). Will (hopefully) be called during the vblank + * following the latching of any double buffered registers + * involved with the same commit. + */ void (*load_luts)(const struct intel_crtc_state *crtc_state); }; diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 171dff60d62f..c364cb7cb14c 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -304,14 +304,6 @@ static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state I915_WRITE(CGM_PIPE_MODE(pipe), mode); } -void intel_color_set_csc(const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - if (dev_priv->display.load_csc_matrix) - dev_priv->display.load_csc_matrix(crtc_state); -} - /* Loads the legacy palette/gamma unit for the CRTC. */ static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, const struct drm_property_blob *blob) @@ -359,6 +351,16 @@ static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) i9xx_load_luts_internal(crtc_state, crtc_state->base.gamma_lut); } +static void hsw_color_commit(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode); + + ilk_load_csc_matrix(crtc_state); +} + /* Loads the legacy palette/gamma unit for the CRTC on Haswell. */ static void haswell_load_luts(const struct intel_crtc_state *crtc_state) { @@ -376,8 +378,6 @@ static void haswell_load_luts(const struct intel_crtc_state *crtc_state) reenable_ips = true; } - I915_WRITE(GAMMA_MODE(crtc->pipe), crtc_state->gamma_mode); - i9xx_load_luts(crtc_state); if (reenable_ips) @@ -485,8 +485,6 @@ static void broadwell_load_luts(const struct intel_crtc_state *crtc_state) */ I915_WRITE(PREC_PAL_INDEX(pipe), 0); } - - I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); } static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) @@ -539,8 +537,6 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) */ I915_WRITE(PREC_PAL_INDEX(pipe), 0); } - - I915_WRITE(GAMMA_MODE(pipe), crtc_state->gamma_mode); } static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) @@ -551,10 +547,9 @@ static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; enum pipe pipe = crtc->pipe; + cherryview_load_csc_matrix(crtc_state); + if (crtc_state_is_legacy_gamma(crtc_state)) { - /* Turn off degamma/gamma on CGM block. */ - I915_WRITE(CGM_PIPE_MODE(pipe), - (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0)); i9xx_load_luts_internal(crtc_state, gamma_lut); return; } @@ -595,11 +590,6 @@ static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) } } - I915_WRITE(CGM_PIPE_MODE(pipe), - (crtc_state->base.ctm ? CGM_PIPE_MODE_CSC : 0) | - (degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | - (gamma_lut ? CGM_PIPE_MODE_GAMMA : 0)); - /* * Also program a linear LUT in the legacy block (behind the * CGM block). @@ -614,6 +604,14 @@ void intel_color_load_luts(const struct intel_crtc_state *crtc_state) dev_priv->display.load_luts(crtc_state); } +void intel_color_commit(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (dev_priv->display.color_commit) + dev_priv->display.color_commit(crtc_state); +} + static int check_lut_size(const struct drm_property_blob *lut, int expected) { int len; @@ -675,18 +673,17 @@ void intel_color_init(struct intel_crtc *crtc) drm_mode_crtc_set_gamma_size(&crtc->base, 256); if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.load_csc_matrix = cherryview_load_csc_matrix; dev_priv->display.load_luts = cherryview_load_luts; } else if (IS_HASWELL(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - dev_priv->display.load_csc_matrix = ilk_load_csc_matrix; dev_priv->display.load_luts = glk_load_luts; + dev_priv->display.color_commit = hsw_color_commit; } else { dev_priv->display.load_luts = i9xx_load_luts; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index be5a73b6805a..63b61f61eccc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5779,6 +5779,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, * clocks enabled */ intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5888,8 +5889,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, haswell_set_pipemisc(pipe_config); - intel_color_set_csc(pipe_config); - intel_crtc->active = true; /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ @@ -5908,6 +5907,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, * clocks enabled */ intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); if (INTEL_GEN(dev_priv) >= 11) icl_set_pipe_chicken(intel_crtc); @@ -6245,8 +6245,6 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_set_pipeconf(pipe_config); - intel_color_set_csc(pipe_config); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); @@ -6266,6 +6264,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_pfit_enable(pipe_config); intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -6322,6 +6321,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, i9xx_pfit_enable(pipe_config); intel_color_load_luts(pipe_config); + intel_color_commit(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, @@ -13742,10 +13742,8 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, if (!modeset && (intel_cstate->base.color_mgmt_changed || - intel_cstate->update_pipe)) { - intel_color_set_csc(intel_cstate); + intel_cstate->update_pipe)) intel_color_load_luts(intel_cstate); - } /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_cstate); @@ -13753,6 +13751,10 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, if (modeset) goto out; + if (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe) + intel_color_commit(intel_cstate); + if (intel_cstate->update_pipe) intel_update_pipe_config(old_intel_cstate, intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2c299456fdbd..aa003ab8c4aa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2389,7 +2389,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ /* intel_color.c */ void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); -void intel_color_set_csc(const struct intel_crtc_state *crtc_state); +void intel_color_commit(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); /* intel_lspcon.c */ From 051a6d8d3ca09da2cfcbf15ca3dbe6afe330d51f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 18:08:41 +0200 Subject: [PATCH 529/539] drm/i915: Move LUT programming to happen after vblank waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LUTs are single buffered so we should program them after the double buffered pipe updates have been latched by the hardware. We'll also fix up the IPS vs. split gamma w/a to do the IPS disable like everyone else. Note that this is currently dead code as we don't use the split gamma mode on HSW, but that will be fixed up shortly. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190205160848.24662-7-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_color.c | 25 +-------------- drivers/gpu/drm/i915/intel_display.c | 47 ++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index c364cb7cb14c..71a1f12c6b2a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -361,29 +361,6 @@ static void hsw_color_commit(const struct intel_crtc_state *crtc_state) ilk_load_csc_matrix(crtc_state); } -/* Loads the legacy palette/gamma unit for the CRTC on Haswell. */ -static void haswell_load_luts(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - bool reenable_ips = false; - - /* - * Workaround : Do not read or write the pipe palette/gamma data while - * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. - */ - if (IS_HASWELL(dev_priv) && crtc_state->ips_enabled && - (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)) { - hsw_disable_ips(crtc_state); - reenable_ips = true; - } - - i9xx_load_luts(crtc_state); - - if (reenable_ips) - hsw_enable_ips(crtc_state); -} - static void bdw_load_degamma_lut(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); @@ -675,7 +652,7 @@ void intel_color_init(struct intel_crtc *crtc) if (IS_CHERRYVIEW(dev_priv)) { dev_priv->display.load_luts = cherryview_load_luts; } else if (IS_HASWELL(dev_priv)) { - dev_priv->display.load_luts = haswell_load_luts; + dev_priv->display.load_luts = i9xx_load_luts; dev_priv->display.color_commit = hsw_color_commit; } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 63b61f61eccc..2802674c94ef 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5373,24 +5373,54 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (!old_crtc_state->ips_enabled) return false; if (needs_modeset(&new_crtc_state->base)) return true; + /* + * Workaround : Do not read or write the pipe palette/gamma data while + * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. + * + * Disable IPS before we program the LUT. + */ + if (IS_HASWELL(dev_priv) && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) && + new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) + return true; + return !new_crtc_state->ips_enabled; } static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (!new_crtc_state->ips_enabled) return false; if (needs_modeset(&new_crtc_state->base)) return true; + /* + * Workaround : Do not read or write the pipe palette/gamma data while + * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled. + * + * Re-enable IPS after the LUT has been programmed. + */ + if (IS_HASWELL(dev_priv) && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) && + new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) + return true; + /* * We can't read out IPS on broadwell, assume the worst and * forcibly enable IPS on the first fastset. @@ -11136,7 +11166,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, return ret; } - if (crtc_state->color_mgmt_changed) { + if (mode_changed || crtc_state->color_mgmt_changed) { ret = intel_color_check(pipe_config); if (ret) return ret; @@ -13225,6 +13255,16 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) */ drm_atomic_helper_wait_for_flip_done(dev, state); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + new_intel_crtc_state = to_intel_crtc_state(new_crtc_state); + + if (new_crtc_state->active && + !needs_modeset(new_crtc_state) && + (new_intel_crtc_state->base.color_mgmt_changed || + new_intel_crtc_state->update_pipe)) + intel_color_load_luts(new_intel_crtc_state); + } + /* * Now that the vblank has passed, we can go ahead and program the * optimal watermarks on platforms that need two-step watermark @@ -13740,11 +13780,6 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, intel_atomic_get_new_crtc_state(old_intel_state, intel_crtc); bool modeset = needs_modeset(&intel_cstate->base); - if (!modeset && - (intel_cstate->base.color_mgmt_changed || - intel_cstate->update_pipe)) - intel_color_load_luts(intel_cstate); - /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_cstate); From c09d39166d8a3f3788680b32dbb0a40a70de32e2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 7 Feb 2019 12:45:32 -0800 Subject: [PATCH 530/539] drm/i915: Update DRIVER_DATE to 20190207 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 08d75d57b34e..9adc7bb9e69c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -91,8 +91,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190202" -#define DRIVER_TIMESTAMP 1549095268 +#define DRIVER_DATE "20190207" +#define DRIVER_TIMESTAMP 1549572331 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions From 0461221316ec21e0a535a35fba3feb6ba75706e6 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 7 Feb 2019 15:12:35 -0500 Subject: [PATCH 531/539] drm/amd/display: Check hpd_gpio for NULL before accessing it dal_gpio_open and dal_gpio_unlock_pin dereference hpd_gpio. Check for NULL before calling those functions. Fixes: ac627caf6b9275a ("drm/amd/display: add gpio lock/unlock") Reported-by: Przemek Socha CC: Chiawen Huang CC: Tony Cheng Acked-by: Alex Deucher Tested-by: Alex Deucher Signed-off-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6d2cef05b4d7..7f5a947ad31d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1127,10 +1127,11 @@ static bool construct( link->dc->res_pool->funcs->link_init(link); link->hpd_gpio = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service); - dal_gpio_open(link->hpd_gpio, GPIO_MODE_INTERRUPT); - dal_gpio_unlock_pin(link->hpd_gpio); - if (link->hpd_gpio != NULL) + if (link->hpd_gpio != NULL) { + dal_gpio_open(link->hpd_gpio, GPIO_MODE_INTERRUPT); + dal_gpio_unlock_pin(link->hpd_gpio); link->irq_source_hpd = dal_irq_get_source(link->hpd_gpio); + } switch (link->link_id.id) { case CONNECTOR_ID_HDMI_TYPE_A: From 4c6d8fc20b09f9684743afd72e4dbc3f15524479 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 14 Jan 2019 17:44:56 +0100 Subject: [PATCH 532/539] drm: rcar-du: add missing of_node_put Add an of_node_put when the result of of_graph_get_remote_port_parent is not available. Add a second of_node_put if no encoder is selected (encoder remains NULL). The semantic match that finds the first problem is as follows (http://coccinelle.lip6.fr): // @r exists@ local idexpression e; expression x; @@ e = of_graph_get_remote_port_parent(...); ... when != x = e when != true e == NULL when != of_node_put(e) when != of_fwnode_handle(e) ( return e; | *return ...; ) // Signed-off-by: Julia Lawall Reviewed-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/rcar_du_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 663ab98d1f97..99795c677dd5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -352,6 +352,7 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu, dev_dbg(rcdu->dev, "connected entity %pOF is disabled, skipping\n", entity); + of_node_put(entity); return -ENODEV; } @@ -387,6 +388,7 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu, dev_warn(rcdu->dev, "no encoder found for endpoint %pOF, skipping\n", ep->local_node); + of_node_put(entity); return -ENODEV; } From 8a2fe6c09f2ac17c7605982bc6810ea2b2c2a0d0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 22 Jan 2019 15:25:46 +0000 Subject: [PATCH 533/539] dt-bindings: display: renesas: lvds: Document r8a7744 bindings Document the RZ/G1N (R8A7744) LVDS bindings. Signed-off-by: Biju Das Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Reviewed-by: Simon Horman Reviewed-by: Rob Herring --- .../devicetree/bindings/display/bridge/renesas,lvds.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt index 27a054e1bb5f..900a884ad9f5 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt +++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt @@ -8,6 +8,7 @@ Required properties: - compatible : Shall contain one of - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders + - "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders - "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders From fc59d7d491a9758be2915caf400d342881a8b86e Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 22 Jan 2019 15:25:47 +0000 Subject: [PATCH 534/539] drm: rcar-du: lvds: Add r8a7744 support The LVDS encoders on RZ/G1N SoC is similar to RZ/G1M. Add support for RZ/G1N (R8A7744) SoC to the LVDS encoder driver. Signed-off-by: Biju Das Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Reviewed-by: Simon Horman --- drivers/gpu/drm/rcar-du/rcar_lvds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 044c65680ef3..997927e8b0b6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -786,6 +786,7 @@ static const struct rcar_lvds_device_info rcar_lvds_r8a77995_info = { static const struct of_device_id rcar_lvds_of_table[] = { { .compatible = "renesas,r8a7743-lvds", .data = &rcar_lvds_gen2_info }, + { .compatible = "renesas,r8a7744-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a774c0-lvds", .data = &rcar_lvds_r8a77990_info }, { .compatible = "renesas,r8a7790-lvds", .data = &rcar_lvds_r8a7790_info }, { .compatible = "renesas,r8a7791-lvds", .data = &rcar_lvds_gen2_info }, From 5aebc852af25d8e1bacee664a1e3abae13dab799 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 17 Jan 2019 00:40:49 +0200 Subject: [PATCH 535/539] drm: rcar-du: Simplify encoder registration Before the driver fully moved to drm_bridge and drm_panel, it was necessary to parse DT and locate encoder and connector nodes. The connector node is now unused and can be removed as a parameter to rcar_du_encoder_init(). As a consequence rcar_du_encoders_init_one() can be greatly simplified, removing most of the DT parsing. Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/rcar_du_encoder.c | 3 +- drivers/gpu/drm/rcar-du/rcar_du_encoder.h | 3 +- drivers/gpu/drm/rcar-du/rcar_du_kms.c | 52 ++--------------------- 3 files changed, 6 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c index 369e930750e9..ed7ccee705cb 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c @@ -30,8 +30,7 @@ static const struct drm_encoder_funcs encoder_funcs = { int rcar_du_encoder_init(struct rcar_du_device *rcdu, enum rcar_du_output output, - struct device_node *enc_node, - struct device_node *con_node) + struct device_node *enc_node) { struct rcar_du_encoder *renc; struct drm_encoder *encoder; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h index 552f2a02e5b5..df9be4524301 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h @@ -26,7 +26,6 @@ struct rcar_du_encoder { int rcar_du_encoder_init(struct rcar_du_device *rcdu, enum rcar_du_output output, - struct device_node *enc_node, - struct device_node *con_node); + struct device_node *enc_node); #endif /* __RCAR_DU_ENCODER_H__ */ diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 99795c677dd5..b0c80dffd8b8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -330,17 +330,10 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu, enum rcar_du_output output, struct of_endpoint *ep) { - struct device_node *connector = NULL; - struct device_node *encoder = NULL; - struct device_node *ep_node = NULL; - struct device_node *entity_ep_node; struct device_node *entity; int ret; - /* - * Locate the connected entity and infer its type from the number of - * endpoints. - */ + /* Locate the connected entity and initialize the encoder. */ entity = of_graph_get_remote_port_parent(ep->local_node); if (!entity) { dev_dbg(rcdu->dev, "unconnected endpoint %pOF, skipping\n", @@ -356,50 +349,13 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu, return -ENODEV; } - entity_ep_node = of_graph_get_remote_endpoint(ep->local_node); - - for_each_endpoint_of_node(entity, ep_node) { - if (ep_node == entity_ep_node) - continue; - - /* - * We've found one endpoint other than the input, this must - * be an encoder. Locate the connector. - */ - encoder = entity; - connector = of_graph_get_remote_port_parent(ep_node); - of_node_put(ep_node); - - if (!connector) { - dev_warn(rcdu->dev, - "no connector for encoder %pOF, skipping\n", - encoder); - of_node_put(entity_ep_node); - of_node_put(encoder); - return -ENODEV; - } - - break; - } - - of_node_put(entity_ep_node); - - if (!encoder) { - dev_warn(rcdu->dev, - "no encoder found for endpoint %pOF, skipping\n", - ep->local_node); - of_node_put(entity); - return -ENODEV; - } - - ret = rcar_du_encoder_init(rcdu, output, encoder, connector); + ret = rcar_du_encoder_init(rcdu, output, entity); if (ret && ret != -EPROBE_DEFER) dev_warn(rcdu->dev, "failed to initialize encoder %pOF on output %u (%d), skipping\n", - encoder, output, ret); + entity, output, ret); - of_node_put(encoder); - of_node_put(connector); + of_node_put(entity); return ret; } From 6e1f8557818f3c2476404ba1d4a5157b823b79f0 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 17 Jan 2019 00:43:08 +0200 Subject: [PATCH 536/539] drm: rcar-du: lvds: Don't fail probe if output is not connected on D3/E3 On the D3 and E3 SoCs the LVDS encoder has an extended internal PLL and supplies a clock to the DU. That clock is used not only for the LVDS outputs but also for the DPAD output. The LVDS encoder thus needs to be available to the DU even when its output is disabled. Don't fail probe in that case on D3 and E3. Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/rcar_lvds.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 997927e8b0b6..a7789c5b3880 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -545,7 +545,10 @@ static int rcar_lvds_attach(struct drm_bridge *bridge) return drm_bridge_attach(bridge->encoder, lvds->next_bridge, bridge); - /* Otherwise we have a panel, create a connector. */ + /* Otherwise if we have a panel, create a connector. */ + if (!lvds->panel) + return 0; + ret = drm_connector_init(bridge->dev, connector, &rcar_lvds_conn_funcs, DRM_MODE_CONNECTOR_LVDS); if (ret < 0) @@ -593,7 +596,8 @@ static int rcar_lvds_parse_dt(struct rcar_lvds *lvds) local_output = of_graph_get_endpoint_by_regs(lvds->dev->of_node, 1, 0); if (!local_output) { dev_dbg(lvds->dev, "unconnected port@1\n"); - return -ENODEV; + ret = -ENODEV; + goto done; } /* @@ -643,6 +647,15 @@ done: of_node_put(remote_input); of_node_put(remote); + /* + * On D3/E3 the LVDS encoder provides a clock to the DU, which can be + * used for the DPAD output even when the LVDS output is not connected. + * Don't fail probe in that case as the DU will need the bridge to + * control the clock. + */ + if (lvds->info->quirks & RCAR_LVDS_QUIRK_EXT_PLL) + return ret == -ENODEV ? 0 : ret; + return ret; } From 02f2b30032c12b1b91abe5f2bd0d74ba1f700ea1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 17 Jan 2019 01:11:37 +0200 Subject: [PATCH 537/539] drm: rcar-du: lvds: Add API to enable/disable clock output On the D3 and E3 platforms, the LVDS internal PLL supplies the pixel clock to the DU. This works automatically for LVDS outputs as the LVDS encoder is enabled through the bridge API, enabling the internal PLL and clock output. However, when using the DU DPAD output with the LVDS outputs turned off, the LVDS PLL needs to be controlled manually. Add an API to do so, to be called by the DU driver. The drivers/gpu/drm/rcar-du/ directory has to be treated as obj-y unconditionally, as the LVDS driver could be built-in while the DU driver is compiled as a module. Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/rcar-du/Kconfig | 1 + drivers/gpu/drm/rcar-du/rcar_lvds.c | 77 +++++++++++++++++++++++++---- drivers/gpu/drm/rcar-du/rcar_lvds.h | 27 ++++++++++ 4 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 drivers/gpu/drm/rcar-du/rcar_lvds.h diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f0c1f8731a27..1ac55c65eac0 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -81,7 +81,7 @@ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ obj-$(CONFIG_DRM_ARMADA) += armada/ obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/ -obj-$(CONFIG_DRM_RCAR_DU) += rcar-du/ +obj-y += rcar-du/ obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/ obj-y += omapdrm/ obj-$(CONFIG_DRM_SUN4I) += sun4i/ diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 225141656e19..7c36e2777a15 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -4,6 +4,7 @@ config DRM_RCAR_DU depends on DRM && OF depends on ARM || ARM64 depends on ARCH_RENESAS || COMPILE_TEST + imply DRM_RCAR_LVDS select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index a7789c5b3880..7ef97b2a6eda 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -23,6 +23,7 @@ #include #include +#include "rcar_lvds.h" #include "rcar_lvds_regs.h" struct rcar_lvds; @@ -183,8 +184,9 @@ struct pll_info { static void rcar_lvds_d3_e3_pll_calc(struct rcar_lvds *lvds, struct clk *clk, unsigned long target, struct pll_info *pll, - u32 clksel) + u32 clksel, bool dot_clock_only) { + unsigned int div7 = dot_clock_only ? 1 : 7; unsigned long output; unsigned long fin; unsigned int m_min; @@ -218,9 +220,9 @@ static void rcar_lvds_d3_e3_pll_calc(struct rcar_lvds *lvds, struct clk *clk, * `------------> | | * |/ * - * The /7 divider is optional when the LVDS PLL is used to generate a - * dot clock for the DU RGB output, without using the LVDS encoder. We - * don't support this configuration yet. + * The /7 divider is optional, it is enabled when the LVDS PLL is used + * to drive the LVDS encoder, and disabled when used to generate a dot + * clock for the DU RGB output, without using the LVDS encoder. * * The PLL allowed input frequency range is 12 MHz to 192 MHz. */ @@ -280,7 +282,7 @@ static void rcar_lvds_d3_e3_pll_calc(struct rcar_lvds *lvds, struct clk *clk, * the PLL, followed by a an optional fixed /7 * divider. */ - fout = fvco / (1 << e) / 7; + fout = fvco / (1 << e) / div7; div = DIV_ROUND_CLOSEST(fout, target); diff = abs(fout / div - target); @@ -301,7 +303,7 @@ static void rcar_lvds_d3_e3_pll_calc(struct rcar_lvds *lvds, struct clk *clk, done: output = fin * pll->pll_n / pll->pll_m / (1 << pll->pll_e) - / 7 / pll->div; + / div7 / pll->div; error = (long)(output - target) * 10000 / (long)target; dev_dbg(lvds->dev, @@ -311,17 +313,18 @@ done: pll->pll_m, pll->pll_n, pll->pll_e, pll->div); } -static void rcar_lvds_pll_setup_d3_e3(struct rcar_lvds *lvds, unsigned int freq) +static void __rcar_lvds_pll_setup_d3_e3(struct rcar_lvds *lvds, + unsigned int freq, bool dot_clock_only) { struct pll_info pll = { .diff = (unsigned long)-1 }; u32 lvdpllcr; rcar_lvds_d3_e3_pll_calc(lvds, lvds->clocks.dotclkin[0], freq, &pll, - LVDPLLCR_CKSEL_DU_DOTCLKIN(0)); + LVDPLLCR_CKSEL_DU_DOTCLKIN(0), dot_clock_only); rcar_lvds_d3_e3_pll_calc(lvds, lvds->clocks.dotclkin[1], freq, &pll, - LVDPLLCR_CKSEL_DU_DOTCLKIN(1)); + LVDPLLCR_CKSEL_DU_DOTCLKIN(1), dot_clock_only); rcar_lvds_d3_e3_pll_calc(lvds, lvds->clocks.extal, freq, &pll, - LVDPLLCR_CKSEL_EXTAL); + LVDPLLCR_CKSEL_EXTAL, dot_clock_only); lvdpllcr = LVDPLLCR_PLLON | pll.clksel | LVDPLLCR_CLKOUT | LVDPLLCR_PLLN(pll.pll_n - 1) | LVDPLLCR_PLLM(pll.pll_m - 1); @@ -330,6 +333,9 @@ static void rcar_lvds_pll_setup_d3_e3(struct rcar_lvds *lvds, unsigned int freq) lvdpllcr |= LVDPLLCR_STP_CLKOUTE | LVDPLLCR_OUTCLKSEL | LVDPLLCR_PLLE(pll.pll_e - 1); + if (dot_clock_only) + lvdpllcr |= LVDPLLCR_OCKSEL; + rcar_lvds_write(lvds, LVDPLLCR, lvdpllcr); if (pll.div > 1) @@ -343,6 +349,57 @@ static void rcar_lvds_pll_setup_d3_e3(struct rcar_lvds *lvds, unsigned int freq) rcar_lvds_write(lvds, LVDDIV, 0); } +static void rcar_lvds_pll_setup_d3_e3(struct rcar_lvds *lvds, unsigned int freq) +{ + __rcar_lvds_pll_setup_d3_e3(lvds, freq, false); +} + +/* ----------------------------------------------------------------------------- + * Clock - D3/E3 only + */ + +int rcar_lvds_clk_enable(struct drm_bridge *bridge, unsigned long freq) +{ + struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge); + int ret; + + if (WARN_ON(!(lvds->info->quirks & RCAR_LVDS_QUIRK_EXT_PLL))) + return -ENODEV; + + dev_dbg(lvds->dev, "enabling LVDS PLL, freq=%luHz\n", freq); + + WARN_ON(lvds->enabled); + + ret = clk_prepare_enable(lvds->clocks.mod); + if (ret < 0) + return ret; + + __rcar_lvds_pll_setup_d3_e3(lvds, freq, true); + + lvds->enabled = true; + return 0; +} +EXPORT_SYMBOL_GPL(rcar_lvds_clk_enable); + +void rcar_lvds_clk_disable(struct drm_bridge *bridge) +{ + struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge); + + if (WARN_ON(!(lvds->info->quirks & RCAR_LVDS_QUIRK_EXT_PLL))) + return; + + dev_dbg(lvds->dev, "disabling LVDS PLL\n"); + + WARN_ON(!lvds->enabled); + + rcar_lvds_write(lvds, LVDPLLCR, 0); + + clk_disable_unprepare(lvds->clocks.mod); + + lvds->enabled = false; +} +EXPORT_SYMBOL_GPL(rcar_lvds_clk_disable); + /* ----------------------------------------------------------------------------- * Bridge */ diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.h b/drivers/gpu/drm/rcar-du/rcar_lvds.h new file mode 100644 index 000000000000..a709cae1bc32 --- /dev/null +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * rcar_lvds.h -- R-Car LVDS Encoder + * + * Copyright (C) 2013-2018 Renesas Electronics Corporation + * + * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com) + */ + +#ifndef __RCAR_LVDS_H__ +#define __RCAR_LVDS_H__ + +struct drm_bridge; + +#if IS_ENABLED(CONFIG_DRM_RCAR_LVDS) +int rcar_lvds_clk_enable(struct drm_bridge *bridge, unsigned long freq); +void rcar_lvds_clk_disable(struct drm_bridge *bridge); +#else +static inline int rcar_lvds_clk_enable(struct drm_bridge *bridge, + unsigned long freq) +{ + return -ENOSYS; +} +static inline void rcar_lvds_clk_disable(struct drm_bridge *bridge) { } +#endif /* CONFIG_DRM_RCAR_LVDS */ + +#endif /* __RCAR_LVDS_H__ */ From a6cc417d3eee4ac37aa9547cf82d4ff155d1780c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 17 Jan 2019 01:59:34 +0200 Subject: [PATCH 538/539] drm: rcar-du: Turn LVDS clock output on/off for DPAD0 output on D3/E3 On the D3 and E3 SoCs the LVDS PLL clock output provides the dot clock to the DU channels, even when the LVDS outputs are not in use. Enable and disable the LVDS clock output when enabling or disabling a CRTC connected to the DPAD0 output. Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 34 +++++++++++++++++++++++ drivers/gpu/drm/rcar-du/rcar_du_drv.h | 3 ++ drivers/gpu/drm/rcar-du/rcar_du_encoder.c | 1 + 3 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 93ee0020c9b1..96175d48a902 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -25,6 +25,7 @@ #include "rcar_du_plane.h" #include "rcar_du_regs.h" #include "rcar_du_vsp.h" +#include "rcar_lvds.h" static u32 rcar_du_crtc_read(struct rcar_du_crtc *rcrtc, u32 reg) { @@ -656,8 +657,27 @@ static void rcar_du_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc); + struct rcar_du_crtc_state *rstate = to_rcar_crtc_state(crtc->state); + struct rcar_du_device *rcdu = rcrtc->group->dev; rcar_du_crtc_get(rcrtc); + + /* + * On D3/E3 the dot clock is provided by the LVDS encoder attached to + * the DU channel. We need to enable its clock output explicitly if + * the LVDS output is disabled. + */ + if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index) && + rstate->outputs == BIT(RCAR_DU_OUTPUT_DPAD0)) { + struct rcar_du_encoder *encoder = + rcdu->encoders[RCAR_DU_OUTPUT_LVDS0 + rcrtc->index]; + const struct drm_display_mode *mode = + &crtc->state->adjusted_mode; + + rcar_lvds_clk_enable(encoder->base.bridge, + mode->clock * 1000); + } + rcar_du_crtc_start(rcrtc); } @@ -665,10 +685,24 @@ static void rcar_du_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc); + struct rcar_du_crtc_state *rstate = to_rcar_crtc_state(old_state); + struct rcar_du_device *rcdu = rcrtc->group->dev; rcar_du_crtc_stop(rcrtc); rcar_du_crtc_put(rcrtc); + if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index) && + rstate->outputs == BIT(RCAR_DU_OUTPUT_DPAD0)) { + struct rcar_du_encoder *encoder = + rcdu->encoders[RCAR_DU_OUTPUT_LVDS0 + rcrtc->index]; + + /* + * Disable the LVDS clock output, see + * rcar_du_crtc_atomic_enable(). + */ + rcar_lvds_clk_disable(encoder->base.bridge); + } + spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { drm_crtc_send_vblank_event(crtc, crtc->state->event); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 6c187d0bf7c2..1327cd0df90a 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -22,6 +22,7 @@ struct device; struct drm_device; struct drm_property; struct rcar_du_device; +struct rcar_du_encoder; #define RCAR_DU_FEATURE_CRTC_IRQ_CLOCK BIT(0) /* Per-CRTC IRQ and clock */ #define RCAR_DU_FEATURE_VSP1_SOURCE BIT(1) /* Has inputs from VSP1 */ @@ -81,6 +82,8 @@ struct rcar_du_device { struct rcar_du_crtc crtcs[RCAR_DU_MAX_CRTCS]; unsigned int num_crtcs; + struct rcar_du_encoder *encoders[RCAR_DU_OUTPUT_MAX]; + struct rcar_du_group groups[RCAR_DU_MAX_GROUPS]; struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS]; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c index ed7ccee705cb..8ee4e762f4e5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c @@ -41,6 +41,7 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, if (renc == NULL) return -ENOMEM; + rcdu->encoders[output] = renc; renc->output = output; encoder = rcar_encoder_to_drm_encoder(renc); From 16065fcdd19ddb9e093192914ac863884f308766 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 8 Feb 2019 15:04:09 +0100 Subject: [PATCH 539/539] drm/virtio: do NOT reuse resource ids Bisected guest kernel changes crashing qemu. Landed at "6c1cd97bda drm/virtio: fix resource id handling". Looked again, and noticed we where not only leaking *some* ids, but *all* ids. The old code never ever called virtio_gpu_resource_id_put(). So, commit 6c1cd97bda effectively makes the linux kernel starting re-using IDs after releasing them, and apparently virglrenderer can't deal with that. Oops. This patch puts a temporary stopgap into place for the 5.0 release. Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190208140409.15280-1-kraxel@redhat.com --- drivers/gpu/drm/virtio/virtgpu_object.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index f39a183d59c2..e7e946035027 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -28,10 +28,21 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) { +#if 0 int handle = ida_alloc(&vgdev->resource_ida, GFP_KERNEL); if (handle < 0) return handle; +#else + static int handle; + + /* + * FIXME: dirty hack to avoid re-using IDs, virglrenderer + * can't deal with that. Needs fixing in virglrenderer, also + * should figure a better way to handle that in the guest. + */ + handle++; +#endif *resid = handle + 1; return 0; @@ -39,7 +50,9 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) { +#if 0 ida_free(&vgdev->resource_ida, id - 1); +#endif } static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)