diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 617d9c3a86c3..36357a36a281 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -93,20 +93,6 @@ config DRM_KMS_FB_HELPER help FBDEV helpers for KMS drivers. -config DRM_DEBUG_DP_MST_TOPOLOGY_REFS - bool "Enable refcount backtrace history in the DP MST helpers" - select STACKDEPOT - depends on DRM_KMS_HELPER - depends on DEBUG_KERNEL - depends on EXPERT - help - Enables debug tracing for topology refs in DRM's DP MST helpers. A - history of each topology reference/dereference will be printed to the - kernel log once a port or branch device's topology refcount reaches 0. - - This has the potential to use a lot of memory and print some very - large kernel messages. If in doubt, say "N". - config DRM_FBDEV_EMULATION bool "Enable legacy fbdev support for your modesetting driver" depends on DRM @@ -246,9 +232,9 @@ config DRM_AMDGPU tristate "AMD GPU" depends on DRM && PCI && MMU select FW_LOADER - select DRM_KMS_HELPER + select DRM_KMS_HELPER select DRM_SCHED - select DRM_TTM + select DRM_TTM select POWER_SUPPLY select HWMON select BACKLIGHT_CLASS_DEVICE diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index ca0e435559d5..00962a659009 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -53,9 +53,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ - amdgpu_umc.o smu_v11_0_i2c.o + amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -68,7 +67,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o mxgpu_nv.o + arct_reg_init.o navi12_reg_init.o # add DF block amdgpu-y += \ @@ -84,7 +83,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_1.o umc_v6_0.o + umc_v6_1.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0bb08e0e1611..bd37df5dd6d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,7 +73,6 @@ #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" -#include "amdgpu_nbio.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -107,8 +106,6 @@ struct amdgpu_mgpu_info uint32_t num_apu; }; -#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256 - /* * Modules parameters. 
*/ @@ -125,7 +122,6 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; -extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -139,7 +135,6 @@ extern int amdgpu_vm_fragment_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; extern int amdgpu_vm_update_mode; -extern int amdgpu_exp_hw_support; extern int amdgpu_dc; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; @@ -151,7 +146,11 @@ extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; -extern uint amdgpu_force_long_training; +extern int amdgpu_ngg; +extern int amdgpu_prim_buf_per_se; +extern int amdgpu_pos_buf_per_se; +extern int amdgpu_cntl_sb_buf_per_se; +extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; @@ -168,12 +167,6 @@ extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; extern int amdgpu_noretry; -extern int amdgpu_force_asic_type; -#ifdef CONFIG_HSA_AMD -extern int sched_policy; -#else -static const int sched_policy = KFD_SCHED_POLICY_HWS; -#endif #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -290,9 +283,6 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; -#define HW_REV(_Major, _Minor, _Rev) \ - ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) - struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -435,6 +425,7 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -486,6 +477,7 @@ struct amdgpu_cs_parser { uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; uint64_t bytes_moved_vis; + struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; @@ -632,11 +624,6 @@ struct amdgpu_fw_vram_usage { u64 size; struct amdgpu_bo *reserved_bo; void *va; - - /* Offset on the top of VRAM, used as c2p write buffer. 
- */ - u64 mem_train_fb_loc; - bool mem_train_support; }; /* @@ -657,14 +644,71 @@ typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); + +/* + * amdgpu nbio functions + * + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; +}; + struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; }; +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, + bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); +}; + struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); - void (*sw_fini)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); @@ -769,7 +813,6 @@ struct amdgpu_device { uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stolen_vga_memory; - struct amdgpu_bo *discovery_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -878,12 +921,6 @@ struct amdgpu_device { u32 cg_flags; u32 pg_flags; - /* nbio */ - struct amdgpu_nbio nbio; - - /* mmhub */ - struct amdgpu_mmhub mmhub; - /* gfx */ struct amdgpu_gfx gfx; @@ -937,7 +974,9 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + const struct amdgpu_nbio_funcs *nbio_funcs; const struct amdgpu_df_funcs *df_funcs; + const struct amdgpu_mmhub_funcs *mmhub_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -970,6 
+1009,8 @@ struct amdgpu_device { int asic_reset_res; struct work_struct xgmi_reset_work; + bool in_baco_reset; + long gfx_timeout; long sdma_timeout; long video_timeout; @@ -991,8 +1032,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); -void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write); uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..07eb29885372 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -63,10 +63,45 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { - bool vf = amdgpu_sriov_vf(adev); + const struct kfd2kgd_calls *kfd2kgd; + + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_KAVERI: + case CHIP_HAWAII: + kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); + break; +#endif + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); + break; + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; + case CHIP_ARCTURUS: + kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); + break; + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); + break; + default: + dev_info(adev->dev, "kfd not supported on this ASIC\n"); + return; + } adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, - adev->pdev, adev->asic_type, vf); + adev->pdev, kfd2kgd); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -130,6 +165,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); + /* remove the KIQ bit as well */ + if (adev->gfx.kiq.ring.sched.ready) + clear_bit(amdgpu_gfx_mec_queue_to_bit(adev, + adev->gfx.kiq.ring.me - 1, + adev->gfx.kiq.ring.pipe, + adev->gfx.kiq.ring.queue), + gpu_resources.queue_bitmap); + /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ @@ -159,7 +202,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->doorbell_index.last_non_cp; } - kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); } } @@ -666,14 +709,38 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) return 0; } +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) +{ + return NULL; +} + struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - unsigned int asic_type, bool vf) + const struct kfd2kgd_calls *f2g) { return NULL; } bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct 
kgd2kfd_shared_resources *gpu_resources) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..e519df3fd2b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -57,7 +57,7 @@ struct kgd_mem { unsigned int mapped_to_gpu_memory; uint64_t va; - uint32_t alloc_flags; + uint32_t mapping_flags; atomic_t invalid; struct amdkfd_process_info *process_info; @@ -137,6 +137,12 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); @@ -173,17 +179,10 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); -/* Read user wptr from a specified user address space with page fault - * disabled. The memory must be pinned and mapped to the hardware when - * this is called in hqd_load functions, so it should never fault in - * the first place. This resolves a circular lock dependency involving - * four locks, including the DQM lock and mmap_sem. - */ #define read_user_wptr(mmptr, wptr, dst) \ ({ \ bool valid = false; \ if ((mmptr) && (wptr)) { \ - pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ @@ -191,7 +190,6 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s valid = !get_user((dst), (wptr)); \ unuse_mm(mmptr); \ } \ - pagefault_enable(); \ } \ valid; \ }) @@ -242,9 +240,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - unsigned int asic_type, bool vf); + const struct kfd2kgd_calls *f2g); bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index b6713e0ed1b2..c79aaebeeaf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -19,6 +19,10 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + #include #include #include @@ -65,11 +69,11 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { + uint32_t base[8] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, @@ -87,82 +91,111 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, SOC15_REG_OFFSET(SDMA7, 0, mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL }; + uint32_t retval; - uint32_t retval = sdma_engine_reg_base[engine_id] - + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); - pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) +{ + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); + sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, + m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, - m->sdmax_rlcx_rb_rptr); - 
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -172,8 +205,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, - engine_id, queue_id); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -183,15 +215,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -203,14 +235,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + 
mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -223,42 +255,40 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -const struct kfd2kgd_calls arcturus_kfd2kgd = { +static const struct kfd2kgd_calls kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, @@ -274,11 +304,20 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_info = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_atc_vmid_pasid_mapping_pasid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; + +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 61cd707158e4..d10f483f5e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,9 +19,18 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include +#include +#include +#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" #include "navi10_enum.h" @@ -33,7 +42,6 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" -#include "gfxhub_v2_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -42,6 +50,63 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +#if 0 +static uint32_t get_watch_base_addr(struct amdgpu_device *adev); +#endif +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ @@ -74,6 +139,37 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .get_tile_config = amdgpu_amdkfd_get_tile_config, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -154,6 +250,11 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ATC_VMID0_PASID_MAPPING__VALID_MASK; pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. 
+ */ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, @@ -205,11 +306,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[2] = { + uint32_t base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, /* On gfx10, mmSDMA1_xxx registers are defined NOT based @@ -221,12 +322,12 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL }; + uint32_t retval; - uint32_t retval = sdma_engine_reg_base[engine_id] - + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); - pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } @@ -387,67 +488,72 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); + pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? 
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, - m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -457,26 +563,28 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, - engine_id, queue_id); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) + pr_debug("sdma dump engine id %d queue_id %d\n", 
engine_id, queue_id); + pr_debug("sdma base addr %x\n", sdma_base_addr); + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -510,14 +618,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -638,52 +746,59 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, - uint8_t vmid, uint16_t *p_pasid) +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) { - uint32_t value; + uint32_t reg; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - value = 
RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} - return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -715,8 +830,6 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; - uint16_t queried_pasid; - bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; if (amdgpu_emu_mode == 0 && ring->sched.ready) @@ -725,13 +838,13 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - - ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + break; + } } } @@ -801,6 +914,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = page_table_base | AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", @@ -808,31 +922,18 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, return; } - /* SDMA is on gfxhub as well for Navi1* series */ - gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); -} + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. 
+ */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); -const struct kfd2kgd_calls gfx_v10_kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_info = - get_atc_vmid_pasid_mapping_info, - .get_tile_config = amdgpu_amdkfd_get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .get_hive_id = amdgpu_amdkfd_get_hive_id, -}; + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6e6f0a99ec06..5f459bf5f622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,6 +20,8 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include +#include #include #include "amdgpu.h" @@ -84,6 +86,65 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); + /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ @@ -109,6 +170,37 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -211,15 +303,14 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", - m->sdma_engine_id, m->sdma_queue_id, retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } @@ -322,52 +413,60 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t data; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_rlc_reg_offset + 
mmSDMA0_RLC0_RB_RPTR, - m->sdma_rlc_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdma_rlc_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -425,13 +524,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + sdma_base_addr = get_sdma_base_addr(m); - sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -546,34 +645,32 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + sdma_base_addr = get_sdma_base_addr(m); - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); 
return 0; } @@ -661,16 +758,24 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, - uint8_t vmid, uint16_t *p_pasid) +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) { - uint32_t value; + uint32_t reg; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} - return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -750,28 +855,3 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } - -const struct kfd2kgd_calls gfx_v7_kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index bfbddedb2380..6d2f61449606 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,6 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include +#include +#include #include #include "amdgpu.h" @@ -41,6 +44,62 @@ enum hqd_dequeue_request_type { RESET_WAVES }; +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ @@ -66,6 +125,38 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -169,15 +260,13 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m) +static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - - pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", - m->sdma_engine_id, m->sdma_queue_id, retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } @@ -309,51 +398,59 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t data; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + sdma_base_addr = get_sdma_base_addr(m); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, - m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + 
mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -420,13 +517,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + sdma_base_addr = get_sdma_base_addr(m); - sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -544,48 +641,54 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + sdma_base_addr = get_sdma_base_addr(m); - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); return 0; } -static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev 
*kgd, - uint8_t vmid, uint16_t *p_pasid) +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) { - uint32_t value; + uint32_t reg; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} - return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -695,28 +798,3 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) RREG32(mmVM_INVALIDATE_RESPONSE); return 0; } - -const struct kfd2kgd_calls gfx_v8_kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_info = - get_atc_vmid_pasid_mapping_info, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 47c853ef1051..e262f2ac07a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,10 +19,17 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
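
In the gfx_v8 hunks above, get_sdma_base_addr() locates a queue's RLC register block from two strides, one per SDMA engine and one per queue within an engine, and individual registers are then accessed at that base plus the engine-0/queue-0 register offset. The sketch below shows only that addressing arithmetic; the stride and offset values are made up, since the real SDMA1_REGISTER_OFFSET, KFD_VI_SDMA_QUEUE_OFFSET and mmSDMA0_RLC0_* constants come from the ASIC register headers.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical strides/offsets standing in for the VI register-header
 * constants used by get_sdma_base_addr(). */
#define ENGINE_STRIDE	0x200u	/* dword offset between SDMA engines */
#define QUEUE_STRIDE	0x20u	/* dword offset between RLC queues   */
#define RLC0_RB_CNTL	0x05u	/* register offset within one queue  */

/* Mirror of get_sdma_base_addr(): engine and queue select a block. */
static uint32_t sdma_queue_base(uint32_t engine_id, uint32_t queue_id)
{
	return engine_id * ENGINE_STRIDE + queue_id * QUEUE_STRIDE;
}

int main(void)
{
	for (uint32_t engine = 0; engine < 2; engine++)
		for (uint32_t queue = 0; queue < 2; queue++)
			printf("SDMA%u RLC%u RB_CNTL at 0x%04x\n",
			       engine, queue,
			       sdma_queue_base(engine, queue) + RLC0_RB_CNTL);
	return 0;
}
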
*/ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include +#include +#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "soc15_hw_ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" @@ -43,6 +50,9 @@ #include "gmc_v9_0.h" +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -216,21 +226,22 @@ int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[2] = { + uint32_t base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL }; - uint32_t retval = sdma_engine_reg_base[engine_id] - + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval; - pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); return retval; } @@ -377,67 +388,71 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? 
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, - m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -447,8 +462,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, - engine_id, queue_id); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -458,15 +472,15 @@ static int kgd_hqd_sdma_dump(struct 
kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_rlc_reg_offset + reg); + DUMP_REG(sdma_base_addr + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -500,14 +514,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -570,52 +584,59 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_rlc_reg_offset; + uint32_t sdma_base_addr; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) { - pr_err("SDMA RLC not idle in %s\n", __func__); + if (time_after(jiffies, end_jiffies)) return -ETIME; - } usleep_range(500, 1000); } - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, - uint8_t vmid, uint16_t *p_pasid) +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) { - uint32_t value; + uint32_t reg; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - 
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} - return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, @@ -650,8 +671,6 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid, i; - uint16_t queried_pasid; - bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; uint32_t flush_type = 0; @@ -667,14 +686,14 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - - ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); + break; + } } } @@ -758,6 +777,15 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. 
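
The revert also restores the per-generation dispatch pattern visible in the gfx_v9 hunk below: each amdgpu_amdkfd_gfx_v*.c file keeps a file-local struct kfd2kgd_calls table and exposes it through an amdgpu_amdkfd_gfx_*_get_functions() getter rather than exporting the table symbol directly. A rough standalone sketch of that style follows; the structure and function names here are invented for illustration only.

#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for kfd2kgd_calls: a table of per-ASIC callbacks. */
struct gfx_ops {
	int (*init_interrupts)(uint32_t pipe_id);
	uint16_t (*get_pasid)(uint8_t vmid);
};

static int v9_init_interrupts(uint32_t pipe_id)
{
	printf("init interrupts on pipe %u\n", pipe_id);
	return 0;
}

static uint16_t v9_get_pasid(uint8_t vmid)
{
	return 0x100u + vmid;	/* fake PASID for the demo */
}

/* File-local table plus a getter, mirroring the restored
 * amdgpu_amdkfd_gfx_9_0_get_functions() returning &kfd2kgd. */
static const struct gfx_ops v9_ops = {
	.init_interrupts = v9_init_interrupts,
	.get_pasid       = v9_get_pasid,
};

static const struct gfx_ops *gfx_v9_get_functions(void)
{
	return &v9_ops;
}

int main(void)
{
	const struct gfx_ops *ops = gfx_v9_get_functions();

	ops->init_interrupts(3);
	printf("vmid 8 -> pasid 0x%x\n", ops->get_pasid(8));
	return 0;
}
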
+ */ +} + void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { @@ -783,7 +811,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } -const struct kfd2kgd_calls gfx_v9_kfd2kgd = { +static const struct kfd2kgd_calls kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, @@ -799,11 +827,19 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_info = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .get_atc_vmid_pasid_mapping_pasid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index d9e9ad22b2bd..26d8879bff9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -55,10 +55,14 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, - uint8_t vmid, uint16_t *p_pasid); +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae6f5446262c..01a58d3f8e17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -19,6 +19,9 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + #include #include #include @@ -30,6 +33,11 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. 
+ */ +#define VI_BO_SIZE_ALIGN (0x8000) + /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -341,46 +349,13 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_update_pdes(adev, vm, false); + ret = amdgpu_vm_update_directories(adev, vm); if (ret) return ret; return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } -static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) -{ - struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; - uint32_t mapping_flags; - - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - - switch (adev->asic_type) { - case CHIP_ARCTURUS: - if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; - default: - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - - return amdgpu_gem_va_map_flags(adev, mapping_flags); -} - /* add_bo_to_vm - Add a BO to a VM * * Everything that needs to bo done only once when a BO is first added @@ -429,7 +404,8 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, } bo_va_entry->va = va; - bo_va_entry->pte_flags = get_pte_flags(adev, mem); + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); bo_va_entry->kgd_dev = (void *)adev; list_add(&bo_va_entry->bo_list, list_bo_va); @@ -1103,8 +1079,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; + int byte_align; u32 domain, alloc_domain; u64 alloc_flags; + uint32_t mapping_flags; int ret; /* @@ -1157,7 +1135,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if ((*mem)->aql_queue) size = size >> 1; - (*mem)->alloc_flags = flags; + /* Workaround for TLB bug on older VI chips */ + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11 && + adev->asic_type != CHIP_POLARIS12 && + adev->asic_type != CHIP_VEGAM) ? + VI_BO_SIZE_ALIGN : 1; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & ALLOC_MEM_FLAGS_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + (*mem)->mapping_flags = mapping_flags; amdgpu_sync_create(&(*mem)->sync); @@ -1172,7 +1168,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = 1; + bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; bp.type = bo_type; @@ -1630,10 +1626,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - (*mem)->alloc_flags = - ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
- ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) | - ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE; + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 72232fccf61a..1c9d40f97a9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -2038,11 +2038,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); - ret = amdgpu_atomfirmware_get_mem_train_fb_loc(adev); - if (ret) { - DRM_ERROR("Failed to get mem train fb location.\n"); - return ret; - } } else { amdgpu_atombios_scratch_regs_init(adev); amdgpu_atombios_allocate_fb_scratch(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ff4eb96bdfb5..daf687428cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -27,7 +27,6 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" #include "atombios.h" -#include "soc15_hw_ip.h" bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) { @@ -121,14 +120,65 @@ union vram_info { struct atom_vram_info_header_v2_3 v23; struct atom_vram_info_header_v2_4 v24; }; +/* + * Return vram width from integrated system info table, if available, + * or 0 if not. + */ +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union igp_info *igp_info; + union vram_info *vram_info; + u32 mem_channel_number; + u32 mem_channel_width; + u8 frev, crev; -union vram_module { - struct atom_vram_module_v9 v9; - struct atom_vram_module_v10 v10; -}; + if (adev->flags & AMD_IS_APU) + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + else + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_info); -static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, - int atom_mem_type) + /* get any igp specific overrides */ + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, + &frev, &crev, &data_offset)) { + if (adev->flags & AMD_IS_APU) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + mem_channel_number = igp_info->v11.umachannelnumber; + /* channel width is 64 */ + return mem_channel_number * 64; + default: + return 0; + } + } else { + vram_info = (union vram_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 3: + mem_channel_number = vram_info->v23.vram_module[0].channel_num; + mem_channel_width = vram_info->v23.vram_module[0].channel_width; + return mem_channel_number * (1 << mem_channel_width); + case 4: + mem_channel_number = vram_info->v24.vram_module[0].channel_num; + mem_channel_width = vram_info->v24.vram_module[0].channel_width; + return mem_channel_number * (1 << mem_channel_width); + default: + return 0; + } + } + } + + return 0; +} + +static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, + int atom_mem_type) { int vram_type; @@ -169,25 +219,19 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device 
*adev, return vram_type; } - - -int -amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, - int *vram_width, int *vram_type, - int *vram_vendor) +/* + * Return vram type from either integrated system info table + * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not + */ +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index, i = 0; + int index; u16 data_offset, size; union igp_info *igp_info; union vram_info *vram_info; - union vram_module *vram_module; u8 frev, crev; u8 mem_type; - u8 mem_vendor; - u32 mem_channel_number; - u32 mem_channel_width; - u32 module_id; if (adev->flags & AMD_IS_APU) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, @@ -195,7 +239,6 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, else index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info); - if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -204,67 +247,25 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, (mode_info->atom_context->bios + data_offset); switch (crev) { case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; mem_type = igp_info->v11.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - break; + return convert_atom_mem_type_to_vram_type(adev, mem_type); default: - return -EINVAL; + return 0; } } else { vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); - module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; switch (crev) { case 3: - if (module_id > vram_info->v23.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v23.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v9.vram_module_size); - i++; - } - mem_type = vram_module->v9.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v9.channel_num; - mem_channel_width = vram_module->v9.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; + mem_type = vram_info->v23.vram_module[0].memory_type; + return convert_atom_mem_type_to_vram_type(adev, mem_type); case 4: - if (module_id > vram_info->v24.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v24.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v10.vram_module_size); - i++; - } - mem_type = vram_module->v10.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v10.channel_num; - mem_channel_width = vram_module->v10.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; + mem_type = vram_info->v24.vram_module[0].memory_type; + return convert_atom_mem_type_to_vram_type(adev, mem_type); default: - return -EINVAL; + return 0; } } - } return 0; @@ -463,138 +464,3 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) } return 
-EINVAL; } - -/* - * Check if VBIOS supports GDDR6 training data save/restore - */ -static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev) -{ - uint16_t data_offset; - int index; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); - if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, - NULL, NULL, &data_offset)) { - struct atom_firmware_info_v3_1 *firmware_info = - (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + - data_offset); - - DRM_DEBUG("atom firmware capability:0x%08x.\n", - le32_to_cpu(firmware_info->firmware_capability)); - - if (le32_to_cpu(firmware_info->firmware_capability) & - ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) - return true; - } - - return false; -} - -static int gddr6_mem_train_support(struct amdgpu_device *adev) -{ - int ret; - uint32_t major, minor, revision, hw_v; - - if (gddr6_mem_train_vbios_support(adev)) { - amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision); - hw_v = HW_REV(major, minor, revision); - /* - * treat 0 revision as a special case since register for MP0 and MMHUB is missing - * for some Navi10 A0, preventing driver from discovering the hwip information since - * none of the functions will be initialized, it should not cause any problems - */ - switch (hw_v) { - case HW_REV(11, 0, 0): - case HW_REV(11, 0, 5): - ret = 1; - break; - default: - DRM_ERROR("memory training vbios supports but psp hw(%08x)" - " doesn't support!\n", hw_v); - ret = -1; - break; - } - } else { - ret = 0; - hw_v = -1; - } - - - DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret); - return ret; -} - -int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev) -{ - struct atom_context *ctx = adev->mode_info.atom_context; - unsigned char *bios = ctx->bios; - struct vram_reserve_block *reserved_block; - int index, block_number; - uint8_t frev, crev; - uint16_t data_offset, size; - uint32_t start_address_in_kb; - uint64_t offset; - int ret; - - adev->fw_vram_usage.mem_train_support = false; - - if (adev->asic_type != CHIP_NAVI10 && - adev->asic_type != CHIP_NAVI14) - return 0; - - if (amdgpu_sriov_vf(adev)) - return 0; - - ret = gddr6_mem_train_support(adev); - if (ret == -1) - return -EINVAL; - else if (ret == 0) - return 0; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - vram_usagebyfirmware); - ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, - &data_offset); - if (ret == 0) { - DRM_ERROR("parse data header failed.\n"); - return -EINVAL; - } - - DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x," - " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset); - /* only support 2.1+ */ - if (((uint16_t)frev << 8 | crev) < 0x0201) { - DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev); - return -EINVAL; - } - - reserved_block = (struct vram_reserve_block *) - (bios + data_offset + sizeof(struct atom_common_table_header)); - block_number = ((unsigned int)size - sizeof(struct atom_common_table_header)) - / sizeof(struct vram_reserve_block); - reserved_block += (block_number > 0) ? 
block_number-1 : 0; - DRM_DEBUG("block_number:0x%04x, last block: 0x%08xkb sz, %dkb fw, %dkb drv.\n", - block_number, - le32_to_cpu(reserved_block->start_address_in_kb), - le16_to_cpu(reserved_block->used_by_firmware_in_kb), - le16_to_cpu(reserved_block->used_by_driver_in_kb)); - if (reserved_block->used_by_firmware_in_kb > 0) { - start_address_in_kb = le32_to_cpu(reserved_block->start_address_in_kb); - offset = (uint64_t)start_address_in_kb * ONE_KiB; - if ((offset & (ONE_MiB - 1)) < (4 * ONE_KiB + 1) ) { - offset -= ONE_MiB; - } - - offset &= ~(ONE_MiB - 1); - adev->fw_vram_usage.mem_train_fb_loc = offset; - adev->fw_vram_usage.mem_train_support = true; - DRM_DEBUG("mem_train_fb_loc:0x%09llx.\n", offset); - ret = 0; - } else { - DRM_ERROR("used_by_firmware_in_kb is 0!\n"); - ret = -EINVAL; - } - - return ret; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index f871af5ea6f3..5ec6f92f353c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -29,9 +29,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, - int *vram_width, int *vram_type, int *vram_vendor); -int amdgpu_atomfirmware_get_mem_train_fb_loc(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index a97fb759e2f4..3e35a8f2c5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -613,7 +613,17 @@ static bool amdgpu_atpx_detect(void) bool d3_supported = false; struct pci_dev *parent_pdev; - while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) { + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + vga_count++; + + has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); + + parent_pdev = pci_upstream_bridge(pdev); + d3_supported |= parent_pdev && parent_pdev->bridge_d3; + amdgpu_atpx_get_quirks(pdev); + } + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { vga_count++; has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 85b0515c0fdc..61e38e43ad1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -140,12 +140,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - for (i = 0; i < last_entry; ++i) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); - - amdgpu_bo_unref(&bo); - } - for (i = first_userptr; i < num_entries; ++i) { + while (i--) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); amdgpu_bo_unref(&bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index a62cbc8199de..d8729285f731 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1019,12 +1019,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) */ if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { struct drm_connector *list_connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *list_amdgpu_connector; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(list_connector, - &iter) { + list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) { if (connector == list_connector) continue; list_amdgpu_connector = to_amdgpu_connector(list_connector); @@ -1041,7 +1037,6 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } } } - drm_connector_list_iter_end(&iter); } } } @@ -1499,7 +1494,6 @@ amdgpu_connector_add(struct amdgpu_device *adev, { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *amdgpu_dig_connector; struct drm_encoder *encoder; @@ -1514,12 +1508,10 @@ amdgpu_connector_add(struct amdgpu_device *adev, return; /* see if we already added it */ - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->connector_id == connector_id) { amdgpu_connector->devices |= supported_device; - drm_connector_list_iter_end(&iter); return; } if (amdgpu_connector->ddc_bus && i2c_bus->valid) { @@ -1534,7 +1526,6 @@ amdgpu_connector_add(struct amdgpu_device *adev, } } } - drm_connector_list_iter_end(&iter); /* check if it's a dp bridge */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a169ff16277f..253158fc378f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -35,7 +35,6 @@ #include "amdgpu_trace.h" #include "amdgpu_gmc.h" #include "amdgpu_gem.h" -#include "amdgpu_ras.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -450,12 +449,75 @@ retry: return r; } +/* Last resort, try to evict something from the current working set */ +static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, + struct amdgpu_bo *validated) +{ + uint32_t domain = validated->allowed_domains; + struct ttm_operation_ctx ctx = { true, false }; + int r; + + if (!p->evictable) + return false; + + for (;&p->evictable->tv.head != &p->validated; + p->evictable = list_prev_entry(p->evictable, tv.head)) { + + struct amdgpu_bo_list_entry *candidate = p->evictable; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + bool update_bytes_moved_vis; + uint32_t other; + + /* If we reached our current BO we can forget it */ + if (bo == validated) + break; + + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + + /* Check if this BO is in one of the domains we need space for */ + if (!(other & domain)) + continue; + + /* Check if we can move this BO somewhere else */ + other = bo->allowed_domains & ~domain; + if (!other) + continue; + + /* Good we can try to move this BO somewhere else */ 
+ update_bytes_moved_vis = + !amdgpu_gmc_vram_full_visible(&adev->gmc) && + amdgpu_bo_in_cpu_visible_vram(bo); + amdgpu_bo_placement_from_domain(bo, other); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + p->bytes_moved += ctx.bytes_moved; + if (update_bytes_moved_vis) + p->bytes_moved_vis += ctx.bytes_moved; + + if (unlikely(r)) + break; + + p->evictable = list_prev_entry(p->evictable, tv.head); + list_move(&candidate->tv.head, &p->validated); + + return true; + } + + return false; +} + static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_cs_parser *p = param; int r; - r = amdgpu_cs_bo_validate(p, bo); + do { + r = amdgpu_cs_bo_validate(p, bo); + } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); if (r) return r; @@ -474,6 +536,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); + bool binding_userptr = false; struct mm_struct *usermm; usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); @@ -490,14 +553,20 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); + binding_userptr = true; } + if (p->evictable == lobj) + p->evictable = NULL; + r = amdgpu_cs_validate(p, bo); if (r) return r; - kvfree(lobj->user_pages); - lobj->user_pages = NULL; + if (binding_userptr) { + kvfree(lobj->user_pages); + lobj->user_pages = NULL; + } } return 0; } @@ -592,6 +661,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; + p->evictable = list_last_entry(&p->validated, + struct amdgpu_bo_list_entry, + tv.head); r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, amdgpu_cs_validate, p); @@ -843,7 +915,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_update_pdes(adev, vm, false); + r = amdgpu_vm_update_directories(adev, vm); if (r) return r; @@ -1287,9 +1359,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) bool reserved_buffers = false; int i, r; - if (amdgpu_ras_intr_triggered()) - return -EHWPOISON; - if (!adev->accel_working) return -EBUSY; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 693f17e78791..5652cc72ed3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1077,7 +1077,8 @@ failure: ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - kfree(fences); + if (fences) + kfree(fences); return 0; } @@ -1089,8 +1090,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) { adev->debugfs_preempt = debugfs_create_file("amdgpu_preempt_ib", 0600, - adev->ddev->primary->debugfs_root, adev, - &fops_ib_preempt); + adev->ddev->primary->debugfs_root, + (void *)adev, &fops_ib_preempt); if (!(adev->debugfs_preempt)) { DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); return -EIO; @@ -1102,7 +1103,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { - debugfs_remove(adev->debugfs_preempt); + if (adev->debugfs_preempt) + debugfs_remove(adev->debugfs_preempt); } #else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d36d2b093539..5a1939dbd4e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -65,8 +65,6 
@@ #include "amdgpu_ras.h" #include "amdgpu_pmu.h" -#include - MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -80,7 +78,7 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -const char *amdgpu_asic_name[] = { +static const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", "VERDE", @@ -153,36 +151,6 @@ bool amdgpu_device_is_px(struct drm_device *dev) return false; } -/** - * VRAM access helper functions. - * - * amdgpu_device_vram_access - read/write a buffer in vram - * - * @adev: amdgpu_device pointer - * @pos: offset of the buffer in vram - * @buf: virtual address of the buffer in system memory - * @size: read/write size, sizeof(@buf) must > @size - * @write: true - write to vram, otherwise - read from vram - */ -void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write) -{ - uint64_t last; - unsigned long flags; - - last = size - 4; - for (last += pos; pos <= last; pos += 4) { - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); - if (write) - WREG32_NO_KIQ(mmMM_DATA, *buf++); - else - *buf++ = RREG32_NO_KIQ(mmMM_DATA); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - } -} - /* * MMIO register access helper functions. */ @@ -1055,6 +1023,12 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); + ret = amdgpu_device_get_job_timeout_settings(adev); + if (ret) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return ret; + } + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); return ret; @@ -1495,9 +1469,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) - goto parse_soc_bounding_box; - adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); @@ -1525,13 +1496,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.config.num_packer_per_sc = le32_to_cpu(gpu_info_fw->num_packer_per_sc); } - -parse_soc_bounding_box: #ifdef CONFIG_DRM_AMD_DC_DCN2_0 - /* - * soc bounding box info is not integrated in disocovery table, - * we always need to parse it from gpu info firmware. 
- */ if (hdr->version_minor == 2) { const struct gpu_info_firmware_v1_2 *gpu_info_fw = (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + @@ -1648,9 +1613,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) - amdgpu_discovery_get_gfx_info(adev); - amdgpu_amdkfd_device_probe(adev); if (amdgpu_sriov_vf(adev)) { @@ -1660,7 +1622,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) } adev->pm.pp_feature = amdgpu_pp_feature_mask; - if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) + if (amdgpu_sriov_vf(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1877,19 +1839,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - /* - * retired pages will be loaded from eeprom and reserved here, - * it should be called after amdgpu_device_ip_hw_init_phase2 since - * for some ASICs the RAS EEPROM code relies on SMU fully functioning - * for I2C communication which only true at this point. - * recovery_init may fail, but it can free all resources allocated by - * itself and its failure should not stop amdgpu init process. - * - * Note: theoretically, this should be called before all vram allocations - * to protect retired page from abusing - */ - amdgpu_ras_recovery_init(adev); - if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); @@ -2271,12 +2220,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) continue; - /* PSP lost connection when err_event_athub occurs */ - if (amdgpu_ras_intr_triggered() && - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { - adev->ip_blocks[i].status.hw = false; - continue; - } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ @@ -2288,17 +2231,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* handle putting the SMC in the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { if (is_support_sw_smu(adev)) { - r = smu_set_mp1_state(&adev->smu, adev->mp1_state); + /* todo */ } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_mp1_state) { r = adev->powerplay.pp_funcs->set_mp1_state( adev->powerplay.pp_handle, adev->mp1_state); - } - if (r) { - DRM_ERROR("SMC failed to set mp1 state %d, %d\n", - adev->mp1_state, r); - return r; + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; + } } } @@ -2613,73 +2556,6 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) adev->asic_reset_res, adev->ddev->unique); } -static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) -{ - char *input = amdgpu_lockup_timeout; - char *timeout_setting = NULL; - int index = 0; - long timeout; - int ret = 0; - - /* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. - * In SR-IOV or passthrough mode, timeout for compute - * jobs are 10000 by default. 
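
The function removed in the next hunk parsed the amdgpu.lockup_timeout module parameter: up to four comma-separated millisecond values applied, in order, to gfx, compute, sdma and video jobs, where 0 keeps the default, a negative value means no timeout, and a single value is applied to every non-compute engine. Below is a userspace approximation of that parsing, using standard C string handling in place of the kernel's strsep()/kstrtol() helpers and simplified defaults.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_ENGINES 4
static const char *names[NUM_ENGINES] = { "gfx", "compute", "sdma", "video" };

int main(void)
{
	/* Example input in the lockup_timeout format (milliseconds). */
	char input[] = "10000,-1,0,7000";
	long timeout[NUM_ENGINES] = { 10000, 10000, 10000, 10000 }; /* simplified defaults */
	int index = 0;

	for (char *tok = strtok(input, ","); tok && index < NUM_ENGINES;
	     tok = strtok(NULL, ",")) {
		long val = strtol(tok, NULL, 0);

		if (val == 0) {		/* 0: keep the default for this slot */
			index++;
			continue;
		}
		if (val < 0)		/* negative: treat as "no timeout" */
			val = -1;
		timeout[index++] = val;
	}

	/* A single value applies to all non-compute engines. */
	if (index == 1)
		timeout[2] = timeout[3] = timeout[0];

	for (int i = 0; i < NUM_ENGINES; i++)
		printf("%s timeout: %ld ms\n", names[i], timeout[i]);
	return 0;
}
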
- */ - adev->gfx_timeout = msecs_to_jiffies(10000); - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) - adev->compute_timeout = adev->gfx_timeout; - else - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; - - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { - while ((timeout_setting = strsep(&input, ",")) && - strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { - ret = kstrtol(timeout_setting, 0, &timeout); - if (ret) - return ret; - - if (timeout == 0) { - index++; - continue; - } else if (timeout < 0) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else { - timeout = msecs_to_jiffies(timeout); - } - - switch (index++) { - case 0: - adev->gfx_timeout = timeout; - break; - case 1: - adev->compute_timeout = timeout; - break; - case 2: - adev->sdma_timeout = timeout; - break; - case 3: - adev->video_timeout = timeout; - break; - default: - break; - } - } - /* - * There is only one value specified and - * it should apply to all non-compute jobs. - */ - if (index == 1) { - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) - adev->compute_timeout = adev->gfx_timeout; - } - } - - return ret; -} /** * amdgpu_device_init - initialize the driver @@ -2707,12 +2583,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ddev = ddev; adev->pdev = pdev; adev->flags = flags; - - if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) - adev->asic_type = amdgpu_force_asic_type; - else - adev->asic_type = flags & AMD_ASIC_MASK; - + adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; if (amdgpu_emu_mode == 1) adev->usec_timeout *= 2; @@ -2855,12 +2726,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; - r = amdgpu_device_get_job_timeout_settings(adev); - if (r) { - dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return r; - } - /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3142,7 +3007,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) struct amdgpu_device *adev; struct drm_crtc *crtc; struct drm_connector *connector; - struct drm_connector_list_iter iter; int r; if (dev == NULL || dev->dev_private == NULL) { @@ -3165,11 +3029,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (!amdgpu_device_has_dc_support(adev)) { /* turn off display hw */ drm_modeset_lock_all(dev); - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) - drm_helper_connector_dpms(connector, - DRM_MODE_DPMS_OFF); - drm_connector_list_iter_end(&iter); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + } drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -3220,11 +3082,15 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); + pci_save_state(dev->pdev); if (suspend) { - pci_save_state(dev->pdev); /* Shut down the device */ pci_disable_device(dev->pdev); pci_set_power_state(dev->pdev, PCI_D3hot); + } else { + r = amdgpu_asic_reset(adev); + if (r) + DRM_ERROR("amdgpu asic reset failed\n"); } return 0; @@ -3244,7 +3110,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) int amdgpu_device_resume(struct drm_device 
*dev, bool resume, bool fbcon) { struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_device *adev = dev->dev_private; struct drm_crtc *crtc; int r = 0; @@ -3315,13 +3180,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) /* turn on display hw */ drm_modeset_lock_all(dev); - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) - drm_helper_connector_dpms(connector, - DRM_MODE_DPMS_ON); - drm_connector_list_iter_end(&iter); - + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + } drm_modeset_unlock_all(dev); } amdgpu_fbdev_set_suspend(adev, 0); @@ -3767,6 +3628,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, break; } } + + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + amdgpu_ras_reserve_bad_pages(tmp_adev); + } } } @@ -3870,18 +3736,25 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) adev->mp1_state = PP_MP1_STATE_NONE; break; } + /* Block kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_pre_reset(adev); return true; } static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { + /*unlock kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); } + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -3901,24 +3774,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool in_ras_intr = amdgpu_ras_intr_triggered(); - - /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { - - DRM_WARN("Emergency reboot."); - - ksys_sync_helper(); - emergency_restart(); - } need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? 
"jobs stop":"reset"); + dev_info(adev->dev, "GPU reset begin!\n"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -3945,16 +3805,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return 0; } - /* Block kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_pre_reset(adev); - /* Build list of devices to reset */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (!hive) { - /*unlock kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_post_reset(adev); amdgpu_device_unlock_adev(adev); return -ENODEV; } @@ -3970,22 +3823,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - if (tmp_adev != adev) { - amdgpu_device_lock_adev(tmp_adev, false); - if (!amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_pre_reset(tmp_adev); - } - - /* - * Mark these ASICs to be reseted as untracked first - * And add them back after reset completed - */ + /* + * Mark these ASICs to be reseted as untracked first + * And add them back after reset completed + */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) amdgpu_unregister_gpu_instance(tmp_adev); + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /* disable ras on ALL IPs */ - if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + if (amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3995,16 +3843,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); - - if (in_ras_intr) - amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr) - goto skip_sched_resume; - /* * Must check guilty signal here since after this point all old * HW fences are force signaled. @@ -4015,6 +3857,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dma_fence_is_signaled(job->base.s_fence->parent)) job_signaled = true; + if (!amdgpu_device_ip_need_full_reset(adev)) + device_list_handle = &device_list; + if (job_signaled) { dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -4036,6 +3881,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; + amdgpu_device_lock_adev(tmp_adev, false); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset); @@ -4063,7 +3909,6 @@ skip_hw_reset: /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -4085,18 +3930,12 @@ skip_hw_reset: if (r) { /* bad news, how to tell it to userspace ? 
*/ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); } - } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - /*unlock kfd: SRIOV would do it separately */ - if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f95092741c38..1481899f86c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -134,10 +134,20 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { + uint32_t *p = (uint32_t *)binary; uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - DISCOVERY_TMR_SIZE; + uint64_t pos = vram_size - BINARY_MAX_SIZE; + unsigned long flags; + + while (pos < vram_size) { + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); + *p++ = RREG32_NO_KIQ(mmMM_DATA); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + pos += 4; + } - amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false); return 0; } @@ -169,7 +179,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev) uint16_t checksum; int r; - adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL); + adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL); if (!adev->discovery) return -ENOMEM; @@ -323,7 +333,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, - int *major, int *minor, int *revision) + int *major, int *minor) { struct binary_header *bhdr; struct ip_discovery_header *ihdr; @@ -359,8 +369,6 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, *major = ip->major; if (minor) *minor = ip->minor; - if (revision) - *revision = ip->revision; return 0; } ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index ba78e15d9b05..85b8c4d4d576 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,13 +24,11 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (64 << 10) - int amdgpu_discovery_init(struct amdgpu_device *adev); void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, - int *major, int *minor, int *revision); + int *major, int *minor); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d2dd59a95e8a..1d4aaa9580f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -370,13 +370,11 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - struct drm_connector_list_iter iter; uint32_t devices; int i = 0; - drm_connector_list_iter_begin(dev, &iter); DRM_INFO("AMDGPU Display Connectors\n"); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { amdgpu_connector = to_amdgpu_connector(connector); DRM_INFO("Connector %d:\n", i); DRM_INFO(" %s\n", connector->name); @@ -440,7 +438,6 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) } i++; } - drm_connector_list_iter_end(&iter); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e2eec7b66334..d5bcdfefbad6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -341,6 +341,7 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation + * @dev: DRM device * @gobj: GEM BO * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 9cc270efee7c..5803fcbae22f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -911,8 +911,7 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -929,8 +928,7 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2cfb677272af..1c5c0fd76dbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -298,6 +298,12 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_get_current_power_state(adev) \ ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) +#define amdgpu_smu_get_current_power_state(adev) \ + ((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu))) + +#define amdgpu_smu_set_power_state(adev) \ + ((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu))) + #define amdgpu_dpm_get_pp_num_states(adev, data) \ ((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d6ce018c8f2..25adf2b847e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -43,8 +43,6 @@ #include "amdgpu_amdkfd.h" -#include "amdgpu_ras.h" - /* * KMS wrapper. * - 3.0.0 - initial driver @@ -84,12 +82,13 @@ * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. 
* - 3.34.0 - Non-DC can flip correctly between buffers with different pitches * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask - * - 3.36.0 - Allow reading more status registers on si/cik */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 + int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -102,7 +101,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -129,7 +128,11 @@ char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; /* OverDrive(bit 14) disabled by default*/ uint amdgpu_pp_feature_mask = 0xffffbfff; -uint amdgpu_force_long_training = 0; +int amdgpu_ngg = 0; +int amdgpu_prim_buf_per_se = 0; +int amdgpu_pos_buf_per_se = 0; +int amdgpu_cntl_sb_buf_per_se = 0; +int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; @@ -143,13 +146,12 @@ int amdgpu_mcbp = 0; int amdgpu_discovery = -1; int amdgpu_mes = 0; int amdgpu_noretry = 1; -int amdgpu_force_asic_type = -1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; int amdgpu_ras_enable = -1; -uint amdgpu_ras_mask = 0xffffffff; +uint amdgpu_ras_mask = 0xfffffffb; /** * DOC: vramlimit (int) @@ -242,21 +244,16 @@ module_param_named(msi, amdgpu_msi, int, 0444); * * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or * multiple values specified. 0 and negative values are invalidated. They will be adjusted - * to the default timeout. - * - * - With one value specified, the setting will apply to all non-compute jobs. - * - With multiple values specified, the first one will be for GFX. - * The second one is for Compute. The third and fourth ones are - * for SDMA and Video. - * + * to default timeout. + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. The second one is for Compute. + * And the third and fourth ones are for SDMA and Video. * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) * jobs is 10000. And there is no timeout enforced on compute jobs. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " - "for passthrough or sriov, 10000 for all jobs." +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs." " 0: keep default value. 
negative: infinity timeout), " - "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " - "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); + "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** @@ -394,14 +391,6 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); -/** - * DOC: forcelongtraining (uint) - * Force long memory training in resume. - * The default is zero, indicates short training in resume. - */ -MODULE_PARM_DESC(forcelongtraining, "force memory long training"); -module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444); - /** * DOC: pcie_gen_cap (uint) * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. @@ -459,6 +448,42 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +/** + * DOC: ngg (int) + * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). + */ +MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); +module_param_named(ngg, amdgpu_ngg, int, 0444); + +/** + * DOC: prim_buf_per_se (int) + * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). + */ +MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); +module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); + +/** + * DOC: pos_buf_per_se (int) + * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). + */ +MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); +module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); + +/** + * DOC: cntl_sb_buf_per_se (int) + * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx). + */ +MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); +module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); + +/** + * DOC: param_buf_per_se (int) + * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. + * The default is 0 (depending on gfx). + */ +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); +module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); + /** * DOC: job_hang_limit (int) * Set how much time allow a job hang and not drop it. The default is 0. @@ -591,16 +616,6 @@ MODULE_PARM_DESC(noretry, "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))"); module_param_named(noretry, amdgpu_noretry, int, 0644); -/** - * DOC: force_asic_type (int) - * A non negative value used to specify the asic type for all supported GPUs. 
- */ -MODULE_PARM_DESC(force_asic_type, - "A non negative value used to specify the asic type for all supported GPUs"); -module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); - - - #ifdef CONFIG_HSA_AMD /** * DOC: sched_policy (int) @@ -1007,7 +1022,6 @@ static const struct pci_device_id pciidlist[] = { /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -1113,10 +1127,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); -#ifdef MODULE - if (THIS_MODULE->state != MODULE_STATE_GOING) -#endif - DRM_ERROR("Hotplug removal is not supported\n"); + DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); drm_dev_unplug(dev); drm_dev_put(dev); pci_disable_device(pdev); @@ -1129,9 +1140,6 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = dev->dev_private; - if (amdgpu_ras_intr_triggered()) - return; - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain @@ -1166,13 +1174,8 @@ static int amdgpu_pmops_resume(struct device *dev) static int amdgpu_pmops_freeze(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_dev->dev_private; - int r; - r = amdgpu_device_suspend(drm_dev, false, true); - if (r) - return r; - return amdgpu_asic_reset(adev); + return amdgpu_device_suspend(drm_dev, false, true); } static int amdgpu_pmops_thaw(struct device *dev) @@ -1344,6 +1347,66 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. + */ + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + if (timeout == 0) { + index++; + continue; + } else if (timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else { + timeout = msecs_to_jiffies(timeout); + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. 
+ */ + if (index == 1) + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + } + + return ret; +} + static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 61fcf247a638..571a6dfb473e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -37,14 +37,12 @@ amdgpu_link_encoder_connector(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - drm_connector_list_iter_begin(dev, &iter); /* walk the list and link encoders to connectors */ - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { amdgpu_connector = to_amdgpu_connector(connector); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -57,7 +55,6 @@ amdgpu_link_encoder_connector(struct drm_device *dev) } } } - drm_connector_list_iter_end(&iter); } void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) @@ -65,10 +62,8 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector; - struct drm_connector_list_iter iter; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices; @@ -77,7 +72,6 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) amdgpu_connector->devices, encoder->encoder_type); } } - drm_connector_list_iter_end(&iter); } struct drm_connector * @@ -85,20 +79,15 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector, *found = NULL; - struct drm_connector_list_iter iter; + struct drm_connector *connector; struct amdgpu_connector *amdgpu_connector; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->active_device & amdgpu_connector->devices) { - found = connector; - break; - } + if (amdgpu_encoder->active_device & amdgpu_connector->devices) + return connector; } - drm_connector_list_iter_end(&iter); - return found; + return NULL; } struct drm_connector * @@ -106,20 +95,15 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector, *found = NULL; - struct drm_connector_list_iter iter; + struct drm_connector *connector; struct amdgpu_connector *amdgpu_connector; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + 
list_for_each_entry(connector, &dev->mode_config.connector_list, head) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->devices & amdgpu_connector->devices) { - found = connector; - break; - } + if (amdgpu_encoder->devices & amdgpu_connector->devices) + return connector; } - drm_connector_list_iter_end(&iter); - return found; + return NULL; } struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 377fe20bce23..23085b352cf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -462,7 +462,18 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timeout = adev->gfx_timeout; break; case AMDGPU_RING_TYPE_COMPUTE: - timeout = adev->compute_timeout; + /* + * For non-sriov case, no timeout enforce + * on compute ring by default. Unless user + * specifies a timeout for compute ring. + * + * For sriov case, always use the timeout + * as gfx ring + */ + if (!amdgpu_sriov_vf(ring->adev)) + timeout = adev->compute_timeout; + else + timeout = adev->gfx_timeout; break; case AMDGPU_RING_TYPE_SDMA: timeout = adev->sdma_timeout; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 4277125a79ee..c40642a91bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -527,41 +527,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_update_pdes(adev, vm, false); + r = amdgpu_vm_update_directories(adev, vm); error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } -/** - * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags - * - * @adev: amdgpu_device pointer - * @flags: GEM UAPI flags - * - * Returns the GEM UAPI flags mapped into hardware for the ASIC. 
- */ -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - if (adev->gmc.gmc_funcs->map_mtype) - pte_flag |= amdgpu_gmc_map_mtype(adev, - flags & AMDGPU_VM_MTYPE_MASK); - - return pte_flag; -} - int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -659,7 +631,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -674,7 +646,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index e0f025dd1b14..0b66d2e6b5d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -67,7 +67,6 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c9d1fada6188..f9bef3154b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -26,7 +26,6 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" -#include "amdgpu_ras.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -232,10 +231,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { - int i, queue, me; + int i, queue, pipe, me; for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { queue = i % adev->gfx.me.num_queue_per_pipe; + pipe = (i / adev->gfx.me.num_queue_per_pipe) + % adev->gfx.me.num_pipe_per_me; me = (i / adev->gfx.me.num_queue_per_pipe) / adev->gfx.me.num_pipe_per_me; @@ -319,7 +320,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, return r; } -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) { amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); @@ -567,102 +569,3 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } - -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) -{ - int r; - struct ras_fs_if fs_info = { - .sysfs_name = "gfx_err_count", - .debugfs_name = "gfx_err_inject", - }; - struct ras_ih_if ih_info = { - .cb = 
amdgpu_gfx_process_ras_data_cb, - }; - - if (!adev->gfx.ras_if) { - adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gfx.ras_if) - return -ENOMEM; - adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; - adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gfx.ras_if->sub_block_index = 0; - strcpy(adev->gfx.ras_if->name, "gfx"); - } - fs_info.head = ih_info.head = *adev->gfx.ras_if; - - r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, - &fs_info, &ih_info); - if (r) - goto free; - - if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { - r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); - if (r) - goto late_fini; - } else { - /* free gfx ras_if if ras is not supported */ - r = 0; - goto free; - } - - return 0; -late_fini: - amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); -free: - kfree(adev->gfx.ras_if); - adev->gfx.ras_if = NULL; - return r; -} - -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_gfx_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} - -int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, - void *err_data, - struct amdgpu_iv_entry *entry) -{ - /* TODO ue will trigger an interrupt. - * - * When “Full RAS” is enabled, the per-IP interrupt sources should - * be disabled and the driver should only look for the aggregated - * interrupt via sync flood - */ - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->gfx.funcs->query_ras_error_count) - adev->gfx.funcs->query_ras_error_count(adev, err_data); - amdgpu_ras_reset_gpu(adev, 0); - } - return AMDGPU_RAS_SUCCESS; -} - -int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - DRM_ERROR("CP ECC ERROR IRQ\n"); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 459aa9059542..6ee4021910e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -201,6 +201,28 @@ struct amdgpu_gfx_funcs { int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; +struct amdgpu_ngg_buf { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + uint32_t size; + uint32_t bo_size; +}; + +enum { + NGG_PRIM = 0, + NGG_POS, + NGG_CNTL, + NGG_PARAM, + NGG_BUF_MAX +}; + +struct amdgpu_ngg { + struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; + uint32_t gds_reserve_addr; + uint32_t gds_reserve_size; + bool init; +}; + struct sq_work { struct work_struct work; unsigned ih_data; @@ -289,6 +311,9 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; + /* NGG */ + struct amdgpu_ngg ngg; + /* gfx off */ bool gfx_off_state; /* true: enabled, false: disabled */ struct mutex gfx_off_mutex; @@ -330,7 +355,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq); -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct 
amdgpu_irq_src *irq); void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, @@ -358,12 +384,5 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); -int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, - void *err_data, - struct amdgpu_iv_entry *entry); -int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a12f33c0f5df..5790db61fa2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -27,8 +27,6 @@ #include #include "amdgpu.h" -#include "amdgpu_ras.h" -#include "amdgpu_xgmi.h" /** * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO @@ -307,29 +305,3 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, gmc->fault_hash[hash].idx = gmc->last_fault++; return false; } - -int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) -{ - int r; - - if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { - r = adev->umc.funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) { - r = adev->mmhub.funcs->ras_late_init(adev); - if (r) - return r; - } - - return amdgpu_xgmi_ras_late_init(adev); -} - -void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_umc_ras_fini(adev); - amdgpu_mmhub_ras_fini(adev); - amdgpu_xgmi_ras_fini(adev); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 555d8e57fae9..b6e1d98ef01e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -99,15 +99,12 @@ struct amdgpu_gmc_funcs { unsigned pasid); /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* map mtype to hardware flags */ - uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); + /* set pte flags based per asic */ + uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, + uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); - /* get the pte flags to use for a BO VA mapping */ - void (*get_vm_pte)(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags); }; struct amdgpu_xgmi { @@ -123,7 +120,6 @@ struct amdgpu_xgmi { /* gpu list in the same hive */ struct list_head head; bool supported; - struct ras_common_if *ras_if; }; struct amdgpu_gmc { @@ -157,7 +153,6 @@ struct amdgpu_gmc { uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; - uint8_t vram_vendor; uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; @@ -182,14 +177,15 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; + struct ras_common_if *umc_ras_if; + struct ras_common_if *mmhub_ras_if; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define 
amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) +#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR @@ -234,7 +230,5 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); -int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); -void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6f9289735e31..53734da1c2df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->direct.fence_context || + if ((*id)->owner != vm->entity.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->direct.fence_context) + if ((*id)->owner != vm->entity.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -449,7 +449,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->direct.fence_context; + id->owner = vm->entity.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6f3b03f6224f..2a3f5ec298db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -87,13 +87,10 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) struct drm_device *dev = adev->ddev; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; - struct drm_connector_list_iter iter; mutex_lock(&mode_config->mutex); - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) + list_for_each_entry(connector, &mode_config->connector_list, head) amdgpu_connector_hotplug(connector); - drm_connector_list_iter_end(&iter); mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); @@ -156,20 +153,6 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); - - /* For the hardware that cannot enable bif ring for both ras_controller_irq - * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status - * register to check whether the interrupt is triggered or not, and properly - * ack the interrupt if it is there - */ - if (adev->nbio.funcs && - adev->nbio.funcs->handle_ras_controller_intr_no_bifring) - 
adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); - - if (adev->nbio.funcs && - adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) - adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); - return ret; } @@ -245,19 +228,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { - int nvec = pci_msix_vec_count(adev->pdev); - unsigned int flags; - - if (nvec <= 0) { - flags = PCI_IRQ_MSI; - } else { - flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; - } - /* we only need one vector */ - nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); - if (nvec > 0) { + int ret = pci_enable_msi(adev->pdev); + if (!ret) { adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n"); + dev_dbg(adev->dev, "amdgpu: using MSI.\n"); } } @@ -280,8 +254,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); adev->irq.installed = true; - /* Use vector 0 for MSI-X */ - r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0)); + r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) @@ -396,7 +369,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @ih: interrupt ring instance + * @entry: interrupt vector pointer * * Dispatches IRQ to IP blocks. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4fb20e870e63..9d76e0923a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -218,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; - int r = 0; + int r; job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; @@ -243,49 +243,9 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); - - fence = r ? 
ERR_PTR(r) : fence; return fence; } -#define to_drm_sched_job(sched_job) \ - container_of((sched_job), struct drm_sched_job, queue_node) - -void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *s_job; - struct drm_sched_entity *s_entity = NULL; - int i; - - /* Signal all jobs not yet scheduled */ - for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { - struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - - spin_lock(&rq->lock); - list_for_each_entry(s_entity, &rq->entities, list) { - while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { - struct drm_sched_fence *s_fence = s_job->s_fence; - - dma_fence_signal(&s_fence->scheduled); - dma_fence_set_error(&s_fence->finished, -EHWPOISON); - dma_fence_signal(&s_fence->finished); - } - } - spin_unlock(&rq->lock); - } - - /* Signal all jobs already scheduled to HW */ - list_for_each_entry(s_job, &sched->ring_mirror_list, node) { - struct drm_sched_fence *s_fence = s_job->s_fence; - - dma_fence_set_error(&s_fence->finished, -EHWPOISON); - dma_fence_signal(&s_fence->finished); - } -} - const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index dc7ee9358dcd..51e62504c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,7 +76,4 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); - -void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 137a8573d556..d55f5baa83d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -584,12 +584,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_vram_gtt vram_gtt; vram_gtt.vram_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size) - - AMDGPU_VM_RESERVED_VRAM; - vram_gtt.vram_cpu_accessible_size = - min(adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size), - vram_gtt.vram_size); + atomic64_read(&adev->vram_pin_size); + vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); @@ -602,18 +599,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size) - - AMDGPU_VM_RESERVED_VRAM; + atomic64_read(&adev->vram_pin_size); mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = adev->gmc.visible_vram_size; - mem.cpu_accessible_vram.usable_heap_size = - min(adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size), - mem.vram.usable_heap_size); + mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - + 
atomic64_read(&adev->visible_pin_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = @@ -735,6 +729,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; + + if (amdgpu_ngg) { + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; + } dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; @@ -963,12 +968,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* Ensure IB tests are run on ring */ flush_delayed_work(&adev->delayed_init_work); - - if (amdgpu_ras_intr_triggered()) { - DRM_ERROR("RAS Intr triggered, device disabled!!"); - return -EHWPOISON; - } - file_priv->driver_priv = NULL; r = pm_runtime_get_sync(dev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1cd78940cf82..2d75ecfa199b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -23,17 +23,9 @@ struct amdgpu_mmhub_funcs { void (*ras_init)(struct amdgpu_device *adev); - int (*ras_late_init)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); }; -struct amdgpu_mmhub { - struct ras_common_if *ras_if; - const struct amdgpu_mmhub_funcs *funcs; -}; - -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); -void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 392300f77b13..31d4deb5d294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -136,7 +136,6 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) * amdgpu_mn_read_lock - take the read side lock for this notifier * * @amn: our notifier - * @blockable: is the notifier blockable */ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e3f16b49e970..6f0b789a0b49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -342,70 +342,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } -/** - * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location - * - * @adev: amdgpu device object - * @offset: offset of the BO - * @size: size of the BO - * @domain: where to place it - * @bo_ptr: used to initialize BOs in structures - * @cpu_addr: optional CPU address mapping - * - * Creates a kernel BO at a specific offset in the address space of the domain. - * - * Returns: - * 0 on success, negative error code otherwise. 
- */ -int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, uint32_t domain, - struct amdgpu_bo **bo_ptr, void **cpu_addr) -{ - struct ttm_operation_ctx ctx = { false, false }; - unsigned int i; - int r; - - offset &= PAGE_MASK; - size = ALIGN(size, PAGE_SIZE); - - r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, - NULL, cpu_addr); - if (r) - return r; - - /* - * Remove the original mem node and create a new one at the request - * position. - */ - if (cpu_addr) - amdgpu_bo_kunmap(*bo_ptr); - - ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem); - - for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { - (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; - (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } - r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, - &(*bo_ptr)->tbo.mem, &ctx); - if (r) - goto error; - - if (cpu_addr) { - r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); - if (r) - goto error; - } - - amdgpu_bo_unreserve(*bo_ptr); - return 0; - -error: - amdgpu_bo_unreserve(*bo_ptr); - amdgpu_bo_unref(bo_ptr); - return r; -} - /** * amdgpu_bo_free_kernel - free BO for kernel use * @@ -515,10 +451,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, { struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), - .no_wait_gpu = bp->no_wait_gpu, + .no_wait_gpu = false, .resv = bp->resv, - .flags = bp->type != ttm_bo_type_kernel ? - TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 + .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7e99f6c58c48..658f4c9779b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -41,7 +41,6 @@ struct amdgpu_bo_param { u32 preferred_domain; u64 flags; enum ttm_bo_type type; - bool no_wait_gpu; struct dma_resv *resv; }; @@ -238,9 +237,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); -int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, uint32_t domain, - struct amdgpu_bo **bo_ptr, void **cpu_addr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index f205f56e3358..03930313c263 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -161,7 +161,7 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) - pm = smu_get_current_power_state(&adev->smu); + pm = amdgpu_smu_get_current_power_state(adev); else pm = adev->pm.dpm.user_state; } else if (adev->powerplay.pp_funcs->get_current_power_state) { @@ -805,7 +805,8 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk + * pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. 
The files pp_dpm_sclk, pp_dpm_mclk, @@ -821,15 +822,9 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, * * To manually adjust these states, first select manual using * power_dpm_force_performance_level. - * Secondly, enter a new value for each level by inputing a string that + * Secondly,Enter a new value for each level by inputing a string that * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" - * E.g., - * - * .. code-block:: bash - * - * echo "4 5 6" > pp_dpm_sclk - * - * will enable sclk levels 4, 5, and 6. + * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. * * NOTE: change to the dcefclk max dpm level is not supported now */ @@ -907,7 +902,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -954,7 +949,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -994,7 +989,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); @@ -1034,7 +1029,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); @@ -1074,7 +1069,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); @@ -1114,7 +1109,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); @@ -1306,7 +1301,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, } parameter[parameter_size] = profile_mode; if (is_support_sw_smu(adev)) - ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); + ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); if (!ret) @@ -2015,7 +2010,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, uint32_t limit = 0; if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, true, true); + smu_get_power_limit(&adev->smu, &limit, true); return snprintf(buf, 
PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); @@ -2033,7 +2028,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, uint32_t limit = 0; if (is_support_sw_smu(adev)) { - smu_get_power_limit(&adev->smu, &limit, false, true); + smu_get_power_limit(&adev->smu, &limit, false); return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); @@ -2201,9 +2196,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - fan1_input: fan speed in RPM * - * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM) + * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM) * - * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable + * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable * * hwmon interfaces for GPU clocks: * @@ -2830,19 +2825,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_dpm_sclk\n"); return ret; } - - /* Arcturus does not support standalone mclk/socclk/fclk level setting */ - if (adev->asic_type == CHIP_ARCTURUS) { - dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_mclk.store = NULL; - - dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_socclk.store = NULL; - - dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_fclk.store = NULL; - } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); if (ret) { DRM_ERROR("failed to create device file pp_dpm_mclk\n"); @@ -3026,8 +3008,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm; smu_handle_task(&adev->smu, smu_dpm->dpm_level, - AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, - true); + AMD_PP_TASK_DISPLAY_CONFIG_CHANGE); } else { if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bbe9ac7e843f..4d71537a960d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -34,8 +34,6 @@ #include "psp_v11_0.h" #include "psp_v12_0.h" -#include "amdgpu_ras.h" - static void psp_set_funcs(struct amdgpu_device *adev); static int psp_early_init(void *handle) @@ -90,17 +88,6 @@ static int psp_sw_init(void *handle) return ret; } - ret = psp_mem_training_init(psp); - if (ret) { - DRM_ERROR("Failed to initialize memory training!\n"); - return ret; - } - ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); - if (ret) { - DRM_ERROR("Failed to process memory training!\n"); - return ret; - } - return 0; } @@ -108,7 +95,6 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - psp_mem_training_fini(&adev->psp); release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -165,19 +151,10 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } - amdgpu_asic_invalidate_hdp(psp->adev, NULL); while (*((unsigned int *)psp->fence_buf) != index) { if (--timeout == 0) break; - /* - * Shouldn't wait for timeout when err_event_athub occurs, - * because gpu reset thread triggered and lock resource should - * be released for psp resume sequence. 
- */ - if (amdgpu_ras_intr_triggered()) - break; msleep(1); - amdgpu_asic_invalidate_hdp(psp->adev, NULL); } /* In some cases, psp response status is not 0 even there is no @@ -191,9 +168,8 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_DEBUG_DRIVER("psp command (0x%X) failed and response status is (0x%X)\n", - psp->cmd_buf_mem->cmd_id, - psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); + DRM_WARN("psp command failed and response status is (0x%X)\n", + psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -277,8 +253,7 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ - if (!amdgpu_sriov_vf(psp->adev) && - psp->toc_start_addr && + if (psp->toc_start_addr && psp->toc_bin_size && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); @@ -312,9 +287,15 @@ static int psp_tmr_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + goto failed; kfree(cmd); + return 0; + +failed: + kfree(cmd); return ret; } @@ -791,324 +772,6 @@ static int psp_ras_initialize(struct psp_context *psp) } // ras end -// HDCP start -static void psp_prep_hdcp_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t hdcp_ta_mc, - uint64_t hdcp_mc_shared, - uint32_t hdcp_ta_size, - uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(hdcp_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(hdcp_ta_mc); - cmd->cmd.cmd_load_ta.app_len = hdcp_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = - lower_32_bits(hdcp_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = - upper_32_bits(hdcp_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - -static int psp_hdcp_init_shared_buf(struct psp_context *psp) -{ - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for hdcp ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); - - return ret; -} - -static int psp_hdcp_load(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, - psp->ta_hdcp_ucode_size); - - psp_prep_hdcp_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->hdcp_context.hdcp_shared_mc_addr, - psp->ta_hdcp_ucode_size, - PSP_HDCP_SHARED_MEM_SIZE); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - if (!ret) { - psp->hdcp_context.hdcp_initialized = 1; - psp->hdcp_context.session_id = cmd->resp.session_id; - } - - kfree(cmd); - - return ret; -} -static int psp_hdcp_initialize(struct psp_context *psp) -{ - int ret; - - if (!psp->hdcp_context.hdcp_initialized) { - ret = psp_hdcp_init_shared_buf(psp); - if (ret) - return ret; - } - - ret = psp_hdcp_load(psp); - if (ret) - return ret; - - return 0; -} -static void psp_prep_hdcp_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t hdcp_session_id) -{ - cmd->cmd_id = 
GFX_CMD_ID_UNLOAD_TA; - cmd->cmd.cmd_unload_ta.session_id = hdcp_session_id; -} - -static int psp_hdcp_unload(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the unloading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_hdcp_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; -} - -static void psp_prep_hdcp_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t hdcp_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = hdcp_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - -int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_hdcp_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->hdcp_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; -} - -static int psp_hdcp_terminate(struct psp_context *psp) -{ - int ret; - - if (!psp->hdcp_context.hdcp_initialized) - return 0; - - ret = psp_hdcp_unload(psp); - if (ret) - return ret; - - psp->hdcp_context.hdcp_initialized = 0; - - /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); - - return 0; -} -// HDCP end - -// DTM start -static void psp_prep_dtm_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint64_t dtm_ta_mc, - uint64_t dtm_mc_shared, - uint32_t dtm_ta_size, - uint32_t shared_size) -{ - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(dtm_ta_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(dtm_ta_mc); - cmd->cmd.cmd_load_ta.app_len = dtm_ta_size; - - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(dtm_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(dtm_mc_shared); - cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; -} - -static int psp_dtm_init_shared_buf(struct psp_context *psp) -{ - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for dtm ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); - - return ret; -} - -static int psp_dtm_load(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); - - psp_prep_dtm_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->dtm_context.dtm_shared_mc_addr, - psp->ta_dtm_ucode_size, - PSP_DTM_SHARED_MEM_SIZE); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - if (!ret) { - psp->dtm_context.dtm_initialized = 1; - 
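The psp_prep_*_ta_load_cmd_buf() helpers above all pack a 64-bit MC address into a pair of 32-bit command fields. A self-contained sketch of that split, with a made-up address:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Equivalent of the kernel's lower_32_bits()/upper_32_bits() helpers. */
static uint32_t lower_32(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	/* Made-up MC address; the driver takes it from the TA/shared BO. */
	uint64_t ta_mc_addr = 0x0000008000123000ULL;

	printf("app_phy_addr_lo = 0x%08" PRIx32 "\n", lower_32(ta_mc_addr));
	printf("app_phy_addr_hi = 0x%08" PRIx32 "\n", upper_32(ta_mc_addr));
	return 0;
}
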
psp->dtm_context.session_id = cmd->resp.session_id; - } - - kfree(cmd); - - return ret; -} - -static int psp_dtm_initialize(struct psp_context *psp) -{ - int ret; - - if (!psp->dtm_context.dtm_initialized) { - ret = psp_dtm_init_shared_buf(psp); - if (ret) - return ret; - } - - ret = psp_dtm_load(psp); - if (ret) - return ret; - - return 0; -} - -static void psp_prep_dtm_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - uint32_t dtm_session_id) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = dtm_session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - /* Note: cmd_invoke_cmd.buf is not used for now */ -} - -int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - /* - * TODO: bypass the loading in sriov for now - */ - if (amdgpu_sriov_vf(psp->adev)) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_dtm_ta_invoke_cmd_buf(cmd, ta_cmd_id, - psp->dtm_context.session_id); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - - kfree(cmd); - - return ret; -} - -static int psp_dtm_terminate(struct psp_context *psp) -{ - int ret; - - if (!psp->dtm_context.dtm_initialized) - return 0; - - ret = psp_hdcp_unload(psp); - if (ret) - return ret; - - psp->dtm_context.dtm_initialized = 0; - - /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); - - return 0; -} -// DTM end - static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -1182,16 +845,6 @@ static int psp_hw_start(struct psp_context *psp) if (ret) dev_err(psp->adev->dev, "RAS: Failed to initialize RAS\n"); - - ret = psp_hdcp_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "HDCP: Failed to initialize HDCP\n"); - - ret = psp_dtm_initialize(psp); - if (ret) - dev_err(psp->adev->dev, - "DTM: Failed to initialize DTM\n"); } return 0; @@ -1297,7 +950,21 @@ static void psp_print_fw_hdr(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { struct amdgpu_device *adev = psp->adev; - struct common_firmware_header *hdr; + const struct sdma_firmware_header_v1_0 *sdma_hdr = + (const struct sdma_firmware_header_v1_0 *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + const struct gfx_firmware_header_v1_0 *ce_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + const struct gfx_firmware_header_v1_0 *pfp_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + const struct gfx_firmware_header_v1_0 *me_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + const struct gfx_firmware_header_v1_0 *mec_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + const struct rlc_firmware_header_v2_0 *rlc_hdr = + (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + const struct smc_firmware_header_v1_0 *smc_hdr = + (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; switch (ucode->ucode_id) { case AMDGPU_UCODE_ID_SDMA0: @@ -1308,33 +975,25 @@ static void psp_print_fw_hdr(struct psp_context *psp, case AMDGPU_UCODE_ID_SDMA5: case AMDGPU_UCODE_ID_SDMA6: case AMDGPU_UCODE_ID_SDMA7: - hdr = (struct common_firmware_header *) - adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; - amdgpu_ucode_print_sdma_hdr(hdr); + amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); break; case 
AMDGPU_UCODE_ID_CP_CE: - hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data; - amdgpu_ucode_print_gfx_hdr(hdr); + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); break; case AMDGPU_UCODE_ID_CP_PFP: - hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data; - amdgpu_ucode_print_gfx_hdr(hdr); + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); break; case AMDGPU_UCODE_ID_CP_ME: - hdr = (struct common_firmware_header *)adev->gfx.me_fw->data; - amdgpu_ucode_print_gfx_hdr(hdr); + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); break; case AMDGPU_UCODE_ID_CP_MEC1: - hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data; - amdgpu_ucode_print_gfx_hdr(hdr); + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); break; case AMDGPU_UCODE_ID_RLC_G: - hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; - amdgpu_ucode_print_rlc_hdr(hdr); + amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); break; case AMDGPU_UCODE_ID_SMC: - hdr = (struct common_firmware_header *)adev->pm.fw->data; - amdgpu_ucode_print_smc_hdr(hdr); + amdgpu_ucode_print_smc_hdr(&smc_hdr->header); break; default: break; @@ -1420,6 +1079,10 @@ out: ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) /* skip mec JT when autoload is enabled */ continue; + /* Renoir only needs to load mec jump table one time */ + if (adev->asic_type == CHIP_RENOIR && + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) + continue; psp_print_fw_hdr(psp, ucode); @@ -1428,8 +1091,7 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (psp->autoload_supported && ucode->ucode_id == - AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); @@ -1554,11 +1216,8 @@ static int psp_hw_fini(void *handle) psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); - if (psp->adev->psp.ta_fw) { + if (psp->adev->psp.ta_fw) psp_ras_terminate(psp); - psp_dtm_terminate(psp); - psp_hdcp_terminate(psp); - } psp_ring_destroy(psp, PSP_RING_TYPE__KM); @@ -1600,16 +1259,6 @@ static int psp_suspend(void *handle) DRM_ERROR("Failed to terminate ras ta\n"); return ret; } - ret = psp_hdcp_terminate(psp); - if (ret) { - DRM_ERROR("Failed to terminate hdcp ta\n"); - return ret; - } - ret = psp_dtm_terminate(psp); - if (ret) { - DRM_ERROR("Failed to terminate dtm ta\n"); - return ret; - } } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); @@ -1629,12 +1278,6 @@ static int psp_resume(void *handle) DRM_INFO("PSP is resuming...\n"); - ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); - if (ret) { - DRM_ERROR("Failed to process memory training!\n"); - return ret; - } - mutex_lock(&adev->firmware.mutex); ret = psp_hw_start(psp); @@ -1674,6 +1317,9 @@ int psp_rlc_autoload_start(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; + if (amdgpu_sriov_vf(psp->adev)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 09c5474ebcc3..bc0947f6bc8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -37,9 +37,6 @@ #define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 -#define PSP_HDCP_SHARED_MEM_SIZE 0x4000 -#define PSP_DTM_SHARED_MEM_SIZE 0x4000 -#define PSP_SHARED_MEM_SIZE 0x4000 struct psp_context; struct psp_xgmi_node_info; @@ -49,8 +46,6 @@ enum psp_bootloader_cmd { 
PSP_BL__LOAD_SYSDRV = 0x10000, PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, - PSP_BL__DRAM_LONG_TRAIN = 0x100000, - PSP_BL__DRAM_SHORT_TRAIN = 0x200000, }; enum psp_ring_type @@ -113,9 +108,6 @@ struct psp_funcs struct ta_ras_trigger_error_input *info); int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); int (*rlc_autoload_start)(struct psp_context *psp); - int (*mem_training_init)(struct psp_context *psp); - void (*mem_training_fini)(struct psp_context *psp); - int (*mem_training)(struct psp_context *psp, uint32_t ops); }; #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 @@ -150,65 +142,6 @@ struct psp_ras_context { struct amdgpu_ras *ras; }; -struct psp_hdcp_context { - bool hdcp_initialized; - uint32_t session_id; - struct amdgpu_bo *hdcp_shared_bo; - uint64_t hdcp_shared_mc_addr; - void *hdcp_shared_buf; -}; - -struct psp_dtm_context { - bool dtm_initialized; - uint32_t session_id; - struct amdgpu_bo *dtm_shared_bo; - uint64_t dtm_shared_mc_addr; - void *dtm_shared_buf; -}; - -#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 -#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 -#define GDDR6_MEM_TRAINING_OFFSET 0x8000 - -enum psp_memory_training_init_flag { - PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, - PSP_MEM_TRAIN_SUPPORT = 0x1, - PSP_MEM_TRAIN_INIT_FAILED = 0x2, - PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4, - PSP_MEM_TRAIN_INIT_SUCCESS = 0x8, -}; - -enum psp_memory_training_ops { - PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1, - PSP_MEM_TRAIN_SAVE = 0x2, - PSP_MEM_TRAIN_RESTORE = 0x4, - PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8, - PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG, - PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG, -}; - -struct psp_memory_training_context { - /*training data size*/ - u64 train_data_size; - /* - * sys_cache - * cpu virtual address - * system memory buffer that used to store the training data. - */ - void *sys_cache; - - /*vram offset of the p2c training data*/ - u64 p2c_train_data_offset; - struct amdgpu_bo *p2c_bo; - - /*vram offset of the c2p training data*/ - u64 c2p_train_data_offset; - struct amdgpu_bo *c2p_bo; - - enum psp_memory_training_init_flag init; - u32 training_cnt; -}; - struct psp_context { struct amdgpu_device *adev; @@ -273,21 +206,9 @@ struct psp_context uint32_t ta_ras_ucode_version; uint32_t ta_ras_ucode_size; uint8_t *ta_ras_start_addr; - - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_ucode_size; - uint8_t *ta_hdcp_start_addr; - - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_ucode_size; - uint8_t *ta_dtm_start_addr; - struct psp_xgmi_context xgmi_context; struct psp_ras_context ras; - struct psp_hdcp_context hdcp_context; - struct psp_dtm_context dtm_context; struct mutex mutex; - struct psp_memory_training_context mem_train_ctx; }; struct amdgpu_psp_funcs { @@ -330,12 +251,6 @@ struct amdgpu_psp_funcs { (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) #define psp_rlc_autoload(psp) \ ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) -#define psp_mem_training_init(psp) \ - ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) -#define psp_mem_training_fini(psp) \ - ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0) -#define psp_mem_training(psp, ops) \ - ((psp)->funcs->mem_training ? 
(psp)->funcs->mem_training((psp), (ops)) : 0) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) @@ -364,8 +279,6 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); -int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); -int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_rlc_autoload_start(struct psp_context *psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dab90c280476..016ea274b955 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -25,13 +25,10 @@ #include #include #include -#include -#include #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" -#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" const char *ras_error_string[] = { "none", @@ -68,16 +65,11 @@ const char *ras_block_string[] = { /* inject address is 52 bits */ #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) -enum amdgpu_ras_retire_page_reservation { - AMDGPU_RAS_RETIRE_PAGE_RESERVED, - AMDGPU_RAS_RETIRE_PAGE_PENDING, - AMDGPU_RAS_RETIRE_PAGE_FAULT, -}; - -atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); - -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, - uint64_t addr); +static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo **bo_ptr); +static int amdgpu_ras_release_vram(struct amdgpu_device *adev, + struct amdgpu_bo **bo_ptr); static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -197,10 +189,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } - -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, - struct ras_common_if *head); - /** * DOC: AMDGPU RAS debugfs control interface * @@ -222,36 +210,29 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * * Second member: struct ras_debug_if::op. * It has three kinds of operations. - * - * - 0: disable RAS on the block. Take ::head as its data. - * - 1: enable RAS on the block. Take ::head as its data. - * - 2: inject errors on the block. Take ::inject as its data. + * 0: disable RAS on the block. Take ::head as its data. + * 1: enable RAS on the block. Take ::head as its data. + * 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? * programs: * copy the struct ras_debug_if in your codes and initialize it. * write the struct to the control node. * - * .. code-block:: bash - * - * echo op block [error [sub_block address value]] > .../ras/ras_ctrl - * - * op: disable, enable, inject - * disable: only block is needed - * enable: block and error are needed - * inject: error, address, value are needed - * block: umc, sdma, gfx, ......... - * see ras_block_string[] for details - * error: ue, ce - * ue: multi_uncorrectable - * ce: single_correctable - * sub_block: - * sub block index, pass 0 if there is no sub block - * - * here are some examples for bash commands: - * - * .. code-block:: bash + * bash: + * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl + * op: disable, enable, inject + * disable: only block is needed + * enable: block and error are needed + * inject: error, address, value are needed + * block: umc, smda, gfx, ......... 
+ * see ras_block_string[] for details + * error: ue, ce + * ue: multi_uncorrectable + * ce: single_correctable + * sub_block: sub block index, pass 0 if there is no sub block * + * here are some examples for bash commands, * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl @@ -264,9 +245,8 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * For inject, please check corresponding err count at * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * - * .. note:: - * Operation is only allowed on blocks which are supported. - * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * NOTE: operation is only allowed on blocks which are supported. + * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -296,14 +276,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * break; } - /* umc ce/ue error injection for a bad page is not allowed */ - if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && - amdgpu_ras_check_bad_page(adev, data.inject.address)) { - DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n", - data.inject.address); - break; - } - /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); break; @@ -318,33 +290,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * return size; } -/** - * DOC: AMDGPU RAS debugfs EEPROM table reset interface - * - * Some boards contain an EEPROM which is used to persistently store a list of - * bad pages containing ECC errors detected in vram. This interface provides - * a way to reset the EEPROM, e.g., after testing error injection. - * - * Usage: - * - * .. code-block:: bash - * - * echo 1 > ../ras/ras_eeprom_reset - * - * will reset EEPROM table to 0 entries. - * - */ -static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - int ret; - - ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control); - - return ret == 1 ? size : -EIO; -} - static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .owner = THIS_MODULE, .read = NULL, @@ -352,34 +297,6 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .llseek = default_llseek }; -static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { - .owner = THIS_MODULE, - .read = NULL, - .write = amdgpu_ras_debugfs_eeprom_write, - .llseek = default_llseek -}; - -/** - * DOC: AMDGPU RAS sysfs Error Count Interface - * - * It allows user to read the error count for each IP block on the gpu through - * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count - * - * It outputs the multiple lines which report the uncorrected (ue) and corrected - * (ce) error counts. - * - * The format of one line is below, - * - * [ce|ue]: count - * - * Example: - * - * .. 
code-block:: bash - * - * ue: 0 - * ce: 1 - * - */ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -558,17 +475,15 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) return 0; - if (!amdgpu_ras_intr_triggered()) { - ret = psp_ras_enable_features(&adev->psp, &info, enable); - if (ret) { - DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", - enable ? "enable":"disable", - ras_block_str(head->block), - ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - return -EAGAIN; - return -EINVAL; - } + ret = psp_ras_enable_features(&adev->psp, &info, enable); + if (ret) { + DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; + return -EINVAL; } /* setup the obj */ @@ -700,12 +615,8 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, adev->gfx.funcs->query_ras_error_count(adev, &err_data); break; case AMDGPU_RAS_BLOCK__MMHUB: - if (adev->mmhub.funcs->query_ras_error_count) - adev->mmhub.funcs->query_ras_error_count(adev, &err_data); - break; - case AMDGPU_RAS_BLOCK__PCIE_BIF: - if (adev->nbio.funcs->query_ras_error_count) - adev->nbio.funcs->query_ras_error_count(adev, &err_data); + if (adev->mmhub_funcs->query_ras_error_count) + adev->mmhub_funcs->query_ras_error_count(adev, &err_data); break; default: break; @@ -717,14 +628,12 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; - if (err_data.ce_count) { + if (err_data.ce_count) dev_info(adev->dev, "%ld correctable errors detected in %s block\n", obj->err_data.ce_count, ras_block_str(info->head.block)); - } - if (err_data.ue_count) { + if (err_data.ue_count) dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", obj->err_data.ue_count, ras_block_str(info->head.block)); - } return 0; } @@ -755,8 +664,6 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, break; case AMDGPU_RAS_BLOCK__UMC: case AMDGPU_RAS_BLOCK__MMHUB: - case AMDGPU_RAS_BLOCK__XGMI_WAFL: - case AMDGPU_RAS_BLOCK__PCIE_BIF: ret = psp_ras_trigger_error(&adev->psp, &block_info); break; default: @@ -816,18 +723,18 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, static char *amdgpu_ras_badpage_flags_str(unsigned int flags) { switch (flags) { - case AMDGPU_RAS_RETIRE_PAGE_RESERVED: + case 0: return "R"; - case AMDGPU_RAS_RETIRE_PAGE_PENDING: + case 1: return "P"; - case AMDGPU_RAS_RETIRE_PAGE_FAULT: + case 2: default: return "F"; }; } -/** - * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface +/* + * DOC: ras sysfs gpu_vram_bad_pages interface * * It allows user to read the bad pages of vram on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages @@ -839,21 +746,14 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) * * gpu pfn and gpu page size are printed in hex format. * flags can be one of below character, - * * R: reserved, this gpu page is reserved and not able to use. - * * P: pending for reserve, this gpu page is marked as bad, will be reserved - * in next window of page_reserve. - * + * in next window of page_reserve. * F: unable to reserve. this gpu page can't be reserved due to some reasons. * - * Examples: - * - * .. 
code-block:: bash - * - * 0x00000001 : 0x00001000 : R - * 0x00000002 : 0x00001000 : P - * + * examples: + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, @@ -1034,21 +934,8 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) struct drm_minor *minor = adev->ddev->primary; con->dir = debugfs_create_dir("ras", minor->debugfs_root); - debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_ctrl_ops); - debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_eeprom_ops); - - /* - * After one uncorrectable error happens, usually GPU recovery will - * be scheduled. But due to the known problem in GPU recovery failing - * to bring GPU back, below interface provides one direct way to - * user to reboot system automatically in such case within - * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine - * will never be called. - */ - debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir, - &con->reboot); + con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); } void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, @@ -1093,8 +980,10 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_remove(adev, &obj->head); } - debugfs_remove_recursive(con->dir); + debugfs_remove(con->ent); + debugfs_remove(con->dir); con->dir = NULL; + con->ent = NULL; } /* debugfs end */ @@ -1299,15 +1188,15 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, for (; i < data->count; i++) { (*bps)[i] = (struct ras_badpage){ - .bp = data->bps[i].retired_page, + .bp = data->bps[i].bp, .size = AMDGPU_GPU_PAGE_SIZE, - .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, + .flags = 0, }; if (data->last_reserved <= i) - (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (data->bps_bo[i] == NULL) - (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; + (*bps)[i].flags = 1; + else if (data->bps[i].bo == NULL) + (*bps)[i].flags = 2; } *count = data->count; @@ -1325,46 +1214,105 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) atomic_set(&ras->in_recovery, 0); } +static int amdgpu_ras_release_vram(struct amdgpu_device *adev, + struct amdgpu_bo **bo_ptr) +{ + /* no need to free it actually. 
*/ + amdgpu_bo_free_kernel(bo_ptr, NULL, NULL); + return 0; +} + +/* reserve vram with size@offset */ +static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo **bo_ptr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; + int r = 0; + int i; + struct amdgpu_bo *bo; + + if (bo_ptr) + *bo_ptr = NULL; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) + return -EINVAL; + + r = amdgpu_bo_reserve(bo, false); + if (r) + goto error_reserve; + + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx); + if (r) + goto error_pin; + + r = amdgpu_bo_pin_restricted(bo, + AMDGPU_GEM_DOMAIN_VRAM, + offset, + offset + size); + if (r) + goto error_pin; + + if (bo_ptr) + *bo_ptr = bo; + + amdgpu_bo_unreserve(bo); + return r; + +error_pin: + amdgpu_bo_unreserve(bo); +error_reserve: + amdgpu_bo_unref(&bo); + return r; +} + /* alloc/realloc bps array */ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, struct ras_err_handler_data *data, int pages) { unsigned int old_space = data->count + data->space_left; unsigned int new_space = old_space + pages; - unsigned int align_space = ALIGN(new_space, 512); - void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); - struct amdgpu_bo **bps_bo = - kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); + unsigned int align_space = ALIGN(new_space, 1024); + void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); - if (!bps || !bps_bo) { - kfree(bps); - kfree(bps_bo); + if (!tmp) return -ENOMEM; - } if (data->bps) { - memcpy(bps, data->bps, + memcpy(tmp, data->bps, data->count * sizeof(*data->bps)); kfree(data->bps); } - if (data->bps_bo) { - memcpy(bps_bo, data->bps_bo, - data->count * sizeof(*data->bps_bo)); - kfree(data->bps_bo); - } - data->bps = bps; - data->bps_bo = bps_bo; + data->bps = tmp; data->space_left += align_space - old_space; return 0; } /* it deal with vram only. 
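The reservation helpers above (amdgpu_bo_create_kernel_at() and amdgpu_ras_reserve_vram()) pin a buffer at a fixed VRAM offset by restricting every placement to a page-frame window derived from the byte offset and size. A standalone sketch of just that arithmetic, with example numbers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12			/* 4 KiB pages; illustrative */
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/*
 * Offset-to-page-window arithmetic as used by amdgpu_ras_reserve_vram():
 * the byte offset of the retired page is rounded to a page boundary and
 * every placement is limited to the window [fpfn, lpfn) so the buffer
 * lands exactly on that page.  Numbers are examples only.
 */
int main(void)
{
	uint64_t offset = 0x100800;	/* example byte offset in VRAM */
	uint64_t size = PAGE_SIZE;	/* one retired page */
	uint64_t fpfn, lpfn;

	offset = ALIGN(offset, PAGE_SIZE);
	fpfn = offset >> PAGE_SHIFT;
	lpfn = (offset + size) >> PAGE_SHIFT;

	printf("offset 0x%" PRIx64 " -> fpfn 0x%" PRIx64 ", lpfn 0x%" PRIx64 "\n",
	       offset, fpfn, lpfn);
	return 0;
}
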
*/ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages) + unsigned long *bps, int pages) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; + int i = pages; int ret = 0; if (!con || !con->eh_data || !bps || pages <= 0) @@ -1381,120 +1329,24 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); - data->count += pages; + while (i--) + data->bps[data->count++].bp = bps[i]; + data->space_left -= pages; - out: mutex_unlock(&con->recovery_lock); return ret; } -/* - * write error record array to eeprom, the function should be - * protected by recovery_lock - */ -static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; - struct amdgpu_ras_eeprom_control *control; - int save_count; - - if (!con || !con->eh_data) - return 0; - - control = &con->eeprom_control; - data = con->eh_data; - save_count = data->count - control->num_recs; - /* only new entries are saved */ - if (save_count > 0) - if (amdgpu_ras_eeprom_process_recods(control, - &data->bps[control->num_recs], - true, - save_count)) { - DRM_ERROR("Failed to save EEPROM table data!"); - return -EIO; - } - - return 0; -} - -/* - * read error record array in eeprom and reserve enough space for - * storing new bad pages - */ -static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) -{ - struct amdgpu_ras_eeprom_control *control = - &adev->psp.ras.ras->eeprom_control; - struct eeprom_table_record *bps = NULL; - int ret = 0; - - /* no bad page record, skip eeprom access */ - if (!control->num_recs) - return ret; - - bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); - if (!bps) - return -ENOMEM; - - if (amdgpu_ras_eeprom_process_recods(control, bps, false, - control->num_recs)) { - DRM_ERROR("Failed to load EEPROM table records!"); - ret = -EIO; - goto out; - } - - ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); - -out: - kfree(bps); - return ret; -} - -/* - * check if an address belongs to bad page - * - * Note: this check is only for umc block - */ -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, - uint64_t addr) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; - int i; - bool ret = false; - - if (!con || !con->eh_data) - return ret; - - mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; - - addr >>= AMDGPU_GPU_PAGE_SHIFT; - for (i = 0; i < data->count; i++) - if (addr == data->bps[i].retired_page) { - ret = true; - goto out; - } - -out: - mutex_unlock(&con->recovery_lock); - return ret; -} - /* called in gpu recovery/init */ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; uint64_t bp; - struct amdgpu_bo *bo = NULL; - int i, ret = 0; + struct amdgpu_bo *bo; + int i; if (!con || !con->eh_data) return 0; @@ -1505,29 +1357,18 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) goto out; /* reserve vram at driver post stage. 
*/ for (i = data->last_reserved; i < data->count; i++) { - bp = data->bps[i].retired_page; + bp = data->bps[i].bp; - /* There are two cases of reserve error should be ignored: - * 1) a ras bad page has been allocated (used by someone); - * 2) a ras bad page has been reserved (duplicate error injection - * for one page); - */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL)) - DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp); + if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT, + PAGE_SIZE, &bo)) + DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp); - data->bps_bo[i] = bo; + data->bps[i].bo = bo; data->last_reserved = i + 1; - bo = NULL; } - - /* continue to save bad pages to eeprom even reesrve_vram fails */ - ret = amdgpu_ras_save_bad_pages(adev); out: mutex_unlock(&con->recovery_lock); - return ret; + return 0; } /* called when driver unload */ @@ -1547,11 +1388,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) goto out; for (i = data->last_reserved - 1; i >= 0; i--) { - bo = data->bps_bo[i]; + bo = data->bps[i].bo; - amdgpu_bo_free_kernel(&bo, NULL, NULL); + amdgpu_ras_release_vram(adev, &bo); - data->bps_bo[i] = bo; + data->bps[i].bo = bo; data->last_reserved = i; } out: @@ -1559,54 +1400,41 @@ out: return 0; } -int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +{ + /* TODO + * write the array to eeprom when SMU disabled. + */ + return 0; +} + +static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) +{ + /* TODO + * read the array to eeprom when SMU disabled. + */ + return 0; +} + +static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data **data; - int ret; + struct ras_err_handler_data **data = &con->eh_data; - if (con) - data = &con->eh_data; - else - return 0; - - *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); - if (!*data) { - ret = -ENOMEM; - goto out; - } + *data = kmalloc(sizeof(**data), + GFP_KERNEL|__GFP_ZERO); + if (!*data) + return -ENOMEM; mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); con->adev = adev; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - if (ret) - goto free; - - if (con->eeprom_control.num_recs) { - ret = amdgpu_ras_load_bad_pages(adev); - if (ret) - goto free; - ret = amdgpu_ras_reserve_bad_pages(adev); - if (ret) - goto release; - } + amdgpu_ras_load_bad_pages(adev); + amdgpu_ras_reserve_bad_pages(adev); return 0; - -release: - amdgpu_ras_release_bad_pages(adev); -free: - kfree((*data)->bps); - kfree((*data)->bps_bo); - kfree(*data); - con->eh_data = NULL; -out: - DRM_WARN("Failed to initialize ras recovery!\n"); - - return ret; } static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) @@ -1614,17 +1442,13 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; - /* recovery_init failed to init it, fini is useless */ - if (!data) - return 0; - cancel_work_sync(&con->recovery_work); + amdgpu_ras_save_bad_pages(adev); amdgpu_ras_release_bad_pages(adev); mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); - kfree(data->bps_bo); kfree(data); mutex_unlock(&con->recovery_lock); @@ -1676,7 +1500,6 @@ static void 
amdgpu_ras_check_supported(struct amdgpu_device *adev, int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - int r; if (con) return 0; @@ -1704,106 +1527,31 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* Might need get this flag from vbios. */ con->flags = RAS_DEFAULT_FLAGS; - if (adev->nbio.funcs->init_ras_controller_interrupt) { - r = adev->nbio.funcs->init_ras_controller_interrupt(adev); - if (r) - return r; - } - - if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { - r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); - if (r) - return r; - } + if (amdgpu_ras_recovery_init(adev)) + goto recovery_out; amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; if (amdgpu_ras_fs_init(adev)) goto fs_out; + /* ras init for each ras block */ + if (adev->umc.funcs->ras_init) + adev->umc.funcs->ras_init(adev); + DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); return 0; fs_out: + amdgpu_ras_recovery_fini(adev); +recovery_out: amdgpu_ras_set_context(adev, NULL); kfree(con); return -EINVAL; } -/* helper function to handle common stuff in ip late init phase */ -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info) -{ - int r; - - /* disable RAS feature per IP block if it is not supported */ - if (!amdgpu_ras_is_supported(adev, ras_block->block)) { - amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); - return 0; - } - - r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); - if (r) { - if (r == -EAGAIN) { - /* request gpu reset. will run again */ - amdgpu_ras_request_reset_on_boot(adev, - ras_block->block); - return 0; - } else if (adev->in_suspend || adev->in_gpu_reset) { - /* in resume phase, if fail to enable ras, - * clean up all ras fs nodes, and disable ras */ - goto cleanup; - } else - return r; - } - - /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || adev->in_gpu_reset) - return 0; - - if (ih_info->cb) { - r = amdgpu_ras_interrupt_add_handler(adev, ih_info); - if (r) - goto interrupt; - } - - amdgpu_ras_debugfs_create(adev, fs_info); - - r = amdgpu_ras_sysfs_create(adev, fs_info); - if (r) - goto sysfs; - - return 0; -cleanup: - amdgpu_ras_sysfs_remove(adev, ras_block); -sysfs: - amdgpu_ras_debugfs_remove(adev, ras_block); - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, ras_block, 0); - return r; -} - -/* helper function to remove ras fs node and interrupt handler */ -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info) -{ - if (!ras_block || !ih_info) - return; - - amdgpu_ras_sysfs_remove(adev, ras_block); - amdgpu_ras_debugfs_remove(adev, ras_block); - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); - amdgpu_ras_feature_enable(adev, ras_block, 0); -} - /* do some init work after IP late init as dependence. * and it runs in resume/gpu reset/booting up cases. 
*/ @@ -1897,18 +1645,3 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } - -void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) -{ - uint32_t hw_supported, supported; - - amdgpu_ras_check_supported(adev, &hw_supported, &supported); - if (!hw_supported) - return; - - if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { - DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); - - amdgpu_ras_reset_gpu(adev, false); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f80fd3428c98..6c76bb2a6843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -317,6 +317,8 @@ struct amdgpu_ras { struct list_head head; /* debugfs */ struct dentry *dir; + /* debugfs ctrl */ + struct dentry *ent; /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; @@ -332,7 +334,7 @@ struct amdgpu_ras { struct mutex recovery_lock; uint32_t flags; - bool reboot; + struct amdgpu_ras_eeprom_control eeprom_control; }; @@ -345,14 +347,15 @@ struct ras_err_data { unsigned long ue_count; unsigned long ce_count; unsigned long err_addr_cnt; - struct eeprom_table_record *err_addr; + uint64_t *err_addr; }; struct ras_err_handler_data { - /* point to bad page records array */ - struct eeprom_table_record *bps; - /* point to reserved bo array */ - struct amdgpu_bo **bps_bo; + /* point to bad pages array */ + struct { + unsigned long bp; + struct amdgpu_bo *bo; + } *bps; /* the count of entries */ int count; /* the space can place new entries */ @@ -362,7 +365,7 @@ struct ras_err_handler_data { }; typedef int (*ras_ih_cb)(struct amdgpu_device *adev, - void *err_data, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry); struct ras_ih_data { @@ -478,7 +481,6 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } -int amdgpu_ras_recovery_init(struct amdgpu_device *adev); int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); @@ -490,7 +492,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages); + unsigned long *bps, int pages); int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); @@ -499,12 +501,6 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - /* save bad page to eeprom before gpu reset, - * i2c may be unstable in gpu reset - */ - if (in_task()) - amdgpu_ras_reserve_bad_pages(adev); - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); return 0; @@ -570,13 +566,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { int amdgpu_ras_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info); -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info); int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); @@ -610,14 +599,4 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct 
ras_dispatch_if *info); - -extern atomic_t amdgpu_ras_in_intr; - -static inline bool amdgpu_ras_intr_triggered(void) -{ - return !!atomic_read(&amdgpu_ras_in_intr); -} - -void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 7de16c0c2f20..8a32b5c93778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -100,6 +100,171 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, return ret; } +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) +{ + int ret = 0; + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct i2c_msg msg = { + .addr = EEPROM_I2C_TARGET_ADDR, + .flags = I2C_M_RD, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + mutex_init(&control->tbl_mutex); + + switch (adev->asic_type) { + case CHIP_VEGA20: + ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); + break; + + default: + return 0; + } + + if (ret) { + DRM_ERROR("Failed to init I2C controller, ret:%d", ret); + return ret; + } + + /* Read/Create table header from EEPROM address 0 */ + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) { + DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); + return ret; + } + + __decode_table_header_from_buff(hdr, &buff[2]); + + if (hdr->header == EEPROM_TABLE_HDR_VAL) { + control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / + EEPROM_TABLE_RECORD_SIZE; + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", + control->num_recs); + + } else { + DRM_INFO("Creating new EEPROM table"); + + hdr->header = EEPROM_TABLE_HDR_VAL; + hdr->version = EEPROM_TABLE_VER; + hdr->first_rec_offset = EEPROM_RECORD_START; + hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; + + adev->psp.ras.ras->eeprom_control.tbl_byte_sum = + __calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control); + ret = __update_table_header(control, buff); + } + + /* Start inserting records from here */ + adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START; + + return ret == 1 ? 
0 : -EIO; +} + +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + switch (adev->asic_type) { + case CHIP_VEGA20: + smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor); + break; + + default: + return; + } +} + +static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + buff[i++] = record->err_type; + + buff[i++] = record->bank; + + tmp = cpu_to_le64(record->ts); + memcpy(buff + i, &tmp, 8); + i += 8; + + tmp = cpu_to_le64((record->offset & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); + i += 6; + + buff[i++] = record->mem_channel; + buff[i++] = record->mcumc_id; + + tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); + memcpy(buff + i, &tmp, 6); +} + +static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buff) +{ + __le64 tmp = 0; + int i = 0; + + /* Next are all record fields according to EEPROM page spec in LE foramt */ + record->err_type = buff[i++]; + + record->bank = buff[i++]; + + memcpy(&tmp, buff + i, 8); + record->ts = le64_to_cpu(tmp); + i += 8; + + memcpy(&tmp, buff + i, 6); + record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); + i += 6; + + buff[i++] = record->mem_channel; + buff[i++] = record->mcumc_id; + + memcpy(&tmp, buff + i, 6); + record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); +} + +/* + * When reaching end of EEPROM memory jump back to 0 record address + * When next record access will go beyond EEPROM page boundary modify bits A17/A8 + * in I2C selector to go to next page + */ +static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) +{ + uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; + + /* When all EEPROM memory used jump back to 0 address */ + if (next_address > EEPROM_SIZE_BYTES) { + DRM_INFO("Reached end of EEPROM memory, jumping to 0 " + "and overriding old record"); + return EEPROM_RECORD_START; + } + + /* + * To check if we overflow page boundary compare next address with + * current and see if bits 17/8 of the EEPROM address will change + * If they do start from the next 256b page + * + * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2 + */ + if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { + DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", + (next_address & EEPROM_ADDR_MSB_MASK)); + + return (next_address & EEPROM_ADDR_MSB_MASK); + } + + return curr_address; +} static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) @@ -171,207 +336,17 @@ static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, return true; } -int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) -{ - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; - struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - int ret = 0; - - mutex_lock(&control->tbl_mutex); - - hdr->header = EEPROM_TABLE_HDR_VAL; - hdr->version = EEPROM_TABLE_VER; - hdr->first_rec_offset = EEPROM_RECORD_START; - hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; - - control->tbl_byte_sum = 0; - __update_tbl_checksum(control, NULL, 0, 0); - control->next_addr = EEPROM_RECORD_START; - - ret = __update_table_header(control, buff); - - mutex_unlock(&control->tbl_mutex); - - return ret; - -} - -int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) -{ - int ret = 0; - struct amdgpu_device *adev = to_amdgpu_device(control); - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; - struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - struct i2c_msg msg = { - .addr = EEPROM_I2C_TARGET_ADDR, - .flags = I2C_M_RD, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - - mutex_init(&control->tbl_mutex); - - switch (adev->asic_type) { - case CHIP_VEGA20: - ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); - break; - - case CHIP_ARCTURUS: - ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor); - break; - - default: - return 0; - } - - if (ret) { - DRM_ERROR("Failed to init I2C controller, ret:%d", ret); - return ret; - } - - /* Read/Create table header from EEPROM address 0 */ - ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); - if (ret < 1) { - DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); - return ret; - } - - __decode_table_header_from_buff(hdr, &buff[2]); - - if (hdr->header == EEPROM_TABLE_HDR_VAL) { - control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / - EEPROM_TABLE_RECORD_SIZE; - control->tbl_byte_sum = __calc_hdr_byte_sum(control); - control->next_addr = EEPROM_RECORD_START; - - DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->num_recs); - - } else { - DRM_INFO("Creating new EEPROM table"); - - ret = amdgpu_ras_eeprom_reset_table(control); - } - - return ret == 1 ? 
0 : -EIO; -} - -void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - switch (adev->asic_type) { - case CHIP_VEGA20: - smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor); - break; - case CHIP_ARCTURUS: - smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor); - break; - - default: - return; - } -} - -static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) -{ - __le64 tmp = 0; - int i = 0; - - /* Next are all record fields according to EEPROM page spec in LE foramt */ - buff[i++] = record->err_type; - - buff[i++] = record->bank; - - tmp = cpu_to_le64(record->ts); - memcpy(buff + i, &tmp, 8); - i += 8; - - tmp = cpu_to_le64((record->offset & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); - i += 6; - - buff[i++] = record->mem_channel; - buff[i++] = record->mcumc_id; - - tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); -} - -static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) -{ - __le64 tmp = 0; - int i = 0; - - /* Next are all record fields according to EEPROM page spec in LE foramt */ - record->err_type = buff[i++]; - - record->bank = buff[i++]; - - memcpy(&tmp, buff + i, 8); - record->ts = le64_to_cpu(tmp); - i += 8; - - memcpy(&tmp, buff + i, 6); - record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); - i += 6; - - record->mem_channel = buff[i++]; - record->mcumc_id = buff[i++]; - - memcpy(&tmp, buff + i, 6); - record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); -} - -/* - * When reaching end of EEPROM memory jump back to 0 record address - * When next record access will go beyond EEPROM page boundary modify bits A17/A8 - * in I2C selector to go to next page - */ -static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) -{ - uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; - - /* When all EEPROM memory used jump back to 0 address */ - if (next_address > EEPROM_SIZE_BYTES) { - DRM_INFO("Reached end of EEPROM memory, jumping to 0 " - "and overriding old record"); - return EEPROM_RECORD_START; - } - - /* - * To check if we overflow page boundary compare next address with - * current and see if bits 17/8 of the EEPROM address will change - * If they do start from the next 256b page - * - * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2 - */ - if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { - DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", - (next_address & EEPROM_ADDR_MSB_MASK)); - - return (next_address & EEPROM_ADDR_MSB_MASK); - } - - return curr_address; -} - int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, bool write, int num) { int i, ret = 0; - struct i2c_msg *msgs, *msg; - unsigned char *buffs, *buff; - struct eeprom_table_record *record; + struct i2c_msg *msgs; + unsigned char *buffs; struct amdgpu_device *adev = to_amdgpu_device(control); - if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS) + if (adev->asic_type != CHIP_VEGA20) return 0; buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, @@ -398,9 +373,9 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, * 256b */ for (i = 0; i < num; i++) { - buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; - msg = &msgs[i]; + unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + struct eeprom_table_record *record = &records[i]; + struct i2c_msg *msg = &msgs[i]; control->next_addr = __correct_eeprom_dest_address(control->next_addr); @@ -440,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, if (!write) { for (i = 0; i < num; i++) { - buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; + unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + struct eeprom_table_record *record = &records[i]; __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 622269957c1b..41f3fcb9a29b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -79,7 +79,6 @@ struct eeprom_table_record { int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); -int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6010999d9020..5c13c503e61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "amdgpu_sdma.h" -#include "amdgpu_ras.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -84,101 +83,3 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, return csa_mc_addr; } - -int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info) -{ - int r, i; - struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; - struct ras_fs_if fs_info = { - .sysfs_name = "sdma_err_count", - .debugfs_name = "sdma_err_inject", - }; - - if (!ih_info) - return -EINVAL; - - if (!adev->sdma.ras_if) { - adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->sdma.ras_if) - return -ENOMEM; - adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; - adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->sdma.ras_if->sub_block_index = 0; - 
strcpy(adev->sdma.ras_if->name, "sdma"); - } - fs_info.head = ih_info->head = *adev->sdma.ras_if; - - r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, - &fs_info, ih_info); - if (r) - goto free; - - if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); - if (r) - goto late_fini; - } - } else { - r = 0; - goto free; - } - - return 0; - -late_fini: - amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); -free: - kfree(adev->sdma.ras_if); - adev->sdma.ras_if = NULL; - return r; -} - -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) { - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - /* the cb member will not be used by - * amdgpu_ras_interrupt_remove_handler, init it only - * to cheat the check in ras_late_fini - */ - .cb = amdgpu_sdma_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} - -int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, - void *err_data, - struct amdgpu_iv_entry *entry) -{ - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); - - return AMDGPU_RAS_SUCCESS; -} - -int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 761ff8be6314..a9ae0d8a0589 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -104,13 +104,4 @@ struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); -int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info); -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); -int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, - void *err_data, - struct amdgpu_iv_entry *entry); -int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f940526c5889..77674a7b9616 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __field(unsigned int, context) __field(unsigned int, seqno) __field(struct dma_fence *, fence) - __string(ring, to_amdgpu_ring(job->base.sched)->name) + __field(char *, ring_name) __field(u32, num_ibs) ), @@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) + __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, 
ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __get_str(ring), __entry->num_ibs) + __entry->seqno, __entry->ring_name, __entry->num_ibs) ); TRACE_EVENT(amdgpu_sched_run_job, @@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __field(unsigned int, context) __field(unsigned int, seqno) - __string(ring, to_amdgpu_ring(job->base.sched)->name) + __field(char *, ring_name) __field(u32, num_ibs) ), @@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job, __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) + __entry->ring_name = to_amdgpu_ring(job->base.sched)->name; __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", __entry->sched_job_id, __get_str(timeline), __entry->context, - __entry->seqno, __get_str(ring), __entry->num_ibs) + __entry->seqno, __entry->ring_name, __entry->num_ibs) ); @@ -323,15 +323,14 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags, bool direct), - TP_ARGS(pe, addr, count, incr, flags, direct), + uint32_t incr, uint64_t flags), + TP_ARGS(pe, addr, count, incr, flags), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) - __field(bool, direct) ), TP_fast_assign( @@ -340,32 +339,28 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; - __entry->direct = direct; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " - "direct=%d", __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count, __entry->direct) + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", + __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), - TP_ARGS(pe, src, count, direct), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count), + TP_ARGS(pe, src, count), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) - __field(bool, direct) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; - __entry->direct = direct; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", - __entry->pe, __entry->src, __entry->count, - __entry->direct) + TP_printk("pe=%010Lx, src=%010Lx, count=%u", + __entry->pe, __entry->src, __entry->count) ); TRACE_EVENT(amdgpu_vm_flush, @@ -473,7 +468,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __string(ring, sched_job->base.sched->name); + __field(const char *,name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) @@ -481,14 +476,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __assign_str(ring, sched_job->base.sched->name) + __entry->name = sched_job->base.sched->name; __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; __entry->seqno = fence->seqno; ), TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", - __get_str(ring), __entry->id, + __entry->name, 
__entry->id, __entry->fence, __entry->ctx, __entry->seqno) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5d916169226..c19100ced040 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -55,7 +55,6 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" -#include "amdgpu_ras.h" #include "bif/bif_4_1_d.h" static int amdgpu_map_buffer(struct ttm_buffer_object *bo, @@ -486,12 +485,15 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { + struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_place placements; struct ttm_placement placement; int r; + adev = amdgpu_ttm_adev(bo->bdev); + /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -542,12 +544,15 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { + struct amdgpu_device *adev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; struct ttm_place placements; int r; + adev = amdgpu_ttm_adev(bo->bdev); + /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; @@ -1214,8 +1219,11 @@ static struct ttm_backend_func amdgpu_backend_func = { static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; + adev = amdgpu_ttm_adev(bo->bdev); + gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; @@ -1648,105 +1656,81 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) */ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { - uint64_t vram_size = adev->gmc.visible_vram_size; + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; + int r = 0; + int i; + u64 vram_size = adev->gmc.visible_vram_size; + u64 offset = adev->fw_vram_usage.start_offset; + u64 size = adev->fw_vram_usage.size; + struct amdgpu_bo *bo; + memset(&bp, 0, sizeof(bp)); + bp.size = adev->fw_vram_usage.size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; - if (adev->fw_vram_usage.size == 0 || - adev->fw_vram_usage.size > vram_size) - return 0; + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { - return amdgpu_bo_create_kernel_at(adev, - adev->fw_vram_usage.start_offset, - adev->fw_vram_usage.size, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); -} + r = amdgpu_bo_create(adev, &bp, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; -/* - * Memoy training reservation functions - */ + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; -/** - * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram - * - * @adev: amdgpu_device pointer - * - * free memory training reserved vram if it has been reserved. 
- */ -static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) -{ - struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + /* remove the original mem node and create a new one at the + * request position + */ + bo = adev->fw_vram_usage.reserved_bo; + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } - ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; - amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL); - ctx->c2p_bo = NULL; + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, + &bo->tbo.mem, &ctx); + if (r) + goto error_pin; - amdgpu_bo_free_kernel(&ctx->p2c_bo, NULL, NULL); - ctx->p2c_bo = NULL; + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size)); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; - return 0; -} - -/** - * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from memory training. - */ -static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) -{ - int ret; - struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; - - memset(ctx, 0, sizeof(*ctx)); - if (!adev->fw_vram_usage.mem_train_support) { - DRM_DEBUG("memory training does not support!\n"); - return 0; + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); } + return r; - ctx->c2p_train_data_offset = adev->fw_vram_usage.mem_train_fb_loc; - ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); - ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; - - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); - - ret = amdgpu_bo_create_kernel_at(adev, - ctx->p2c_train_data_offset, - ctx->train_data_size, - AMDGPU_GEM_DOMAIN_VRAM, - &ctx->p2c_bo, - NULL); - if (ret) { - DRM_ERROR("alloc p2c_bo failed(%d)!\n", ret); - goto Err_out; - } - - ret = amdgpu_bo_create_kernel_at(adev, - ctx->c2p_train_data_offset, - ctx->train_data_size, - AMDGPU_GEM_DOMAIN_VRAM, - &ctx->c2p_bo, - NULL); - if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); - goto Err_out; - } - - ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; - return 0; - -Err_out: - amdgpu_ttm_training_reserve_vram_fini(adev); - return ret; +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; } - /** * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1810,14 +1794,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - /* - *The reserved vram for memory training must be pinned to the specified - *place on the VRAM, so reserve it early. 
- */ - r = amdgpu_ttm_training_reserve_vram_init(adev); - if (r) - return r; - /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS @@ -1828,20 +1804,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) NULL, &stolen_vga_buf); if (r) return r; - - /* - * reserve one TMR (64K) memory at the top of VRAM which holds - * IP Discovery data and is protected by PSP. - */ - r = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE, - DISCOVERY_TMR_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->discovery_memory, - NULL); - if (r) - return r; - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1906,9 +1868,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev) void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); - - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); } /** @@ -1920,7 +1879,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); - amdgpu_ttm_training_reserve_vram_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); @@ -2017,7 +1975,10 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; - num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_dw = adev->mman.buffer_funcs->copy_num_dw; + while (num_dw & 0x7) + num_dw++; + num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); @@ -2077,7 +2038,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); - num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); + num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; + + /* for IB padding */ + while (num_dw & 0x7) + num_dw++; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 833fc4b68940..3a6115ad0196 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -360,7 +360,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: - case CHIP_ARCTURUS: case CHIP_RENOIR: case CHIP_NAVI10: case CHIP_NAVI14: @@ -369,6 +368,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_ARCTURUS: + return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 410587b950f3..b34f00d42049 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -108,12 +108,6 @@ struct ta_firmware_header_v1_0 { uint32_t ta_ras_ucode_version; uint32_t ta_ras_offset_bytes; uint32_t ta_ras_size_bytes; - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_offset_bytes; - uint32_t ta_hdcp_size_bytes; - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_offset_bytes; - uint32_t ta_dtm_size_bytes; }; /* version_major=1, 
version_minor=0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 3283032a78e5..975afa04df09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,8 +54,7 @@ adev->umc.funcs->disable_umc_index_mode(adev); struct amdgpu_umc_funcs { - void (*err_cnt_init)(struct amdgpu_device *adev); - int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_init)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, @@ -63,7 +62,6 @@ struct amdgpu_umc_funcs { void (*enable_umc_index_mode)(struct amdgpu_device *adev, uint32_t umc_instance); void (*disable_umc_index_mode)(struct amdgpu_device *adev); - void (*init_registers)(struct amdgpu_device *adev); }; struct amdgpu_umc { @@ -77,17 +75,8 @@ struct amdgpu_umc { uint32_t channel_offs; /* channel index table of interleaved memory */ const uint32_t *channel_idx_tbl; - struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); -void amdgpu_umc_ras_fini(struct amdgpu_device *adev); -int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry); -int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b4dd89af6f09..b2c364b8695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -39,8 +39,6 @@ #include "cikd.h" #include "uvd/uvd_4_2_d.h" -#include "amdgpu_ras.h" - /* 1 second timeout */ #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -374,13 +372,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; - /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (amdgpu_ras_intr_triggered()) { - DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); - memset(adev->uvd.inst[j].saved_bo, 0, size); - } else { - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); - } + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 703677f2ff6f..b70b3c45bb29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -80,11 +80,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12); MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); -static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, - struct dma_fence **fence); -static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence); /** * amdgpu_vce_init - allocate memory, load vce firmware @@ -433,15 +428,14 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * * Open up a stream for HW test */ -static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, - struct dma_fence **fence) +int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t 
addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -450,7 +444,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; /* stitch together an VCE create msg */ ib->length_dw = 0; @@ -482,8 +476,8 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x00000001; for (i = ib->length_dw; i < ib_size_dw; ++i) @@ -513,8 +507,8 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; @@ -1116,20 +1110,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; - struct amdgpu_bo *bo = NULL; long r; /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; - r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL); + r = amdgpu_vce_get_create_msg(ring, 1, NULL); if (r) goto error; @@ -1145,7 +1132,5 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index d6d83a3ec803..30ea54dd9117 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -58,6 +58,10 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); +int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence); +int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3199e4a5ff12..7a6beb2e7c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -569,14 +569,13 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, - struct dma_fence **fence) + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw 
* 4, &job); @@ -584,14 +583,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -622,14 +621,13 @@ err: } static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, - struct dma_fence **fence) + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -637,14 +635,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x0000000b; ib->ptr[ib->length_dw++] = 0x00000014; @@ -677,20 +675,13 @@ err: int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; - struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL); + r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); if (r) goto error; - r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence); + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); if (r) goto error; @@ -702,8 +693,6 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e775f271f1ed..d8cfcf2d7455 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -130,8 +130,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) - >> shift; + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift; else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; @@ -342,7 +341,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); } -/* +/** * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt */ struct amdgpu_vm_pt_cursor { @@ -483,7 +482,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev, * * @adev: amdgpu_device structure * @vm: amdgpu_vm structure - * @start: optional cursor to start with * @cursor: state to initialize * * Starts a deep first traversal of the PD/PT tree. 
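/*
 * Editor's note: the hunks just above rework amdgpu's cursor-based,
 * deepest-node-first walk of the VM page-table tree
 * (struct amdgpu_vm_pt_cursor and the DFS helpers that follow).  The
 * userspace sketch below is only a hedged illustration of the property
 * that walk provides: every child is visited before its parent, so a
 * node can be torn down as soon as it is visited.  The node layout and
 * fan-out are invented for the demo; the kernel code walks iteratively
 * with an explicit cursor rather than recursing.
 */
#include <stdio.h>
#include <stdlib.h>

#define PT_FANOUT 4	/* real page directories hold up to 512 entries */

struct pt_node {
	struct pt_node *child[PT_FANOUT];
	unsigned int level;
};

static struct pt_node *pt_alloc(unsigned int level)
{
	struct pt_node *n = calloc(1, sizeof(*n));

	if (n)
		n->level = level;
	return n;
}

/* Post-order walk: visit (and free) all children before the node itself. */
static void pt_free_dfs(struct pt_node *node)
{
	unsigned int i;

	if (!node)
		return;
	for (i = 0; i < PT_FANOUT; i++)
		pt_free_dfs(node->child[i]);
	printf("freeing level %u node %p\n", node->level, (void *)node);
	free(node);
}

int main(void)
{
	struct pt_node *root = pt_alloc(0);

	if (!root)
		return 1;
	root->child[1] = pt_alloc(1);
	if (root->child[1])
		root->child[1]->child[2] = pt_alloc(2);

	pt_free_dfs(root);	/* the deepest node prints first, the root last */
	return 0;
}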
@@ -537,7 +535,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, amdgpu_vm_pt_ancestor(cursor); } -/* +/** * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs */ #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ @@ -568,14 +566,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } -/** - * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag - * - * @bo: BO which was removed from the LRU - * - * Make sure the bulk_moveable flag is updated when a BO is removed from the - * LRU. - */ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) { struct amdgpu_bo *abo; @@ -702,7 +692,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear - * @direct: use a direct update * * Root PD needs to be reserved when calling this. * @@ -711,8 +700,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, - bool direct) + struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; @@ -771,7 +759,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL); if (r) @@ -825,13 +812,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requesting vm - * @level: the page table level - * @direct: use a direct update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, bool direct, - struct amdgpu_bo_param *bp) + int level, struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -846,7 +830,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = direct; if (vm->root.base.bo) bp->resv = vm->root.base.bo->tbo.base.resv; } @@ -857,7 +840,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for * @cursor: Which page table to allocate - * @direct: use a direct update * * Make sure a specific page table or directory is allocated. 
* @@ -867,8 +849,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor, - bool direct) + struct amdgpu_vm_pt_cursor *cursor) { struct amdgpu_vm_pt *entry = cursor->entry; struct amdgpu_bo_param bp; @@ -889,7 +870,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, &bp); r = amdgpu_bo_create(adev, &bp, &pt); if (r) @@ -901,7 +882,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt, direct); + r = amdgpu_vm_clear_bo(adev, vm, pt); if (r) goto error_free_pt; @@ -1038,8 +1019,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * Returns: * 0 on success, errno otherwise. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, - bool need_pipe_sync) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -1053,8 +1033,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; + bool pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); struct dma_fence *fence = NULL; - bool pasid_mapping_needed = false; unsigned patch_offset = 0; int r; @@ -1064,12 +1046,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed = true; } - mutex_lock(&id_mgr->lock); - if (id->pasid != job->pasid || !id->pasid_mapping || - !dma_fence_is_signaled(id->pasid_mapping)) - pasid_mapping_needed = true; - mutex_unlock(&id_mgr->lock); - gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; @@ -1109,11 +1085,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } if (pasid_mapping_needed) { - mutex_lock(&id_mgr->lock); id->pasid = job->pasid; dma_fence_put(id->pasid_mapping); id->pasid_mapping = dma_fence_get(fence); - mutex_unlock(&id_mgr->lock); } dma_fence_put(fence); @@ -1197,10 +1171,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/** +/* * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @params: parameters for the update + * @param: parameters for the update * @vm: requested vm * @entry: entry to update * @@ -1224,7 +1198,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); } -/** +/* * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer @@ -1243,20 +1217,19 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, amdgpu_vm_bo_relocated(&entry->base); } -/** - * amdgpu_vm_update_pdes - make sure that all directories are valid +/* + * amdgpu_vm_update_directories - make sure that all directories are valid * * @adev: amdgpu_device pointer * @vm: requested vm - * @direct: submit directly to the paging queue * * Makes sure all directories are up to date. 
* * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct) +int amdgpu_vm_update_directories(struct amdgpu_device *adev, + struct amdgpu_vm *vm) { struct amdgpu_vm_update_params params; int r; @@ -1267,7 +1240,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL); if (r) @@ -1295,7 +1267,7 @@ error: return r; } -/* +/** * amdgpu_vm_update_flags - figure out flags for PTE updates * * Make sure to set the right flags for the PTEs at the desired level. @@ -1418,8 +1390,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; - r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, - params->direct); + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); if (r) return r; @@ -1510,14 +1481,13 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer - * @vm: requested vm - * @direct: direct submission in a page fault * @exclusive: fence we need to sync to + * @pages_addr: DMA addresses to use for mapping + * @vm: requested vm * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @addr: addr to set the area to - * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. @@ -1526,11 +1496,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure. 
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct, struct dma_fence *exclusive, + dma_addr_t *pages_addr, + struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, - dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; @@ -1540,7 +1510,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; params.pages_addr = pages_addr; /* sync to everything except eviction fences on unmapping */ @@ -1599,8 +1568,27 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) flags &= ~AMDGPU_PTE_WRITEABLE; - /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, mapping, &flags); + flags &= ~AMDGPU_PTE_EXECUTABLE; + flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + if (adev->asic_type >= CHIP_NAVI10) { + flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; + flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + } else { + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK); + } + + if ((mapping->flags & AMDGPU_PTE_PRT) && + (adev->asic_type >= CHIP_VEGA10)) { + flags |= AMDGPU_PTE_PRT; + if (adev->asic_type >= CHIP_NAVI10) { + flags |= AMDGPU_PTE_SNOOPED; + flags |= AMDGPU_PTE_LOG; + flags |= AMDGPU_PTE_SYSTEM; + } + flags &= ~AMDGPU_PTE_VALID; + } trace_amdgpu_vm_bo_update(mapping); @@ -1644,8 +1632,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count * - AMDGPU_GPU_PAGES_IN_CPU_PAGE; + max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; } } else if (flags & AMDGPU_PTE_VALID) { @@ -1654,9 +1641,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive, + r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, start, last, flags, addr, - dma_addr, fence); + fence); if (r) return r; @@ -1684,7 +1671,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * Returns: * 0 for success, -EINVAL for failure. */ -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, +int amdgpu_vm_bo_update(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; @@ -1711,7 +1699,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = bo->tbo.moving; + exclusive = dma_resv_get_excl(bo->tbo.base.resv); } if (bo) { @@ -1742,6 +1730,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, return r; } + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_asic_flush_hdp(adev, NULL); + } + /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. 
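/*
 * Editor's note: the amdgpu_vm_bo_split_mapping hunk above folds the
 * per-ASIC PTE flag selection back inline (it had been factored out into
 * amdgpu_gmc_get_vm_pte).  The sketch below is a hedged, self-contained
 * illustration of that merge pattern only: clear a field in the base
 * flags, take the mapping's value for it, and pick the field mask by GPU
 * generation.  The bit positions here are invented for the demo and do
 * not match the real AMDGPU_PTE_* definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define PTE_VALID	(1ULL << 0)
#define PTE_EXECUTABLE	(1ULL << 1)
#define PTE_PRT		(1ULL << 2)
#define MTYPE_VG10_MASK	(3ULL << 4)	/* placeholder two-bit field */
#define MTYPE_NV10_MASK	(7ULL << 4)	/* placeholder three-bit field */

enum asic_gen { GEN_VEGA, GEN_NAVI };

static uint64_t merge_pte_flags(enum asic_gen gen, uint64_t flags,
				uint64_t mapping_flags)
{
	uint64_t mtype_mask = (gen >= GEN_NAVI) ? MTYPE_NV10_MASK
						: MTYPE_VG10_MASK;

	/* the executable bit always comes from the mapping */
	flags &= ~PTE_EXECUTABLE;
	flags |= mapping_flags & PTE_EXECUTABLE;

	/* the memory-type field is generation specific */
	flags &= ~mtype_mask;
	flags |= mapping_flags & mtype_mask;

	/* PRT mappings are never marked valid */
	if (mapping_flags & PTE_PRT) {
		flags |= PTE_PRT;
		flags &= ~PTE_VALID;
	}
	return flags;
}

int main(void)
{
	uint64_t f = merge_pte_flags(GEN_NAVI, PTE_VALID,
				     PTE_EXECUTABLE | (5ULL << 4));

	printf("merged flags: 0x%llx\n", (unsigned long long)f);
	return 0;
}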
@@ -1749,8 +1743,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { uint32_t mem_type = bo->tbo.mem.mem_type; - if (!(bo->preferred_domains & - amdgpu_mem_type_to_domain(mem_type))) + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); @@ -1944,9 +1937,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL, + r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, - init_pte_value, 0, NULL, &f); + init_pte_value, 0, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2688,17 +2681,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); - /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs, + /* create scheduler entity for page table updates */ + r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, adev->vm_manager.vm_pte_num_rqs, NULL); if (r) return r; - r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs, - adev->vm_manager.vm_pte_num_rqs, NULL); - if (r) - goto error_free_direct; - vm->pte_support_ats = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { @@ -2713,8 +2701,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && - !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->use_cpu_for_update) @@ -2723,12 +2710,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) - goto error_free_delayed; + goto error_free_sched_entity; r = amdgpu_bo_reserve(root, true); if (r) @@ -2740,7 +2727,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_vm_bo_base_init(&vm->root.base, vm, root); - r = amdgpu_vm_clear_bo(adev, vm, root, false); + r = amdgpu_vm_clear_bo(adev, vm, root); if (r) goto error_unreserve; @@ -2771,11 +2758,8 @@ error_free_root: amdgpu_bo_unref(&vm->root.base.bo); vm->root.base.bo = NULL; -error_free_delayed: - drm_sched_entity_destroy(&vm->delayed); - -error_free_direct: - drm_sched_entity_destroy(&vm->direct); +error_free_sched_entity: + drm_sched_entity_destroy(&vm->entity); return r; } @@ -2816,7 +2800,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -2832,8 +2815,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Returns: * 0 for success, -errno for errors. 
*/ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned int pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -2865,7 +2847,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo); if (r) goto free_idr; } @@ -2875,8 +2857,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_VM_USE_CPU_FOR_COMPUTE); DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && - !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->use_cpu_for_update) @@ -2955,38 +2936,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - int i; + int i, r; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - root = amdgpu_bo_ref(vm->root.base.bo); - amdgpu_bo_reserve(root, true); if (vm->pasid) { unsigned long flags; spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - vm->pasid = 0; } - list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { - amdgpu_vm_prt_fini(adev, vm); - prt_fini_needed = false; - } - - list_del(&mapping->list); - amdgpu_vm_free_mapping(adev, vm, mapping, NULL); - } - - amdgpu_vm_free_pts(adev, vm, NULL); - amdgpu_bo_unreserve(root); - amdgpu_bo_unref(&root); - WARN_ON(vm->root.base.bo); - - drm_sched_entity_destroy(&vm->direct); - drm_sched_entity_destroy(&vm->delayed); + drm_sched_entity_destroy(&vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2999,7 +2961,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) list_del(&mapping->list); kfree(mapping); } + list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { + if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + amdgpu_vm_prt_fini(adev, vm); + prt_fini_needed = false; + } + list_del(&mapping->list); + amdgpu_vm_free_mapping(adev, vm, mapping, NULL); + } + + root = amdgpu_bo_ref(vm->root.base.bo); + r = amdgpu_bo_reserve(root, true); + if (r) { + dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); + } else { + amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_bo_unreserve(root); + } + amdgpu_bo_unref(&root); + WARN_ON(vm->root.base.bo); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vmid_free_reserved(adev, vm, i); @@ -3083,9 +3064,8 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: - /* We only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB_0); + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); if (r) return r; break; @@ -3128,88 +3108,13 @@ 
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, */ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) { - if (vm->task_info.pid) - return; + if (!vm->task_info.pid) { + vm->task_info.pid = current->pid; + get_task_comm(vm->task_info.task_name, current); - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** - * amdgpu_vm_handle_fault - graceful handling of VM faults. - * @adev: amdgpu device pointer - * @pasid: PASID of the VM - * @addr: Address of the fault - * - * Try to gracefully handle a VM fault. Return true if the fault was handled and - * shouldn't be reported any more. - */ -bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, - uint64_t addr) -{ - struct amdgpu_bo *root; - uint64_t value, flags; - struct amdgpu_vm *vm; - long r; - - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm) - root = amdgpu_bo_ref(vm->root.base.bo); - else - root = NULL; - spin_unlock(&adev->vm_manager.pasid_lock); - - if (!root) - return false; - - r = amdgpu_bo_reserve(root, true); - if (r) - goto error_unref; - - /* Double check that the VM still exists */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm && vm->root.base.bo != root) - vm = NULL; - spin_unlock(&adev->vm_manager.pasid_lock); - if (!vm) - goto error_unlock; - - addr /= AMDGPU_GPU_PAGE_SIZE; - flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | - AMDGPU_PTE_SYSTEM; - - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { - /* Redirect the access to the dummy page */ - value = adev->dummy_page_addr; - flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | - AMDGPU_PTE_WRITEABLE; - } else { - /* Let the hw retry silently on the PTE */ - value = 0; + if (current->group_leader->mm == current->mm) { + vm->task_info.tgid = current->group_leader->pid; + get_task_comm(vm->task_info.process_name, current->group_leader); + } } - - r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, - flags, value, NULL, NULL); - if (r) - goto error_unlock; - - r = amdgpu_vm_update_pdes(adev, vm, true); - -error_unlock: - amdgpu_bo_unreserve(root); - if (r < 0) - DRM_ERROR("Can't handle page fault (%ld)\n", r); - -error_unref: - amdgpu_bo_unref(&root); - - return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4dbbe1b6b413..2eda3a8c330d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -99,9 +99,6 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 -/* Reserve 4MB VRAM for page tables */ -#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) - /* max number of VMHUB */ #define AMDGPU_MAX_VMHUBS 3 #define AMDGPU_GFXHUB_0 0 @@ -201,11 +198,6 @@ struct amdgpu_vm_update_params { */ struct amdgpu_vm *vm; - /** - * @direct: if changes should be made directly - */ - bool direct; - /** * @pages_addr: * @@ -262,9 +254,8 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* Scheduler entities for page table updates */ - struct drm_sched_entity direct; - struct drm_sched_entity delayed; + /* Scheduler entity for page table updates */ + struct drm_sched_entity entity; unsigned int pasid; /* dedicated 
to vm */ @@ -366,8 +357,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct); +int amdgpu_vm_update_directories(struct amdgpu_device *adev, + struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); @@ -413,8 +404,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info); -bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, - uint64_t addr); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 73fec7a0ced5..5222d165abfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -49,6 +49,13 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, { int r; + /* Wait for PT BOs to be idle. PTs share the same resv. object + * as the root PD BO + */ + r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); + if (unlikely(r)) + return r; + /* Wait for any BO move to be completed */ if (exclusive) { r = dma_fence_wait(exclusive, true); @@ -56,14 +63,7 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, return r; } - /* Don't wait for submissions during page fault */ - if (p->direct) - return 0; - - /* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); + return 0; } /** @@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, pe += (unsigned long)amdgpu_bo_kptr(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); for (i = 0; i < count; i++) { value = p->pages_addr ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 832db59f441e..61fc584cbb1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -68,19 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, if (r) return r; - p->num_dw_left = ndw; - - /* Wait for moves to be completed */ r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); if (r) return r; - /* Don't wait for any submissions during page fault handling */ - if (p->direct) - return 0; + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, + owner, false); + if (r) + return r; - return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, - owner, false); + p->num_dw_left = ndw; + return 0; } /** @@ -97,23 +95,22 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, { struct amdgpu_bo *root = p->vm->root.base.bo; struct amdgpu_ib *ib = p->job->ibs; - struct drm_sched_entity *entity; struct amdgpu_ring *ring; struct dma_fence *f; int r; - entity = p->direct ? 
&p->vm->direct : &p->vm->delayed; - ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); + ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > p->num_dw_left); - r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); + r = amdgpu_job_submit(p->job, &p->vm->entity, + AMDGPU_FENCE_OWNER_VM, &f); if (r) goto error; amdgpu_bo_fence(root, f, true); - if (fence && !p->direct) + if (fence) swap(*fence, f); dma_fence_put(f); return 0; @@ -123,6 +120,7 @@ error: return r; } + /** * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping * @@ -143,7 +141,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, src += p->num_dw_left * 4; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct); + trace_amdgpu_vm_copy_ptes(pe, src, count); amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); } @@ -170,7 +168,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, struct amdgpu_ib *ib = p->job->ibs; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, count, incr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 82a3299e53c0..3a9d8c15fe9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -23,9 +23,6 @@ */ #include "amdgpu.h" -#include "amdgpu_vm.h" -#include "amdgpu_atomfirmware.h" -#include "atom.h" struct amdgpu_vram_mgr { struct drm_mm mm; @@ -104,39 +101,6 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); } -static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - - switch (adev->gmc.vram_vendor) { - case SAMSUNG: - return snprintf(buf, PAGE_SIZE, "samsung\n"); - case INFINEON: - return snprintf(buf, PAGE_SIZE, "infineon\n"); - case ELPIDA: - return snprintf(buf, PAGE_SIZE, "elpida\n"); - case ETRON: - return snprintf(buf, PAGE_SIZE, "etron\n"); - case NANYA: - return snprintf(buf, PAGE_SIZE, "nanya\n"); - case HYNIX: - return snprintf(buf, PAGE_SIZE, "hynix\n"); - case MOSEL: - return snprintf(buf, PAGE_SIZE, "mosel\n"); - case WINBOND: - return snprintf(buf, PAGE_SIZE, "winbond\n"); - case ESMT: - return snprintf(buf, PAGE_SIZE, "esmt\n"); - case MICRON: - return snprintf(buf, PAGE_SIZE, "micron\n"); - default: - return snprintf(buf, PAGE_SIZE, "unknown\n"); - } -} - static DEVICE_ATTR(mem_info_vram_total, S_IRUGO, amdgpu_mem_info_vram_total_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO, @@ -145,8 +109,6 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO, amdgpu_mem_info_vram_used_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, amdgpu_mem_info_vis_vram_used_show, NULL); -static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, - amdgpu_mem_info_vram_vendor, NULL); /** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM @@ -192,11 +154,6 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); return ret; } - ret = device_create_file(adev->dev, 
&dev_attr_mem_info_vram_vendor); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); - return ret; - } return 0; } @@ -223,7 +180,6 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); - device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor); return 0; } @@ -319,7 +275,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes, max_bytes; + uint64_t vis_usage = 0, mem_bytes; unsigned i; int r; @@ -327,13 +283,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; - max_bytes = adev->gmc.mc_vram_size; - if (tbo->type != ttm_bo_type_kernel) - max_bytes -= AMDGPU_VM_RESERVED_VRAM; - /* bail out quickly if there's likely not enough VRAM for this BO */ mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; - if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { + if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) { atomic64_sub(mem_bytes, &mgr->usage); mem->mm_node = NULL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 00371713c671..65aae75f80fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,7 +25,6 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" -#include "amdgpu_ras.h" #include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -438,52 +437,3 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) mutex_unlock(&hive->hive_lock); } } - -int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "xgmi_wafl_err_count", - .debugfs_name = "xgmi_wafl_err_inject", - }; - - if (!adev->gmc.xgmi.supported || - adev->gmc.xgmi.num_physical_nodes == 0) - return 0; - - if (!adev->gmc.xgmi.ras_if) { - adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gmc.xgmi.ras_if) - return -ENOMEM; - adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; - adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gmc.xgmi.ras_if->sub_block_index = 0; - strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); - } - ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; - r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { - kfree(adev->gmc.xgmi.ras_if); - adev->gmc.xgmi.ras_if = NULL; - } - - return r; -} - -void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && - adev->gmc.xgmi.ras_if) { - struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index bbf504ff7051..fbcee31788c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -42,8 +42,6 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); 
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); -int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); -void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c index fda99c958c3b..4853899b1824 100644 --- a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "arct_ip_offset.h" int arct_reg_base_init(struct amdgpu_device *adev) @@ -51,8 +52,6 @@ int arct_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); - adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); - adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 2d64d270725d..b81bb414fcb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -966,25 +966,6 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS}, - {mmGRBM_STATUS2}, - {mmGRBM_STATUS_SE0}, - {mmGRBM_STATUS_SE1}, - {mmGRBM_STATUS_SE2}, - {mmGRBM_STATUS_SE3}, - {mmSRBM_STATUS}, - {mmSRBM_STATUS2}, - {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET}, - {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET}, - {mmCP_STAT}, - {mmCP_STALLED_STAT1}, - {mmCP_STALLED_STAT2}, - {mmCP_STALLED_STAT3}, - {mmCP_CPF_BUSY_STAT}, - {mmCP_CPF_STALLED_STAT1}, - {mmCP_CPF_STATUS}, - {mmCP_CPC_BUSY_STAT}, - {mmCP_CPC_STALLED_STAT1}, - {mmCP_CPC_STATUS}, {mmGB_ADDR_CONFIG}, {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, @@ -1289,15 +1270,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) } /** - * cik_asic_pci_config_reset - soft reset GPU + * cik_asic_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Use PCI Config method to reset the GPU. - * + * Look up which blocks are hung and attempt + * to reset them. * Returns 0 for success. */ -static int cik_asic_pci_config_reset(struct amdgpu_device *adev) +static int cik_asic_reset(struct amdgpu_device *adev) { int r; @@ -1313,45 +1294,7 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) static enum amd_reset_method cik_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; - - switch (adev->asic_type) { - case CHIP_BONAIRE: - case CHIP_HAWAII: - /* disable baco reset until it works */ - /* smu7_asic_get_baco_capability(adev, &baco_reset); */ - baco_reset = false; - break; - default: - baco_reset = false; - break; - } - - if (baco_reset) - return AMD_RESET_METHOD_BACO; - else - return AMD_RESET_METHOD_LEGACY; -} - -/** - * cik_asic_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * - * Look up which blocks are hung and attempt - * to reset them. - * Returns 0 for success. 
- */ -static int cik_asic_reset(struct amdgpu_device *adev) -{ - int r; - - if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) - r = smu7_asic_baco_reset(adev); - else - r = cik_asic_pci_config_reset(adev); - - return r; + return AMD_RESET_METHOD_LEGACY; } static u32 cik_get_config_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index 9870bf27870e..54c625a2e570 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -31,7 +31,4 @@ void cik_srbm_select(struct amdgpu_device *adev, int cik_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 40d2ac723dd6..645550e7caf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -330,11 +330,9 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -370,7 +368,6 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } /** @@ -385,11 +382,9 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -402,7 +397,6 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1225,12 +1219,10 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1238,14 +1230,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = 
to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1271,12 +1261,10 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1285,14 +1273,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1327,12 +1313,10 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1355,14 +1339,12 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1370,10 +1352,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count < 0) + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) return; + } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 898ef72d423c..d9f470632b2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -348,11 +348,9 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= 
adev->mode_info.num_hpd) @@ -387,7 +385,6 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } /** @@ -402,11 +399,9 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -418,7 +413,6 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1251,12 +1245,10 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1264,14 +1256,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1297,12 +1287,10 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1311,14 +1299,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1353,12 +1339,10 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) { 
- struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1381,14 +1365,12 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1396,10 +1378,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count < 0) + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) return; + } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index db15a112becc..3eb2e7429269 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -281,11 +281,9 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -311,7 +309,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); + } /** @@ -326,11 +324,9 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -342,7 +338,6 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1129,24 +1124,20 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; 
struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int interlace = 0; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1173,25 +1164,21 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u8 *sadb = NULL; int sad_count; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1234,12 +1221,10 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1259,14 +1244,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1274,10 +1257,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count < 0) + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) return; + } for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 tmp = 0; @@ -1649,7 +1632,6 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); int bpc = 8; @@ -1657,14 +1639,12 @@ 
static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, if (!dig || !dig->afmt) return; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index f06c9022c1fd..a16c5e9e610e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -275,11 +275,9 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -305,7 +303,6 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } /** @@ -320,11 +317,9 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; u32 tmp; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -336,7 +331,6 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - drm_connector_list_iter_end(&iter); } static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1163,12 +1157,10 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp = 0, offset; @@ -1177,14 +1169,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, offset = dig->afmt->pin->offset; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1224,12 +1214,10 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - 
struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 offset, tmp; u8 *sadb = NULL; @@ -1240,14 +1228,12 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1277,13 +1263,11 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; u32 offset; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1308,14 +1292,12 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } - drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1323,10 +1305,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count < 0) + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) return; + } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6221298b477..844c03868248 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -33,10 +33,6 @@ static void df_v1_7_sw_init(struct amdgpu_device *adev) { } -static void df_v1_7_sw_fini(struct amdgpu_device *adev) -{ -} - static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, bool enable) { @@ -115,7 +111,6 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, const struct amdgpu_df_funcs df_v1_7_funcs = { .sw_init = df_v1_7_sw_init, - .sw_fini = df_v1_7_sw_fini, .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, .get_fb_channel_number = df_v1_7_get_fb_channel_number, .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 16fbd2bc8ad1..5850c8e34caa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -99,8 +99,8 @@ static uint64_t 
df_v3_6_get_fica(struct amdgpu_device *adev, unsigned long flags, address, data; uint32_t ficadl_val, ficadh_val; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); @@ -122,8 +122,8 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); @@ -150,8 +150,8 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, lo_addr); @@ -172,8 +172,8 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, lo_addr); @@ -220,13 +220,6 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) adev->df_perfmon_config_assign_mask[i] = 0; } -static void df_v3_6_sw_fini(struct amdgpu_device *adev) -{ - - device_remove_file(adev->dev, &dev_attr_df_cntr_avail); - -} - static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, bool enable) { @@ -544,7 +537,6 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, - .sw_fini = df_v3_6_sw_fini, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, .get_fb_channel_number = df_v3_6_get_fb_channel_number, .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e7bab4f094b3..957811b73672 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -93,7 +93,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), @@ -127,7 +127,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = @@ -140,7 +140,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), @@ -171,7 +171,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), }; static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = @@ -179,7 +179,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), @@ -1442,7 +1442,7 @@ static int gfx_v10_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v10_0_pfp_fini(adev); @@ -2443,7 +2443,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2513,7 +2513,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -2582,7 +2582,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2903,7 +2903,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } 
if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3106,7 +3106,6 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; - adev->wb.wb[ring->wptr_offs] = 0; amdgpu_ring_clear_ring(ring); #ifdef BRING_UP_DEBUG mutex_lock(&adev->srbm_mutex); @@ -4358,7 +4357,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4378,8 +4377,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio.funcs->get_hdp_flush_req_offset(adev), - adev->nbio.funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -5284,12 +5283,15 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) { - unsigned total_cu = adev->gfx.config.max_cu_per_sh * - adev->gfx.config.max_sh_per_se * - adev->gfx.config.max_shader_engines; + /* init asic gds info */ + switch (adev->asic_type) { + case CHIP_NAVI10: + default: + adev->gds.gds_size = 0x10000; + adev->gds.gds_compute_max_wave_id = 0x4ff; + break; + } - adev->gds.gds_size = 0x10000; - adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; adev->gds.gws_size = 64; adev->gds.oa_size = 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ffbde9136372..87dd55e9d72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2103,7 +2103,7 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 39297baedfb4..dcadc73bffd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -131,18 +131,6 @@ MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 -struct ras_gfx_subblock_reg { - const char *name; - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - uint32_t sec_count_mask; - uint32_t sec_count_shift; - uint32_t ded_count_mask; - uint32_t ded_count_shift; -}; - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -529,9 +517,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), - 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -594,9 +582,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -688,9 +676,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = @@ -703,7 +691,6 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -1337,8 +1324,7 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, /* TODO: Determine if MEC2 JT FW loading can be removed for all GFX V9 asic and above */ - if (adev->asic_type != CHIP_ARCTURUS && - adev->asic_type != CHIP_RENOIR) { + if (adev->asic_type != CHIP_ARCTURUS) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; info->fw = adev->gfx.mec2_fw; @@ -1970,6 +1956,190 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) return 0; } +static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, + struct amdgpu_ngg_buf *ngg_buf, + int size_se, + int default_size_se) +{ + int r; + + if (size_se < 0) { + dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); + return -EINVAL; + } + size_se = size_se ? 
size_se : default_size_se; + + ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; + r = amdgpu_bo_create_kernel(adev, ngg_buf->size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &ngg_buf->bo, + &ngg_buf->gpu_addr, + NULL); + if (r) { + dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); + return r; + } + ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); + + return r; +} + +static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < NGG_BUF_MAX; i++) + amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, + &adev->gfx.ngg.buf[i].gpu_addr, + NULL); + + memset(&adev->gfx.ngg.buf[0], 0, + sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); + + adev->gfx.ngg.init = false; + + return 0; +} + +static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_ngg || adev->gfx.ngg.init == true) + return 0; + + /* GDS reserve memory: 64 bytes alignment */ + adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); + adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; + adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); + adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); + + /* Primitive Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], + amdgpu_prim_buf_per_se, + 64 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Primitive Buffer\n"); + goto err; + } + + /* Position Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], + amdgpu_pos_buf_per_se, + 256 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Position Buffer\n"); + goto err; + } + + /* Control Sideband */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], + amdgpu_cntl_sb_buf_per_se, + 256); + if (r) { + dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); + goto err; + } + + /* Parameter Cache, not created by default */ + if (amdgpu_param_buf_per_se <= 0) + goto out; + + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], + amdgpu_param_buf_per_se, + 512 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Parameter Cache\n"); + goto err; + } + +out: + adev->gfx.ngg.init = true; + return 0; +err: + gfx_v9_0_ngg_fini(adev); + return r; +} + +static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; + int r; + u32 data, base; + + if (!amdgpu_ngg) + return 0; + + /* Program buffer size */ + data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, + adev->gfx.ngg.buf[NGG_PRIM].size >> 8); + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, + adev->gfx.ngg.buf[NGG_POS].size >> 8); + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); + + data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, + adev->gfx.ngg.buf[NGG_CNTL].size >> 8); + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, + adev->gfx.ngg.buf[NGG_PARAM].size >> 10); + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); + + /* Program buffer base address */ + base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); + + base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); + + base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); + + base = 
upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); + + base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); + + base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); + + /* Clear GDS reserved memory */ + r = amdgpu_ring_alloc(ring, 17); + if (r) { + DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", + ring->name, r); + return r; + } + + gfx_v9_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), + (adev->gds.gds_size + + adev->gfx.ngg.gds_reserve_size)); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | + PACKET3_DMA_DATA_SRC_SEL(2))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gfx.ngg.gds_reserve_size); + + gfx_v9_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); + + amdgpu_ring_commit(ring); + + return 0; +} + static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { @@ -2137,6 +2307,10 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + r = gfx_v9_0_ngg_init(adev); + if (r) + return r; + return 0; } @@ -2146,7 +2320,19 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_gfx_ras_fini(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -2154,10 +2340,11 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); + gfx_v9_0_ngg_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, @@ -3696,6 +3883,12 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_ngg_en(adev); + if (r) + return r; + } + return r; } @@ -3737,10 +3930,8 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - /* DF freeze and kcq disable will fail */ - if (!amdgpu_ras_intr_triggered()) - /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + /* disable KCQ to avoid CPC touch memory not valid anymore */ + gfx_v9_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3993,7 +4184,6 @@ static 
const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, - { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, @@ -4013,10 +4203,6 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; int i, r; - /* only support when RAS is enabled */ - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return 0; - r = amdgpu_ring_alloc(ring, 7); if (r) { DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", @@ -4207,14 +4393,33 @@ static int gfx_v9_0_early_init(void *handle) return 0; } +static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry); + static int gfx_v9_0_ecc_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = &adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .cb = gfx_v9_0_process_ras_data_cb, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + .debugfs_name = "gfx_err_inject", + }; + struct ras_common_if ras_block = { + .block = AMDGPU_RAS_BLOCK__GFX, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "gfx", + }; int r; - r = amdgpu_gfx_ras_late_init(adev); - if (r) - return r; + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + return 0; + } r = gfx_v9_0_do_edc_gds_workarounds(adev); if (r) @@ -4225,7 +4430,72 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. 
*/ + goto resume; + } + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + r = 0; + } + goto feature; + } + + ih_info.head = **ras_if; + fs_info.head = **ras_if; + + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + + amdgpu_ras_debugfs_create(adev, &fs_info); + + r = amdgpu_ras_sysfs_create(adev, &fs_info); + if (r) + goto sysfs; +resume: + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto irq; + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return r; } static int gfx_v9_0_late_init(void *handle) @@ -4290,14 +4560,16 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, { amdgpu_gfx_rlc_enter_safe_mode(adev); + if (is_support_sw_smu(adev) && !enable) + smu_set_gfx_cgpg(&adev->smu, enable); + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); } else { gfx_v9_0_enable_gfx_cg_power_gating(adev, false); - if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) - gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); + gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } amdgpu_gfx_rlc_exit_safe_mode(adev); @@ -4566,6 +4838,8 @@ static int gfx_v9_0_set_powergating_state(void *handle, gfx_v9_0_enable_cp_power_gating(adev, false); /* update gfx cgpg state */ + if (is_support_sw_smu(adev) && enable) + smu_set_gfx_cgpg(&adev->smu, enable); gfx_v9_0_update_gfx_cg_power_gating(adev, enable); /* update mgcg state */ @@ -4696,7 +4970,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4716,8 +4990,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio.funcs->get_hdp_flush_req_offset(adev), - adev->nbio.funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -5449,446 +5723,313 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. 
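/*
 * Illustrative sketch, not part of the patch: the error paths in
 * gfx_v9_0_ecc_late_init() above use the usual kernel "goto ladder", where
 * each failure label undoes only the steps that already succeeded, in
 * reverse order.  The setup_a/b/c() and undo_*() helpers below are
 * hypothetical placeholders for the feature-enable, IRQ and sysfs steps.
 */
#include <stdio.h>

static int setup_a(void) { return 0; }   /* e.g. enable the RAS feature  */
static int setup_b(void) { return 0; }   /* e.g. register an IRQ handler */
static int setup_c(void) { return -1; }  /* e.g. create sysfs entries    */

static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

static int init_with_unwind(void)
{
	int r;

	r = setup_a();
	if (r)
		return r;
	r = setup_b();
	if (r)
		goto err_a;		/* only step A needs undoing */
	r = setup_c();
	if (r)
		goto err_b;		/* steps A and B need undoing */
	return 0;

err_b:
	undo_b();
err_a:
	undo_a();
	return r;
}

int main(void)
{
	return init_with_unwind() ? 1 : 0;
}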
*/ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); + amdgpu_ras_reset_gpu(adev, 0); + return AMDGPU_RAS_SUCCESS; +} -static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { - { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), - SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), - SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) - }, - { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), - SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), - SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) - }, - { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), - SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), - 0, 0 - }, - { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), - SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), - 0, 0 - }, - { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), - SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), - SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) - }, - { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), - SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), - 0, 0 - }, - { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), - SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), - SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) - }, - { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), - SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), - SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) - }, - { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), - SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), - 0, 0 - }, - { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), - SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), - 0, 0 - }, - { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), - SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), - 0, 0 - }, - { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), - SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) - }, - { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), - SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), - 0, 0 - }, +static const struct { + const char *name; + uint32_t ip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t per_se_instance; + int32_t num_instance; + uint32_t sec_count_mask; + uint32_t ded_count_mask; +} gfx_ras_edc_regs[] = { + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, + { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, + { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, + { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, + { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, + { "DC_CSINVOC", 
SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, + { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, + { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) - }, + 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, { "GDS_OA_PHY_PHY_CMD_RAM_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, { "GDS_OA_PHY_PHY_DATA_RAM_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), - 0, 0 - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", - SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), - SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) - }, - { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), - SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), - 0, 0 - }, - { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) - }, - { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), - 0, 0 - }, - { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), - 0, 0 - }, - { 
"TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), - 0, 0 - }, - { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), - SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), - 0, 0 - }, - { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), - SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), - 0, 0 - }, - { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), - SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), - 0, 0 - }, - { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), - SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) - }, - { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), - SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) - }, - { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), - SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) - }, - { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), - SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) - }, - { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), - SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) - }, - { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), - 0, 0 - }, - { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), - 0, 0 - }, - { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), - 0, 0 - }, - { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), - 0, 0 - }, - { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), - 0, 0 - }, - { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), - SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), - 0, 0 - }, - { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), - 0, 0 - }, - { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), - 0, 0 - }, - { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), - 0, 0 - }, + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, + REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, + { 
"TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, + { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), - 0, 0 - }, - { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), - 0, 0 - }, + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), + 0 }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), - 0, 0 - }, - { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), - SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), - 0, 0 - }, - { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), - SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), - 0, 0 - }, - { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) - }, - { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - 
SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) - }, - { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), - 0, 0 - }, - { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), - 0, 0 - }, - { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), - 0, 0 - }, - { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) - }, - { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), - SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) - }, - { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), - SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), - SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) - }, - { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), - SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), - SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) - }, - { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), - SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) - }, - { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) - }, - { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) - }, - { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) - }, - { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) - }, - { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) - }, - { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), - SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) - }, + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), + 0 }, + { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, + REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, + { "TCP_UTCL1_LFIFO0", 
SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) - }, - { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) - }, + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) - }, - { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) - }, + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, 
DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) - }, - { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) - }, - { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) - }, - { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) - }, - { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) - }, - { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) - }, - { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), - 0, 0 - }, - { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) - }, - { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) - }, - { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) - }, - { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) - }, - { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) - }, - { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), - 0, 0 - }, - { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, 
mmSQC_EDC_CNT3), - SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), - 0, 0 - }, - { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) - }, - { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) - }, - { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) - }, - { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) - }, - { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) - }, - { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), - 0, 0 - }, - { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), - 0, 0 - }, - { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), - 0, 0 - }, - { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), - 0, 0 - }, - { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), - SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), - 0, 0 - }, - { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) - }, - { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) - }, - { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) - }, - { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), - 0, 0 - }, - { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), - 0, 0 - }, - { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), - 0, 0 - }, - { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), - 0, 0 - }, - { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), - 0, 0 - }, - { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), - SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), - 0, 0 - } + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { 
"SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 
0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, + { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, + { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, + { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, + { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, + { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, + { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, }; static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, @@ -5937,217 +6078,14 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, return ret; } -static const char *vml2_mems[] = { - "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", - "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", - "UTC_VML2_BANK_CACHE_0_4K_MEM0", - "UTC_VML2_BANK_CACHE_0_4K_MEM1", - "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", - "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", - "UTC_VML2_BANK_CACHE_1_4K_MEM0", - "UTC_VML2_BANK_CACHE_1_4K_MEM1", - "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", - "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", - "UTC_VML2_BANK_CACHE_2_4K_MEM0", - "UTC_VML2_BANK_CACHE_2_4K_MEM1", - "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", - "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", - "UTC_VML2_BANK_CACHE_3_4K_MEM0", - "UTC_VML2_BANK_CACHE_3_4K_MEM1", -}; - -static const char *vml2_walker_mems[] = { - "UTC_VML2_CACHE_PDE0_MEM0", - "UTC_VML2_CACHE_PDE0_MEM1", - "UTC_VML2_CACHE_PDE1_MEM0", - "UTC_VML2_CACHE_PDE1_MEM1", - "UTC_VML2_CACHE_PDE2_MEM0", - "UTC_VML2_CACHE_PDE2_MEM1", - "UTC_VML2_RDIF_LOG_FIFO", -}; - -static const char 
*atc_l2_cache_2m_mems[] = { - "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", - "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", - "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", - "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", -}; - -static const char *atc_l2_cache_4k_mems[] = { - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", - "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", - "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", - "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", - "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", -}; - -static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, - struct ras_err_data *err_data) -{ - uint32_t i, data; - uint32_t sec_count, ded_count; - - WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); - WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); - WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); - WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); - - for (i = 0; i < 16; i++) { - WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); - data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); - - sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); - if (sec_count) { - DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, - vml2_mems[i], sec_count); - err_data->ce_count += sec_count; - } - - ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); - if (ded_count) { - DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, - vml2_mems[i], ded_count); - err_data->ue_count += ded_count; - } - } - - for (i = 0; i < 7; i++) { - WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); - data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); - - sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, - SEC_COUNT); - if (sec_count) { - DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, - vml2_walker_mems[i], sec_count); - err_data->ce_count += sec_count; - } - - ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, - DED_COUNT); - if (ded_count) { - DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, - vml2_walker_mems[i], ded_count); - err_data->ue_count += ded_count; - } - } - - for (i = 0; i < 4; i++) { - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); - data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); - - sec_count = (data & 0x00006000L) >> 0xd; - if (sec_count) { - DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, - atc_l2_cache_2m_mems[i], sec_count); - 
err_data->ce_count += sec_count; - } - } - - for (i = 0; i < 32; i++) { - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); - data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); - - sec_count = (data & 0x00006000L) >> 0xd; - if (sec_count) { - DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, - atc_l2_cache_4k_mems[i], sec_count); - err_data->ce_count += sec_count; - } - - ded_count = (data & 0x00018000L) >> 0xf; - if (ded_count) { - DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, - atc_l2_cache_4k_mems[i], ded_count); - err_data->ue_count += ded_count; - } - } - - WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); - WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); - WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); - - return 0; -} - -static int __get_ras_error_count(const struct soc15_reg_entry *reg, - uint32_t se_id, uint32_t inst_id, uint32_t value, - uint32_t *sec_count, uint32_t *ded_count) -{ - uint32_t i; - uint32_t sec_cnt, ded_cnt; - - for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { - if(ras_subblock_regs[i].reg_offset != reg->reg_offset || - ras_subblock_regs[i].seg != reg->seg || - ras_subblock_regs[i].inst != reg->inst) - continue; - - sec_cnt = (value & - ras_subblock_regs[i].sec_count_mask) >> - ras_subblock_regs[i].sec_count_shift; - if (sec_cnt) { - DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", - ras_subblock_regs[i].name, - se_id, inst_id, - sec_cnt); - *sec_count += sec_cnt; - } - - ded_cnt = (value & - ras_subblock_regs[i].ded_count_mask) >> - ras_subblock_regs[i].ded_count_shift; - if (ded_cnt) { - DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", - ras_subblock_regs[i].name, - se_id, inst_id, - ded_cnt); - *ded_count += ded_cnt; - } - } - - return 0; -} - static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t sec_count = 0, ded_count = 0; - uint32_t i, j, k; + uint32_t sec_count, ded_count; + uint32_t i; uint32_t reg_value; + uint32_t se_id, instance_id; if (adev->asic_type != CHIP_VEGA20) return -EINVAL; @@ -6156,29 +6094,71 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count = 0; mutex_lock(&adev->grbm_idx_mutex); + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { + for (instance_id = 0; instance_id < 256; instance_id++) { + for (i = 0; + i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); + i++) { + if (se_id != 0 && + !gfx_ras_edc_regs[i].per_se_instance) + continue; + if (instance_id >= gfx_ras_edc_regs[i].num_instance) + continue; - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { - for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { - for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0, k); - reg_value = - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - if (reg_value) - __get_ras_error_count(&sec_ded_counter_registers[i], - j, k, reg_value, - &sec_count, &ded_count); + gfx_v9_0_select_se_sh(adev, se_id, 0, + instance_id); + + reg_value = RREG32( + adev->reg_offset[gfx_ras_edc_regs[i].ip] + [gfx_ras_edc_regs[i].inst] + [gfx_ras_edc_regs[i].seg] + + gfx_ras_edc_regs[i].reg_offset); + sec_count = reg_value & + gfx_ras_edc_regs[i].sec_count_mask; + ded_count = reg_value & + gfx_ras_edc_regs[i].ded_count_mask; + if (sec_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, SEC %d\n", + se_id, 
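/*
 * Illustrative sketch, not part of the patch: both the removed
 * __get_ras_error_count() helper and the new per-entry loop above isolate
 * the SEC (correctable) and DED (uncorrectable) bitfields of a packed EDC
 * counter register and accumulate them into the ce/ue totals.  The field
 * layout below is made up for the example; the real registers differ.
 */
#include <stdint.h>
#include <stdio.h>

/* hypothetical layout: bits 7:0 = SEC count, bits 15:8 = DED count */
#define EDC_SEC_MASK	0x000000ffu
#define EDC_SEC_SHIFT	0
#define EDC_DED_MASK	0x0000ff00u
#define EDC_DED_SHIFT	8

static void count_edc_errors(uint32_t reg_value,
			     uint32_t *ce_count, uint32_t *ue_count)
{
	uint32_t sec = (reg_value & EDC_SEC_MASK) >> EDC_SEC_SHIFT;
	uint32_t ded = (reg_value & EDC_DED_MASK) >> EDC_DED_SHIFT;

	if (sec)
		*ce_count += sec;	/* correctable (single-bit) errors   */
	if (ded)
		*ue_count += ded;	/* uncorrectable (double-bit) errors */
}

int main(void)
{
	uint32_t ce = 0, ue = 0;

	count_edc_errors(0x0203, &ce, &ue);	/* 3 SEC, 2 DED */
	printf("ce=%u ue=%u\n", (unsigned int)ce, (unsigned int)ue);
	return 0;
}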
instance_id, + gfx_ras_edc_regs[i].name, + sec_count); + err_data->ce_count++; + } + + if (ded_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, DED %d\n", + se_id, instance_id, + gfx_ras_edc_regs[i].name, + ded_count); + err_data->ue_count++; + } } } } - - err_data->ce_count += sec_count; - err_data->ue_count += ded_count; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); - gfx_v9_0_query_utc_edc_status(adev, err_data); + return 0; +} +static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } @@ -6345,7 +6325,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { .set = gfx_v9_0_set_cp_ecc_error_state, - .process = amdgpu_gfx_cp_ecc_error_irq, + .process = gfx_v9_0_cp_ecc_error_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..6ce37ce77d14 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -178,8 +178,6 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..8b789f750b72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -46,25 +46,21 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; } -void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base) +static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - - gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); + gfxhub_v2_0_init_gart_pt_regs(adev); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -155,15 +151,6 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); tmp = mmGCVM_L2_CNTL3_DEFAULT; - if 
(adev->gmc.translate_further) { - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 9); - } else { - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 6); - } WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); tmp = mmGCVM_L2_CNTL4_DEFAULT; @@ -179,8 +166,6 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h index 392b8cd94fc0..06807940748b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -31,7 +31,5 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v2_0_init(struct amdgpu_device *adev); u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6e1b25bd1fe7..241a4e57cf4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -278,7 +278,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int r; /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); mutex_lock(&adev->mman.gtt_window_lock); @@ -309,7 +309,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); job->vm_needs_flush = true; - job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); @@ -398,23 +397,43 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid * 1 system * 0 valid */ - -static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) { - switch (flags) { + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + + switch (flags & AMDGPU_VM_MTYPE_MASK) { case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + break; case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + break; case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + break; case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + break; case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + break; default: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + 
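/*
 * Illustrative sketch, not part of the patch: REG_SET_FIELD(), which
 * appears throughout the gfxhub register setup above, is a read-modify-write
 * of one bitfield within a register value.  The BANK_SELECT layout below is
 * hypothetical, used only to show the mask/shift mechanics.
 */
#include <stdint.h>
#include <stdio.h>

/* hypothetical field: BANK_SELECT occupies bits 5:0 of the register */
#define BANK_SELECT_MASK	0x0000003fu
#define BANK_SELECT_SHIFT	0

static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t shift,
			  uint32_t value)
{
	/* clear the field, then OR in the new value */
	return (reg & ~mask) | ((value << shift) & mask);
}

int main(void)
{
	uint32_t tmp = 0xffffffffu;

	tmp = set_field(tmp, BANK_SELECT_MASK, BANK_SELECT_SHIFT, 9);
	printf("0x%08x\n", (unsigned int)tmp);	/* low 6 bits now hold 9 */
	return 0;
}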
break; } + + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; } static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -441,32 +460,12 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, } } -static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags) -{ - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); - - if (mapping->flags & AMDGPU_PTE_PRT) { - *flags |= AMDGPU_PTE_PRT; - *flags |= AMDGPU_PTE_SNOOPED; - *flags |= AMDGPU_PTE_LOG; - *flags |= AMDGPU_PTE_SYSTEM; - *flags &= ~AMDGPU_PTE_VALID; - } -} - static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, - .map_mtype = gmc_v10_0_map_mtype, - .get_vm_pde = gmc_v10_0_get_vm_pde, - .get_vm_pte = gmc_v10_0_get_vm_pte + .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags, + .get_vm_pde = gmc_v10_0_get_vm_pde }; static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -520,7 +519,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - base = gfxhub_v2_0_get_fb_location(adev); + if (!amdgpu_sriov_vf(adev)) + base = gfxhub_v2_0_get_fb_location(adev); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); @@ -540,13 +540,24 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) { + int chansize, numchan; + + if (!amdgpu_emu_mode) + adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + else { + /* hard code vram_width for emulation */ + chansize = 128; + numchan = 1; + adev->gmc.vram_width = numchan * chansize; + } + /* Could aper size report 0 ? */ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; adev->gmc.visible_vram_size = adev->gmc.aper_size; @@ -625,7 +636,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v10_0_sw_init(void *handle) { - int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v2_0_init(adev); @@ -633,15 +644,7 @@ static int gmc_v10_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (!amdgpu_emu_mode) - adev->gmc.vram_width = vram_width; - else - adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ - - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: @@ -791,7 +794,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* Flush HDP after it is initialized */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b205039350b6..9fb1765e92d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -386,20 +386,27 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } +static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } -static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags) -{ - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_PRT; -} - static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -1146,7 +1153,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, - .get_vm_pte = gmc_v6_0_get_vm_pte, + .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f08e5330642d..0c3d9bc3a641 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -463,20 +463,27 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } +static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } -static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags) -{ - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_PRT; -} - /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1336,8 +1343,8 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, - .get_vm_pde = gmc_v7_0_get_vm_pde, - .get_vm_pte = gmc_v7_0_get_vm_pte + .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, + .get_vm_pde = gmc_v7_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6d96d40fbcb8..ea764dd9245d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -686,21 +686,29 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ +static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & 
AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } -static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags) -{ - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_PRT; -} - /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1703,8 +1711,8 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, - .get_vm_pde = gmc_v8_0_get_vm_pde, - .get_vm_pte = gmc_v8_0_get_vm_pte + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, + .get_vm_pde = gmc_v8_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..f91337030dc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -51,12 +51,10 @@ #include "gfxhub_v1_1.h" #include "mmhub_v9_4.h" #include "umc_v6_1.h" -#include "umc_v6_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "amdgpu_ras.h" -#include "amdgpu_xgmi.h" /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d @@ -245,6 +243,44 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, return 0; } +static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry) +{ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, err_data); + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) + amdgpu_ras_reset_gpu(adev, 0); + + return AMDGPU_RAS_SUCCESS; +} + +static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gmc.umc_ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -319,10 +355,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } /* If it's the first fault for this address, process it normally */ - if (retry_fault && !in_interrupt() && - amdgpu_vm_handle_fault(adev, entry->pasid, addr)) - return 1; /* This also prevents sending it to KFD */ - if (!amdgpu_sriov_vf(adev)) { /* * Issue a dummy read to wait for the status register to @@ -385,7 +417,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { .set = 
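
The gmc_v9_0_process_ras_data_cb() added in this hunk follows a common RAS flow: call optional per-block callbacks to query error counts (and clear status), then escalate to a GPU reset only if an uncorrectable error was counted. A hedged standalone sketch of that control flow (all struct and function names here are invented for illustration):

    #include <stdio.h>

    struct err_data {
            unsigned int ce_count;  /* correctable errors   */
            unsigned int ue_count;  /* uncorrectable errors */
    };

    struct ras_ops {
            /* either callback may be absent on a given block */
            void (*query_error_count)(struct err_data *d);
            void (*query_error_address)(struct err_data *d);
    };

    static void request_gpu_reset(void)
    {
            puts("uncorrectable error: scheduling GPU reset");
    }

    /* Mirror of the callback shape: probe optional hooks, reset only on UE. */
    static int process_ras_data(const struct ras_ops *ops, struct err_data *d)
    {
            if (ops->query_error_count)
                    ops->query_error_count(d);
            /* the address query is also expected to clear the error status */
            if (ops->query_error_address)
                    ops->query_error_address(d);

            if (d->ue_count)
                    request_gpu_reset();

            return 0;
    }

    static void fake_count(struct err_data *d) { d->ue_count = 1; }

    int main(void)
    {
            struct ras_ops ops = { .query_error_count = fake_count };
            struct err_data d = { 0 };

            return process_ras_data(&ops, &d);
    }
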
gmc_v9_0_ecc_interrupt_state, - .process = amdgpu_umc_process_ecc_irq, + .process = gmc_v9_0_process_ecc_irq, }; static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) @@ -552,25 +584,44 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ -static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) { - switch (flags) { + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + + switch (flags & AMDGPU_VM_MTYPE_MASK) { case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + break; case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + break; case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); - case AMDGPU_VM_MTYPE_RW: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); + break; case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + break; case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); + break; default: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + break; } + + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; } static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -597,34 +648,12 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } -static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags) -{ - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - - *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; - - if (mapping->flags & AMDGPU_PTE_PRT) { - *flags |= AMDGPU_PTE_PRT; - *flags &= ~AMDGPU_PTE_VALID; - } - - if (adev->asic_type == CHIP_ARCTURUS && - !(*flags & AMDGPU_PTE_SYSTEM) && - mapping->bo_va->is_xgmi) - *flags |= AMDGPU_PTE_SNOOPED; -} - static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .map_mtype = gmc_v9_0_map_mtype, - .get_vm_pde = gmc_v9_0_get_vm_pde, - .get_vm_pte = gmc_v9_0_get_vm_pte + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, + .get_vm_pde = gmc_v9_0_get_vm_pde }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -635,9 +664,6 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { - case CHIP_VEGA10: - adev->umc.funcs = &umc_v6_0_funcs; - break; case CHIP_VEGA20: adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; @@ -655,7 +681,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA20: - adev->mmhub.funcs = &mmhub_v1_0_funcs; + adev->mmhub_funcs = &mmhub_v1_0_funcs; break; default: break; @@ -736,10 +762,140 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct 
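
The amdgpu_gmc_funcs table touched above is the usual per-ASIC ops-table idiom: one struct of function pointers per GMC generation, selected once at init and then called indirectly so common code never branches on the ASIC type. A compact sketch of the idiom (names and behaviour are illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    struct gmc_funcs {
            uint64_t (*get_vm_pte_flags)(uint32_t flags);
            void     (*flush_gpu_tlb)(unsigned int vmid);
    };

    static uint64_t v9_pte_flags(uint32_t flags) { return flags | 0x100; }
    static void     v9_flush_tlb(unsigned int vmid) { printf("flush vmid %u\n", vmid); }

    static const struct gmc_funcs gmc_v9_funcs = {
            .get_vm_pte_flags = v9_pte_flags,
            .flush_gpu_tlb    = v9_flush_tlb,
    };

    struct device {
            const struct gmc_funcs *gmc_funcs;      /* chosen per ASIC at init time */
    };

    int main(void)
    {
            struct device dev = { .gmc_funcs = &gmc_v9_funcs };

            /* callers never care which ASIC they run on */
            dev.gmc_funcs->flush_gpu_tlb(0);
            printf("pte 0x%llx\n",
                   (unsigned long long)dev.gmc_funcs->get_vm_pte_flags(0x7));
            return 0;
    }
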
amdgpu_device *adev) return 0; } +static int gmc_v9_0_ecc_ras_block_late_init(void *handle, + struct ras_fs_if *fs_info, struct ras_common_if *ras_block) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = NULL; + struct ras_ih_if ih_info = { + .cb = gmc_v9_0_process_ras_data_cb, + }; + int r; + + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) + ras_if = &adev->gmc.umc_ras_if; + else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB) + ras_if = &adev->gmc.mmhub_ras_if; + else + BUG(); + + if (!amdgpu_ras_is_supported(adev, ras_block->block)) { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); + return 0; + } + + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + ras_block->block); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ + goto resume; + } + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = *ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + ras_block->block); + r = 0; + } + goto feature; + } + + ih_info.head = **ras_if; + fs_info->head = **ras_if; + + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + } + + amdgpu_ras_debugfs_create(adev, fs_info); + + r = amdgpu_ras_sysfs_create(adev, fs_info); + if (r) + goto sysfs; +resume: + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto irq; + } + + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return r; +} + +static int gmc_v9_0_ecc_late_init(void *handle) +{ + int r; + + struct ras_fs_if umc_fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_common_if umc_ras_block = { + .block = AMDGPU_RAS_BLOCK__UMC, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "umc", + }; + struct ras_fs_if mmhub_fs_info = { + .sysfs_name = "mmhub_err_count", + .debugfs_name = "mmhub_err_inject", + }; + struct ras_common_if mmhub_ras_block = { + .block = AMDGPU_RAS_BLOCK__MMHUB, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "mmhub", + }; + + r = gmc_v9_0_ecc_ras_block_late_init(handle, + &umc_fs_info, &umc_ras_block); + if (r) + return r; + + r = gmc_v9_0_ecc_ras_block_late_init(handle, + &mmhub_fs_info, &mmhub_ras_block); + return r; +} + static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + bool r; if (!gmc_v9_0_keep_stolen_memory(adev)) amdgpu_bo_late_init(adev); @@ -773,7 +929,7 @@ static int gmc_v9_0_late_init(void *handle) } } - r = amdgpu_gmc_ras_late_init(adev); + r = gmc_v9_0_ecc_late_init(handle); if (r) return r; @@ -814,11 +970,33 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int 
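
gmc_v9_0_ecc_ras_block_late_init() above chains several setup steps (RAS feature enable, interrupt handler, debugfs/sysfs nodes, IRQ get) and unwinds them with the kernel's goto-cleanup ladder when any step fails. The essential shape of that ladder, reduced to a standalone sketch with dummy steps:

    #include <stdio.h>

    static int step(const char *name, int fail)
    {
            printf("setup %s\n", name);
            return fail ? -1 : 0;
    }

    static void undo(const char *name)
    {
            printf("undo  %s\n", name);
    }

    /* Each successful step gains an undo target; a failure jumps to the label
     * that tears down everything set up so far, in reverse order. */
    static int ras_block_init(int fail_at)
    {
            int r;

            r = step("feature", fail_at == 1);
            if (r)
                    goto out;

            r = step("irq handler", fail_at == 2);
            if (r)
                    goto undo_feature;

            r = step("sysfs", fail_at == 3);
            if (r)
                    goto undo_irq;

            return 0;

    undo_irq:
            undo("irq handler");
    undo_feature:
            undo("feature");
    out:
            return r;
    }

    int main(void)
    {
            return ras_block_init(3) ? 1 : 0;
    }
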
gmc_v9_0_mc_init(struct amdgpu_device *adev) { + int chansize, numchan; int r; + if (amdgpu_sriov_vf(adev)) { + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + } else if (amdgpu_emu_mode != 1) { + adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + } + + if (!adev->gmc.vram_width) { + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + + numchan = adev->df_funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } + /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { @@ -930,7 +1108,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); @@ -941,32 +1119,7 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (amdgpu_sriov_vf(adev)) - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - else if (amdgpu_emu_mode != 1) - adev->gmc.vram_width = vram_width; - - if (!adev->gmc.vram_width) { - int chansize, numchan; - - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - - numchan = adev->df_funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; - } - - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_RAVEN: adev->num_vmhubs = 2; @@ -1087,7 +1240,33 @@ static int gmc_v9_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; void *stolen_vga_buf; - amdgpu_gmc_ras_fini(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->gmc.umc_ras_if) { + struct ras_common_if *ras_if = adev->gmc.umc_ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + /* remove fs first */ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + /* remove the IH */ + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && + adev->gmc.mmhub_ras_if) { + struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if; + + /* remove fs and disable ras feature */ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -1137,7 +1316,13 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) */ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { - int r; + int r, i; + bool value; + u32 tmp; + + amdgpu_device_program_register_sequence(adev, + golden_settings_vega10_hdp, + 
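
gmc_v9_0_mc_init() in this hunk determines the VRAM bus width in three stages: hard-code it under SR-IOV (the relevant registers are not readable there), otherwise ask the video BIOS, and fall back to channel-count times channel-size if nothing usable came back. A self-contained sketch of that decision tree (the 64/128-bit channel sizes mirror the hunk; the helper functions are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for "what the platform reports"; real code reads ATOM/DF regs. */
    static int atom_vram_width(void)   { return 0; }   /* 0 = not reported */
    static int hbm_channel_count(void) { return 8; }

    static int vram_width(bool is_sriov, bool is_apu)
    {
            int width = 0;

            if (is_sriov)
                    width = 2048;           /* registers unreadable: fixed value */
            else
                    width = atom_vram_width();

            if (!width) {
                    /* fall back to channel count x per-channel width */
                    int chansize = is_apu ? 64 : 128;

                    width = hbm_channel_count() * chansize;
            }
            return width;
    }

    int main(void)
    {
            printf("vram width: %d bits\n", vram_width(false, false));
            return 0;
    }
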
ARRAY_SIZE(golden_settings_vega10_hdp)); if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -1147,6 +1332,15 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; + switch (adev->asic_type) { + case CHIP_RAVEN: + /* TODO for renoir */ + mmhub_v1_0_update_power_gating(adev, true); + break; + default: + break; + } + r = gfxhub_v1_0_gart_enable(adev); if (r) return r; @@ -1158,49 +1352,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); - adev->gart.ready = true; - return 0; -} - -static int gmc_v9_0_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool value; - int r, i; - u32 tmp; - - /* The sequence of these two function calls matters.*/ - gmc_v9_0_init_golden_registers(adev); - - if (adev->mode_info.num_crtc) { - if (adev->asic_type != CHIP_ARCTURUS) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - } - } - - amdgpu_device_program_register_sequence(adev, - golden_settings_vega10_hdp, - ARRAY_SIZE(golden_settings_vega10_hdp)); - - switch (adev->asic_type) { - case CHIP_RAVEN: - /* TODO for renoir */ - mmhub_v1_0_update_power_gating(adev, true); - break; - case CHIP_ARCTURUS: - WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); - break; - default: - break; - } - WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); @@ -1210,7 +1361,7 @@ static int gmc_v9_0_hw_init(void *handle) WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); /* After HDP is initialized, flush HDP.*/ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -1226,8 +1377,28 @@ static int gmc_v9_0_hw_init(void *handle) for (i = 0; i < adev->num_vmhubs; ++i) gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); - if (adev->umc.funcs && adev->umc.funcs->init_registers) - adev->umc.funcs->init_registers(adev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->gmc.gart_size >> 20), + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); + adev->gart.ready = true; + return 0; +} + +static int gmc_v9_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters.*/ + gmc_v9_0_init_golden_registers(adev); + + if (adev->mode_info.num_crtc) { + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + } r = gmc_v9_0_gart_enable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..04cd4b6f95d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -206,8 +206,6 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - tmp = 
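
The gart_enable()/hw_init() hunks above program the golden_settings_vega10_hdp table before enabling the GART. A golden-settings entry is essentially register + AND-mask + OR-value applied as a read-modify-write; a minimal sketch over a fake register file (entry values are arbitrary):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct reg_golden {
            unsigned int reg;       /* register index   */
            uint32_t     mask;      /* bits to preserve */
            uint32_t     value;     /* bits to force on */
    };

    static uint32_t regs[16];       /* pretend MMIO space */

    static void apply_golden(const struct reg_golden *tbl, size_t n)
    {
            for (size_t i = 0; i < n; i++) {
                    uint32_t v = regs[tbl[i].reg];

                    v = (v & tbl[i].mask) | tbl[i].value;
                    regs[tbl[i].reg] = v;
            }
    }

    int main(void)
    {
            static const struct reg_golden tbl[] = {
                    { .reg = 3, .mask = 0xffff0000, .value = 0x00000042 },
                    { .reg = 7, .mask = 0x000000ff, .value = 0x00001100 },
            };

            regs[3] = 0x12345678;
            apply_golden(tbl, sizeof(tbl) / sizeof(tbl[0]));
            printf("reg3 = 0x%08x\n", regs[3]);
            return 0;
    }
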
REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); } @@ -618,6 +616,5 @@ static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, } const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { - .ras_late_init = amdgpu_mmhub_ras_late_init, .query_ras_error_count = mmhub_v1_0_query_ras_error_count, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..3542c203c3c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -31,25 +31,20 @@ #include "soc15_common.h" -void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base) +static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) { - /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ - int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - - mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); + mmhub_v2_0_init_gart_pt_regs(adev); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -142,15 +137,6 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); tmp = mmMMVM_L2_CNTL3_DEFAULT; - if (adev->gmc.translate_further) { - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 9); - } else { - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 6); - } WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); tmp = mmMMVM_L2_CNTL4_DEFAULT; @@ -166,8 +152,6 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h index 3ea4344f0315..db16f3ece218 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h @@ -31,7 +31,5 @@ void mmhub_v2_0_init(struct amdgpu_device *adev); int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); -void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 657970f9ebfb..0cf7ef44b4b5 100644 --- 
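
mmhub_v2_0_init_gart_pt_regs() above splits the 64-bit page-table base address across a pair of 32-bit LO/HI registers with lower_32_bits()/upper_32_bits(). A standalone sketch of that split:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t reg_lo, reg_hi;     /* stand-ins for the *_LO32/_HI32 regs */

    static inline uint32_t lower_32_bits(uint64_t v) { return (uint32_t)v; }
    static inline uint32_t upper_32_bits(uint64_t v) { return (uint32_t)(v >> 32); }

    static void write_pt_base(uint64_t base)
    {
            /* program the low half, then the high half, as the hunk above does */
            reg_lo = lower_32_bits(base);
            reg_hi = upper_32_bits(base);
    }

    int main(void)
    {
            write_pt_base(0x0000008012345000ull);
            printf("hi=0x%08x lo=0x%08x\n", reg_hi, reg_lo);
            return 0;
    }
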
a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -240,8 +240,6 @@ static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, hubid * MMHUB_INSTANCE_REGISTER_OFFSET); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 9af73567e716..9fe08408db58 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -117,7 +117,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ navi10_ih_disable_interrupts(adev); - adev->nbio.funcs->ih_control(adev); + adev->nbio_funcs->ih_control(adev); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); @@ -162,7 +162,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); - adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell, + adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell, ih->doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c index 88efaecf9f70..a56c93620e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -24,6 +24,7 @@ #include "nv.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "navi10_ip_offset.h" int navi10_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c index a786d159e5e9..cadc7603ca41 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -24,6 +24,7 @@ #include "nv.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "navi12_ip_offset.h" int navi12_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c index 4ea1e8fbb601..3b5f0f65e096 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -24,6 +24,7 @@ #include "nv.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "navi14_ip_offset.h" int navi14_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index f3a3fe746222..c05d78d4efc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -27,21 +27,11 @@ #include "nbio/nbio_2_3_default.h" #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" -#include #define smnPCIE_CONFIG_CNTL 0x11180044 #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 - -static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) -{ - WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, - adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); - WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, - adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); -} - static u32 
nbio_v2_3_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -66,9 +56,10 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); else - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); } static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) @@ -320,6 +311,7 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { + .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset, @@ -339,5 +331,4 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .ih_control = nbio_v2_3_ih_control, .init_registers = nbio_v2_3_init_registers, .detect_hw_virt = nbio_v2_3_detect_hw_virt, - .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index a43b60acf7f6..5ae52085f6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -26,7 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 635d9e1fc0a3..6590143c3f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { +static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -277,6 +277,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { + .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 6dc743b73218..0743a6f016f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,7 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index d6cbf26074bc..74eecb768a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -292,6 +292,7 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { + .hdp_flush_reg = 
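
nbio_v2_3_hdp_flush() in this hunk either writes the HDP flush register directly over MMIO or, when given a ring whose funcs can emit register writes, queues the same write on that ring. A sketch of that dispatch (types, names and the register offset are invented):

    #include <stddef.h>
    #include <stdio.h>

    struct ring;

    struct ring_funcs {
            void (*emit_wreg)(struct ring *ring, unsigned int reg, unsigned int val);
    };

    struct ring {
            const struct ring_funcs *funcs;
    };

    static void direct_wreg(unsigned int reg, unsigned int val)
    {
            printf("MMIO write reg 0x%x = 0x%x\n", reg, val);
    }

    static void emit_on_ring(struct ring *ring, unsigned int reg, unsigned int val)
    {
            (void)ring;
            printf("ring write reg 0x%x = 0x%x\n", reg, val);
    }

    #define HDP_FLUSH_REG 0x1234u

    /* Flush HDP: immediate MMIO write if no capable ring, otherwise queue it. */
    static void hdp_flush(struct ring *ring)
    {
            if (!ring || !ring->funcs->emit_wreg)
                    direct_wreg(HDP_FLUSH_REG, 0);
            else
                    ring->funcs->emit_wreg(ring, HDP_FLUSH_REG, 0);
    }

    int main(void)
    {
            static const struct ring_funcs funcs = { .emit_wreg = emit_on_ring };
            struct ring r = { .funcs = &funcs };

            hdp_flush(NULL);    /* direct path */
            hdp_flush(&r);      /* ring path   */
            return 0;
    }
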
&nbio_v7_0_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index e7aefb252550..508d549c5029 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,7 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 0db458f9fafc..910fffced43b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -23,12 +23,10 @@ #include "amdgpu.h" #include "amdgpu_atombios.h" #include "nbio_v7_4.h" -#include "amdgpu_ras.h" #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" -#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c @@ -268,7 +266,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { +static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -308,208 +306,17 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { + uint32_t def, data; -} + def = data = RREG32_PCIE(smnPCIE_CI_CNTL); + data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); -static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) -{ - uint32_t bif_doorbell_intr_cntl; - - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); - if (REG_GET_FIELD(bif_doorbell_intr_cntl, - BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { - /* driver has to clear the interrupt status when bif ring is disabled */ - bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, - BIF_DOORBELL_INT_CNTL, - RAS_CNTLR_INTERRUPT_CLEAR, 1); - WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - - amdgpu_ras_global_ras_isr(adev); - } -} - -static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) -{ - uint32_t bif_doorbell_intr_cntl; - - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); - if (REG_GET_FIELD(bif_doorbell_intr_cntl, - BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { - /* driver has to clear the interrupt status when bif ring is disabled */ - bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, - BIF_DOORBELL_INT_CNTL, - RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); - WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - - amdgpu_ras_global_ras_isr(adev); - } -} - - -static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned type, - enum amdgpu_interrupt_state state) -{ - /* The ras_controller_irq enablement should be done in psp bl when it - * tries to enable ras feature. 
Driver only need to set the correct interrupt - * vector for bare-metal and sriov use case respectively - */ - uint32_t bif_intr_cntl; - - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); - if (state == AMDGPU_IRQ_STATE_ENABLE) { - /* set interrupt vector select bit to 0 to select - * vetcor 1 for bare metal case */ - bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, - BIF_INTR_CNTL, - RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); - } - - return 0; -} - -static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - /* By design, the ih cookie for ras_controller_irq should be written - * to BIFring instead of general iv ring. However, due to known bif ring - * hw bug, it has to be disabled. There is no chance the process function - * will be involked. Just left it as a dummy one. - */ - return 0; -} - -static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned type, - enum amdgpu_interrupt_state state) -{ - /* The ras_controller_irq enablement should be done in psp bl when it - * tries to enable ras feature. Driver only need to set the correct interrupt - * vector for bare-metal and sriov use case respectively - */ - uint32_t bif_intr_cntl; - - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); - if (state == AMDGPU_IRQ_STATE_ENABLE) { - /* set interrupt vector select bit to 0 to select - * vetcor 1 for bare metal case */ - bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, - BIF_INTR_CNTL, - RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); - } - - return 0; -} - -static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - /* By design, the ih cookie for err_event_athub_irq should be written - * to BIFring instead of general iv ring. However, due to known bif ring - * hw bug, it has to be disabled. There is no chance the process function - * will be involked. Just left it as a dummy one. 
- */ - return 0; -} - -static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = { - .set = nbio_v7_4_set_ras_controller_irq_state, - .process = nbio_v7_4_process_ras_controller_irq, -}; - -static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = { - .set = nbio_v7_4_set_ras_err_event_athub_irq_state, - .process = nbio_v7_4_process_err_event_athub_irq, -}; - -static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev) -{ - int r; - - /* init the irq funcs */ - adev->nbio.ras_controller_irq.funcs = - &nbio_v7_4_ras_controller_irq_funcs; - adev->nbio.ras_controller_irq.num_types = 1; - - /* register ras controller interrupt */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, - NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, - &adev->nbio.ras_controller_irq); - if (r) - return r; - - return 0; -} - -static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) -{ - - int r; - - /* init the irq funcs */ - adev->nbio.ras_err_event_athub_irq.funcs = - &nbio_v7_4_ras_err_event_athub_irq_funcs; - adev->nbio.ras_err_event_athub_irq.num_types = 1; - - /* register ras err event athub interrupt */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, - NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, - &adev->nbio.ras_err_event_athub_irq); - if (r) - return r; - - return 0; -} - -static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) -{ - uint32_t global_sts, central_sts, int_eoi; - uint32_t corr, fatal, non_fatal; - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO); - corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr); - fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal); - non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, - ParityErrNonFatal); - - if (corr) - err_data->ce_count++; - if (fatal) - err_data->ue_count++; - - if (corr || fatal || non_fatal) { - central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS); - /* clear error status register */ - WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts); - - if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS, - BIFL_RasContller_Intr_Recv)) { - /* clear interrupt status register */ - WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts); - int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI); - int_eoi = REG_SET_FIELD(int_eoi, - IOHC_INTERRUPT_EOI, SMI_EOI, 1); - WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi); - } - } -} - -static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, - bool enable) -{ - WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, - DOORBELL_INTERRUPT_DISABLE, enable ? 
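
The removed nbio_v7_4 RAS handlers above share a check-and-clear shape: read the doorbell interrupt control register, and if the status bit for this source is set, write the corresponding clear bit back and invoke the RAS ISR. A standalone sketch of that shape against a fake register (bit positions are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define RAS_INTR_STATUS (1u << 0)   /* illustrative bit positions */
    #define RAS_INTR_CLEAR  (1u << 1)

    static uint32_t doorbell_int_cntl = RAS_INTR_STATUS;   /* pretend HW register */

    static void global_ras_isr(void)
    {
            puts("RAS interrupt serviced");
    }

    static void handle_ras_intr(void)
    {
            uint32_t v = doorbell_int_cntl;     /* read status        */

            if (v & RAS_INTR_STATUS) {
                    v |= RAS_INTR_CLEAR;        /* request the clear  */
                    doorbell_int_cntl = v;      /* write back         */
                    global_ras_isr();
            }
    }

    int main(void)
    {
            handle_ras_intr();
            return 0;
    }
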
0 : 1); + if (def != data) + WREG32_PCIE(smnPCIE_CI_CNTL, data); } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { + .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset, @@ -523,7 +330,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, - .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt, .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, .get_clockgating_state = nbio_v7_4_get_clockgating_state, @@ -531,10 +337,4 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, - .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, - .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, - .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt, - .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt, - .query_ras_error_count = nbio_v7_4_query_ras_error_count, - .ras_late_init = amdgpu_nbio_ras_late_init, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index b1ac82872752..c442865bac4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -26,7 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a55a2e83fb19..de9b995b65b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -46,7 +46,6 @@ #include "gmc_v10_0.h" #include "gfxhub_v2_0.h" #include "mmhub_v2_0.h" -#include "nbio_v2_3.h" #include "nv.h" #include "navi10_ih.h" #include "gfx_v10_0.h" @@ -54,7 +53,6 @@ #include "vcn_v2_0.h" #include "dce_virtual.h" #include "mes_v10_1.h" -#include "mxgpu_nv.h" static const struct amd_ip_funcs nv_common_ip_funcs; @@ -65,8 +63,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -80,8 +78,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -121,7 +119,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 nv_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio.funcs->get_memsize(adev); + 
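
nbio_v7_4_init_registers() just above uses the common def/data idiom: snapshot the register into both variables, modify data, and write back only if the value actually changed, avoiding redundant PCIe/SMN writes. A standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pcie_ci_cntl = 0x0000beef;  /* pretend SMN register */

    #define CI_SLV_ORDERING_DIS (1u << 16)      /* illustrative field position */

    static void init_registers(void)
    {
            uint32_t def, data;

            def = data = pcie_ci_cntl;      /* snapshot the current value   */
            data |= CI_SLV_ORDERING_DIS;    /* set the field we care about  */

            if (def != data)                /* write back only on change    */
                    pcie_ci_cntl = data;
    }

    int main(void)
    {
            init_registers();
            printf("PCIE_CI_CNTL = 0x%08x\n", pcie_ci_cntl);
            return 0;
    }
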
return adev->nbio_funcs->get_memsize(adev); } static u32 nv_get_xclk(struct amdgpu_device *adev) @@ -178,7 +176,6 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, - { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, @@ -282,7 +279,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio.funcs->get_memsize(adev); + u32 memsize = adev->nbio_funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -299,7 +296,7 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; - if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) + if (smu_baco_is_support(smu)) return AMD_RESET_METHOD_BACO; else return AMD_RESET_METHOD_MODE1; @@ -371,8 +368,8 @@ static void nv_program_aspm(struct amdgpu_device *adev) static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version nv_common_ip_block = @@ -426,13 +423,9 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (r) return r; - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + adev->nbio_funcs = &nbio_v2_3_funcs; - adev->nbio.funcs->detect_hw_virt(adev); - - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_nv_virt_ops; + adev->nbio_funcs->detect_hw_virt(adev); switch (adev->asic_type) { case CHIP_NAVI10: @@ -442,7 +435,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -453,7 +446,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (adev->enable_mes) @@ -465,7 +458,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -476,7 +469,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); 
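
nv_asic_mode1_reset() above waits for the ASIC to come back by polling the NBIO memsize register until it no longer reads as all-ones, bounded by the device timeout. The usual bounded-poll idiom, sketched standalone (usleep() stands in for the kernel's udelay()):

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    static int polls_left = 3;

    /* Pretend register read: returns 0xffffffff until the device is back. */
    static uint32_t read_memsize(void)
    {
            return --polls_left > 0 ? 0xffffffffu : 0x4000u;
    }

    static int wait_for_reset_done(int usec_timeout)
    {
            for (int i = 0; i < usec_timeout; i++) {
                    if (read_memsize() != 0xffffffffu)
                            return 0;   /* device answered: reset complete */
                    usleep(1);          /* kernel code would udelay(1)     */
            }
            return -1;                  /* timed out */
    }

    int main(void)
    {
            printf("reset %s\n", wait_for_reset_done(100000) ? "timed out" : "done");
            return 0;
    }
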
amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); break; @@ -489,12 +482,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) static uint32_t nv_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio.funcs->get_rev_id(adev); + return adev->nbio_funcs->get_rev_id(adev); } static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio.funcs->hdp_flush(adev, ring); + adev->nbio_funcs->hdp_flush(adev, ring); } static void nv_invalidate_hdp(struct amdgpu_device *adev, @@ -590,11 +583,8 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = static int nv_common_early_init(void *handle) { -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; @@ -677,31 +667,16 @@ static int nv_common_early_init(void *handle) return -EINVAL; } - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_setting(adev); - xgpu_nv_mailbox_set_irq_funcs(adev); - } - return 0; } static int nv_common_late_init(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (amdgpu_sriov_vf(adev)) - xgpu_nv_mailbox_get_irq(adev); - return 0; } static int nv_common_sw_init(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (amdgpu_sriov_vf(adev)) - xgpu_nv_mailbox_add_irq_id(adev); - return 0; } @@ -719,13 +694,7 @@ static int nv_common_hw_init(void *handle) /* enable aspm */ nv_program_aspm(adev); /* setup nbio registers */ - adev->nbio.funcs->init_registers(adev); - /* remap HDP registers to a hole in mmio space, - * for the purpose of expose those registers - * to process space - */ - if (adev->nbio.funcs->remap_hdp_registers) - adev->nbio.funcs->remap_hdp_registers(adev); + adev->nbio_funcs->init_registers(adev); /* enable the doorbell aperture */ nv_enable_doorbell_aperture(adev, true); @@ -887,9 +856,9 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: - adev->nbio.funcs->update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio.funcs->update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); nv_update_hdp_mem_power_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); @@ -917,7 +886,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + adev->nbio_funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_MGCG */ tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index b345e69ba246..5d95e614369a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -40,9 +40,6 @@ MODULE_FIRMWARE("amdgpu/raven_asd.bin"); MODULE_FIRMWARE("amdgpu/picasso_asd.bin"); MODULE_FIRMWARE("amdgpu/raven2_asd.bin"); -MODULE_FIRMWARE("amdgpu/picasso_ta.bin"); -MODULE_FIRMWARE("amdgpu/raven2_ta.bin"); -MODULE_FIRMWARE("amdgpu/raven_ta.bin"); static int psp_v10_0_init_microcode(struct psp_context *psp) { @@ -51,7 +48,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *hdr; - const struct ta_firmware_header_v1_0 *ta_hdr; + DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -82,45 +79,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)hdr + le32_to_cpu(hdr->header.ucode_array_offset_bytes); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); - err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) { - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; - dev_info(adev->dev, - "psp v10.0: Failed to load firmware \"%s\"\n", - fw_name); - } else { - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *) - adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = - (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); - - adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); - } - return 0; - -out2: - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; out: if (err) { dev_err(adev->dev, @@ -269,7 +228,6 @@ static int psp_v10_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index ffeaa2f5588d..10166104b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -49,7 +49,6 @@ MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); -MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -58,8 +57,6 @@ MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); #define mmRLC_GPM_UCODE_DATA_NV10 0x5b62 #define mmSDMA0_UCODE_ADDR_NV10 0x5880 #define 
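
The TA firmware loading removed from psp_v10_0.c above locates each payload inside the firmware image via offset and size fields in the image header (ucode_array_offset_bytes and friends). A hedged sketch of that header-relative lookup (the header layout below is illustrative, not the real PSP/TA header):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative header layout -- not the real psp/ta firmware header. */
    struct fw_header {
            uint32_t ucode_version;
            uint32_t ucode_size_bytes;
            uint32_t ucode_array_offset_bytes;
    };

    /* Given a firmware image, return a pointer to the payload the header describes. */
    static const uint8_t *fw_payload(const uint8_t *image, uint32_t *size)
    {
            struct fw_header hdr;

            memcpy(&hdr, image, sizeof(hdr));   /* avoid unaligned access */
            *size = hdr.ucode_size_bytes;
            return image + hdr.ucode_array_offset_bytes;
    }

    int main(void)
    {
            uint8_t image[64] = { 0 };
            struct fw_header hdr = { 0x10, 4, sizeof(hdr) };
            uint32_t size;
            const uint8_t *p;

            memcpy(image, &hdr, sizeof(hdr));
            memcpy(image + sizeof(hdr), "\xde\xad\xbe\xef", 4);

            p = fw_payload(image, &size);
            printf("payload %u bytes, first byte 0x%02x\n", size, p[0]);
            return 0;
    }
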
mmSDMA0_UCODE_DATA_NV10 0x5881 -/* memory training timeout define */ -#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 static int psp_v11_0_init_microcode(struct psp_context *psp) { @@ -158,7 +155,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) switch (adev->asic_type) { case CHIP_VEGA20: - case CHIP_ARCTURUS: snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { @@ -186,6 +182,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_ARCTURUS: break; default: BUG(); @@ -208,26 +205,18 @@ out: return err; } -static bool psp_v11_0_is_sos_alive(struct psp_context *psp) -{ - struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - return sol_reg != 0x0; -} - static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) { int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; /* Check tOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - if (psp_v11_0_is_sos_alive(psp)) { + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; @@ -244,7 +233,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) /* Copy PSP KDB binary to memory */ memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); - /* Provide the PSP KDB to bootloader */ + /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; @@ -263,11 +252,13 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) int ret; uint32_t psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - if (psp_v11_0_is_sos_alive(psp)) { + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; @@ -305,11 +296,13 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) int ret; unsigned int psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. 
*/ - if (psp_v11_0_is_sos_alive(psp)) + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) return 0; /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ @@ -405,6 +398,59 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp) return false; } +static int psp_v11_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + static int psp_v11_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -433,74 +479,6 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, return ret; } -static int psp_v11_0_ring_create(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - unsigned int psp_ring_reg = 0; - struct psp_ring *ring = &psp->km_ring; - struct amdgpu_device *adev = psp->adev; - - if (psp_v11_0_support_vmr_ring(psp)) { - ret = psp_v11_0_ring_stop(psp, ring_type); - if (ret) { - DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); - return ret; - } - - /* Write low address of the ring to C2PMSG_102 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); - /* Write high address of the ring to C2PMSG_103 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); - - /* Write the ring initialization command to C2PMSG_101 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_INIT_GPCOM_RING); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); - - } 
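
psp_v11_0_ring_create() in this hunk is a mailbox-style handshake with the PSP: write the ring's low/high address and size into C2PMSG registers, write the command register, give the hardware a moment, then poll for the response flag in bit 31. A standalone sketch of the sequence against fake mailbox registers (register names, the delay, and the bit layout are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t msg_lo, msg_hi, msg_size, msg_cmd;  /* fake C2PMSG regs */

    /* Pretend the firmware acknowledges immediately by setting bit 31. */
    static uint32_t read_cmd_reg(void) { return msg_cmd | 0x80000000u; }

    static int wait_for_bit31(void)
    {
            for (int i = 0; i < 1000; i++)
                    if (read_cmd_reg() & 0x80000000u)
                            return 0;
            return -1;
    }

    static int ring_create(uint64_t ring_addr, uint32_t ring_size, uint32_t cmd)
    {
            msg_lo   = (uint32_t)ring_addr;             /* low address      */
            msg_hi   = (uint32_t)(ring_addr >> 32);     /* high address     */
            msg_size = ring_size;                       /* ring size        */
            msg_cmd  = cmd << 16;                       /* kick the command */

            /* real code sleeps briefly here before polling the response flag */
            return wait_for_bit31();
    }

    int main(void)
    {
            printf("ring create: %d\n", ring_create(0x80001000ull, 0x1000, 2));
            return 0;
    }
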
else { - /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); - if (ret) { - DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); - return ret; - } - - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - } - - return ret; -} - - static int psp_v11_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -563,7 +541,6 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; @@ -912,162 +889,6 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) return psp_rlc_autoload_start(psp); } -static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) -{ - int ret; - int i; - uint32_t data_32; - int max_wait; - struct amdgpu_device *adev = psp->adev; - - data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg); - - max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; - for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); - if (ret == 0) - break; - } - if (i < max_wait) - ret = 0; - else - ret = -ETIME; - - DRM_DEBUG("training %s %s, cost %d @ %d ms\n", - (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", - (ret == 0) ? 
"succeed" : "failed", - i, adev->usec_timeout/1000); - return ret; -} - -static void psp_v11_0_memory_training_fini(struct psp_context *psp) -{ - struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - - ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; - kfree(ctx->sys_cache); - ctx->sys_cache = NULL; -} - -static int psp_v11_0_memory_training_init(struct psp_context *psp) -{ - int ret; - struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - - if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { - DRM_DEBUG("memory training is not supported!\n"); - return 0; - } - - ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); - if (ctx->sys_cache == NULL) { - DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); - ret = -ENOMEM; - goto Err_out; - } - - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); - ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; - return 0; - -Err_out: - psp_v11_0_memory_training_fini(psp); - return ret; -} - -/* - * save and restore proces - */ -static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) -{ - int ret; - uint32_t p2c_header[4]; - struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - uint32_t *pcache = (uint32_t*)ctx->sys_cache; - - if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { - DRM_DEBUG("Memory training is not supported.\n"); - return 0; - } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { - DRM_ERROR("Memory training initialization failure.\n"); - return -EINVAL; - } - - if (psp_v11_0_is_sos_alive(psp)) { - DRM_DEBUG("SOS is alive, skip memory training.\n"); - return 0; - } - - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); - DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", - pcache[0], pcache[1], pcache[2], pcache[3], - p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); - - if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { - DRM_DEBUG("Short training depends on restore.\n"); - ops |= PSP_MEM_TRAIN_RESTORE; - } - - if ((ops & PSP_MEM_TRAIN_RESTORE) && - pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { - DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n"); - ops |= PSP_MEM_TRAIN_SAVE; - } - - if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && - !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && - pcache[3] == p2c_header[3])) { - DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); - ops |= PSP_MEM_TRAIN_SAVE; - } - - if ((ops & PSP_MEM_TRAIN_SAVE) && - p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { - DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n"); - ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; - } - - if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { - ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; - ops |= PSP_MEM_TRAIN_SAVE; - } - - DRM_DEBUG("Memory training ops:%x.\n", ops); - - if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { - ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); - if (ret) { - DRM_ERROR("Send long training msg failed.\n"); - return ret; - } - } - - if (ops & PSP_MEM_TRAIN_SAVE) { - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); - } - - if (ops & PSP_MEM_TRAIN_RESTORE) { - amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); - } - - if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { - ret = psp_v11_0_memory_training_send_msg(psp, 
(amdgpu_force_long_training > 0) ? - PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); - if (ret) { - DRM_ERROR("send training msg failed.\n"); - return ret; - } - } - ctx->training_cnt++; - return 0; -} - static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, @@ -1088,9 +909,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ras_trigger_error = psp_v11_0_ras_trigger_error, .ras_cure_posion = psp_v11_0_ras_cure_posion, .rlc_autoload_start = psp_v11_0_rlc_autoload_start, - .mem_training_init = psp_v11_0_memory_training_init, - .mem_training_fini = psp_v11_0_memory_training_fini, - .mem_training = psp_v11_0_memory_training, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 8f553f6f92d6..c72e43f8e0be 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -378,7 +378,6 @@ static int psp_v12_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index fdc00938327b..d2c727f6a8bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -454,7 +454,6 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4ef4d31f5231..78452cf0115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -254,7 +254,6 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, @@ -747,13 +746,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, - adev->nbio.funcs->get_hdp_flush_done_offset(adev), - adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), ref_and_mask, ref_and_mask, 10); } @@ -1691,17 +1690,102 @@ static int sdma_v4_0_early_init(void *handle) } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, - void *err_data, + struct ras_err_data 
*err_data, struct amdgpu_iv_entry *entry); static int sdma_v4_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = &adev->sdma.ras_if; struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; + struct ras_fs_if fs_info = { + .sysfs_name = "sdma_err_count", + .debugfs_name = "sdma_err_inject", + }; + struct ras_common_if ras_block = { + .block = AMDGPU_RAS_BLOCK__SDMA, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "sdma", + }; + int r, i; - return amdgpu_sdma_ras_late_init(adev, &ih_info); + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + return 0; + } + + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ + goto resume; + } + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + r = 0; + } + goto feature; + } + + ih_info.head = **ras_if; + fs_info.head = **ras_if; + + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + + amdgpu_ras_debugfs_create(adev, &fs_info); + + r = amdgpu_ras_sysfs_create(adev, &fs_info); + if (r) + goto sysfs; +resume: + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto irq; + } + + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return r; } static int sdma_v4_0_sw_init(void *handle) @@ -1773,7 +1857,21 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - amdgpu_sdma_ras_fini(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && + adev->sdma.ras_if) { + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + /*remove fs first*/ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + /*remove the IH*/ + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -1793,7 +1891,7 @@ static int sdma_v4_0_hw_init(void *handle) if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) || - (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset)) + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); if (!amdgpu_sriov_vf(adev)) @@ -1926,28 +2024,52 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, - 
void *err_data, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { + uint32_t err_source; int instance; - /* When “Full RAS” is enabled, the per-IP interrupt sources should - * be disabled and the driver should only look for the aggregated - * interrupt via sync flood - */ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - goto out; - instance = sdma_v4_0_irq_id_to_seq(entry->client_id); if (instance < 0) - goto out; + return 0; - amdgpu_sdma_process_ras_data_cb(adev, err_data, entry); + switch (entry->src_id) { + case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: + err_source = 0; + break; + case SDMA0_4_0__SRCID__SDMA_ECC: + err_source = 1; + break; + default: + return 0; + } + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + amdgpu_ras_reset_gpu(adev, 0); -out: return AMDGPU_RAS_SUCCESS; } +static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -2295,7 +2417,7 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { .set = sdma_v4_0_set_ecc_irq_state, - .process = amdgpu_sdma_process_ecc_irq, + .process = sdma_v4_0_process_ecc_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b8fdb192f6d6..f6e81680dd7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -406,7 +406,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->me == 0) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -416,8 +416,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -683,7 +683,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, 20); if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 29024e64c886..493af42152f2 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -975,17 +975,6 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {GRBM_STATUS}, - {mmGRBM_STATUS2}, - {mmGRBM_STATUS_SE0}, - {mmGRBM_STATUS_SE1}, - {mmSRBM_STATUS}, - {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, - {mmCP_STAT}, - {mmCP_STALLED_STAT1}, - {mmCP_STALLED_STAT2}, - {mmCP_STALLED_STAT3}, {GB_ADDR_CONFIG}, {MC_ARB_RAMCFG}, {GB_TILE_MODE0}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 741b564b4aa5..f8ab80c8801b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -58,9 +58,6 @@ #include "mmhub_v1_0.h" #include "df_v1_7.h" #include "df_v3_6.h" -#include "nbio_v6_1.h" -#include "nbio_v7_0.h" -#include "nbio_v7_4.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -94,8 +91,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -109,8 +106,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -124,8 +121,8 @@ static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u64 r; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); /* read low 32 bit */ @@ -145,8 +142,8 @@ static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { unsigned long flags, address, data; - address = adev->nbio.funcs->get_pcie_index_offset(adev); - data = adev->nbio.funcs->get_pcie_data_offset(adev); + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); /* write low 32 bit */ @@ -265,7 +262,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio.funcs->get_memsize(adev); + return adev->nbio_funcs->get_memsize(adev); } static u32 soc15_get_xclk(struct amdgpu_device *adev) @@ -339,7 +336,6 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, - { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, @@ -465,7 +461,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) /* wait 
for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio.funcs->get_memsize(adev); + u32 memsize = adev->nbio_funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -479,66 +475,42 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) { - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - *cap = smu_baco_is_support(smu); - return 0; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); + if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { + *cap = false; + return -ENOENT; } + + return pp_funcs->get_asic_baco_capability(pp_handle, cap); } static int soc15_asic_baco_reset(struct amdgpu_device *adev) { - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + void *pp_handle = adev->powerplay.pp_handle; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - /* avoid NBIF got stuck when do RAS recovery in BACO reset */ - if (ras && ras->supported) - adev->nbio.funcs->enable_doorbell_interrupt(adev, false); + if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) + return -ENOENT; + + /* enter BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 1)) + return -EIO; + + /* exit BACO state */ + if (pp_funcs->set_asic_baco_state(pp_handle, 0)) + return -EIO; dev_info(adev->dev, "GPU BACO reset\n"); - if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; - - if (smu_baco_reset(smu)) - return -EIO; - } else { - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - } - - /* re-enable doorbell interrupt after BACO exit */ - if (ras && ras->supported) - adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + adev->in_baco_reset = 1; return 0; } static int soc15_mode2_reset(struct amdgpu_device *adev) { - if (is_support_sw_smu(adev)) - return smu_mode2_reset(&adev->smu); if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->asic_reset_mode_2) return -ENOENT; @@ -553,7 +525,6 @@ soc15_asic_reset_method(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - case CHIP_RENOIR: return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: @@ -655,8 +626,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -670,7 +641,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = static uint32_t soc15_get_rev_id(struct amdgpu_device 
*adev) { - return adev->nbio.funcs->get_rev_id(adev); + return adev->nbio_funcs->get_rev_id(adev); } int soc15_set_ip_blocks(struct amdgpu_device *adev) @@ -696,17 +667,13 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; - if (adev->flags & AMD_IS_APU) { - adev->nbio.funcs = &nbio_v7_0_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg; - } else if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) { - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - } else { - adev->nbio.funcs = &nbio_v6_1_funcs; - adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; - } + if (adev->flags & AMD_IS_APU) + adev->nbio_funcs = &nbio_v7_0_funcs; + else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) + adev->nbio_funcs = &nbio_v7_4_funcs; + else + adev->nbio_funcs = &nbio_v6_1_funcs; if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df_funcs = &df_v3_6_funcs; @@ -714,7 +681,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->df_funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); - adev->nbio.funcs->detect_hw_virt(adev); + adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; @@ -783,26 +750,13 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_ARCTURUS: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - - if (amdgpu_sriov_vf(adev)) { - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - } else { - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - } - + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); break; case CHIP_RENOIR: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); @@ -831,7 +785,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio.funcs->hdp_flush(adev, ring); + adev->nbio_funcs->hdp_flush(adev, ring); } static void soc15_invalidate_hdp(struct amdgpu_device *adev, @@ -1203,8 +1157,7 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_MC_MGCG | - AMD_CG_SUPPORT_MC_LS | - AMD_CG_SUPPORT_IH_CG; + AMD_CG_SUPPORT_MC_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; @@ -1255,15 +1208,11 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if 
(adev->nbio.funcs->ras_late_init) - r = adev->nbio.funcs->ras_late_init(adev); - - return r; + return 0; } static int soc15_common_sw_init(void *handle) @@ -1280,10 +1229,6 @@ static int soc15_common_sw_init(void *handle) static int soc15_common_sw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_nbio_ras_fini(adev); - adev->df_funcs->sw_fini(adev); return 0; } @@ -1296,12 +1241,12 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); } } @@ -1315,13 +1260,13 @@ static int soc15_common_hw_init(void *handle) /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - adev->nbio.funcs->init_registers(adev); + adev->nbio_funcs->init_registers(adev); /* remap HDP registers to a hole in mmio space, * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) - adev->nbio.funcs->remap_hdp_registers(adev); + if (adev->nbio_funcs->remap_hdp_registers) + adev->nbio_funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); @@ -1344,14 +1289,6 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); - if (adev->nbio.ras_if && - amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { - if (adev->nbio.funcs->init_ras_controller_interrupt) - amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); - if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) - amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); - } - return 0; } @@ -1492,9 +1429,9 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->nbio.funcs->update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio.funcs->update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1509,9 +1446,9 @@ static int soc15_common_set_clockgating_state(void *handle, break; case CHIP_RAVEN: case CHIP_RENOIR: - adev->nbio.funcs->update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio.funcs->update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? 
true : false); @@ -1540,7 +1477,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + adev->nbio_funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..a3dde0c31f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -67,8 +67,6 @@ struct soc15_allowed_register_entry { #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } -#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT - void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int soc15_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 47c4b96b14d1..8502e736f721 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -75,17 +75,6 @@ static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) RSMU_UMC_INDEX_MODE_EN, 0); } -static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) -{ - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - return REG_GET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE); -} - static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -176,8 +165,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, uint32_t umc_reg_offset, uint32_t channel_index) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page; - struct eeprom_table_record *err_rec; + uint64_t mc_umc_status, err_addr; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); @@ -189,7 +177,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); /* calculate error address if ue/ce error is detected */ @@ -204,24 +191,12 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, err_addr &= ~((0x1ULL << lsb) - 1); /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); + err_data->err_addr[err_data->err_addr_cnt] = + ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); - - err_data->err_addr_cnt++; - } + err_data->err_addr_cnt++; } /* clear umc status */ @@ -234,7 +209,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, 
amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); } -static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, +static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t umc_reg_offset, uint32_t channel_index) { @@ -264,16 +239,15 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); } -static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) +static void umc_v6_1_ras_init(struct amdgpu_device *adev) { void *ras_error_status = NULL; - amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); + amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { - .err_cnt_init = umc_v6_1_err_cnt_init, - .ras_late_init = amdgpu_umc_ras_late_init, + .ras_init = umc_v6_1_ras_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 217084d56ab8..670784a78512 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -206,14 +206,13 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -221,15 +220,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -269,14 +268,13 @@ err: */ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -284,15 +282,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -329,20 +327,13 @@ err: static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct 
dma_fence *fence = NULL; - struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, NULL); - if (r) - return r; - - r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL); + r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); if (r) goto error; - r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence); + r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); if (r) goto error; @@ -354,8 +345,6 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 0995378d8263..01f658fa72c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -214,14 +214,13 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * Open up a stream for HW test */ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -229,15 +228,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -276,14 +275,13 @@ err: * Close up a stream for HW test or if userspace failed to do so */ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct amdgpu_bo *bo, - struct dma_fence **fence) + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - uint64_t addr; + uint64_t dummy; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -291,15 +289,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl return r; ib = &job->ibs[0]; - addr = amdgpu_bo_gpu_offset(bo); + dummy = ib->gpu_addr + 1024; ib->length_dw = 0; ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; @@ -336,20 +334,13 @@ err: static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence = NULL; - struct amdgpu_bo *bo = NULL; long r; - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, NULL); - if (r) - return r; - - r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL); + r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); if (r) goto error; - r = uvd_v7_0_enc_get_destroy_msg(ring, 
1, bo, &fence); + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); if (r) goto error; @@ -361,8 +352,6 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index b4f84a820a44..93b3500e522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -202,6 +202,7 @@ static int vcn_v1_0_hw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; + ring->sched.ready = true; r = amdgpu_ring_test_helper(ring); if (r) goto done; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 38f787a560cb..36ad0c0e8efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -244,24 +244,33 @@ static int vcn_v2_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, ring->doorbell_index, 0); - r = amdgpu_ring_test_helper(ring); - if (r) + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; - r = amdgpu_ring_test_helper(ring); - if (r) + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } } ring = &adev->vcn.inst->ring_jpeg; - r = amdgpu_ring_test_helper(ring); - if (r) + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } done: if (!r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 03083e5f731a..395c2259f979 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -25,7 +25,6 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" -#include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" @@ -256,24 +255,32 @@ static int vcn_v2_5_hw_init(void *handle) continue; ring = &adev->vcn.inst[j].ring_dec; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, ring->doorbell_index, j); - r = amdgpu_ring_test_helper(ring); - if (r) + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst[j].ring_enc[i]; - r = amdgpu_ring_test_helper(ring); - if (r) + ring->sched.ready = false; + continue; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } } ring = &adev->vcn.inst[j].ring_jpeg; - r = amdgpu_ring_test_helper(ring); - if (r) + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; goto done; + } } done: if (!r) @@ -416,6 +423,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) * vcn_v2_5_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer + * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -534,6 +542,7 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) * vcn_v2_5_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer + * @sw: enable SW clock gating * * Enable clock 
gating for VCN block */ @@ -707,9 +716,6 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) uint32_t rb_bufsz, tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; @@ -940,9 +946,6 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 5cb7e231de5f..9eae3536ddad 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -226,7 +226,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ vega10_ih_disable_interrupts(adev); - adev->nbio.funcs->ih_control(adev); + adev->nbio_funcs->ih_control(adev); ih = &adev->irq.ih; /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ @@ -675,49 +675,10 @@ static int vega10_ih_soft_reset(void *handle) return 0; } -static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, - bool enable) -{ - uint32_t data, def, field_val; - - if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { - def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); - field_val = enable ? 0 : 1; - /** - * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE - * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. - */ - if (adev->asic_type > CHIP_VEGA10) { - data = REG_SET_FIELD(data, IH_CLK_CTRL, - IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); - data = REG_SET_FIELD(data, IH_CLK_CTRL, - IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); - } - - data = REG_SET_FIELD(data, IH_CLK_CTRL, - DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); - data = REG_SET_FIELD(data, IH_CLK_CTRL, - OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); - data = REG_SET_FIELD(data, IH_CLK_CTRL, - LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); - data = REG_SET_FIELD(data, IH_CLK_CTRL, - DYN_CLK_SOFT_OVERRIDE, field_val); - data = REG_SET_FIELD(data, IH_CLK_CTRL, - REG_CLK_SOFT_OVERRIDE, field_val); - if (def != data) - WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); - } -} - static int vega10_ih_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - vega10_ih_update_clockgating_state(adev, - state == AMD_CG_STATE_GATE ? 
true : false); return 0; - } static int vega10_ih_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 6b52a539d51b..bd0580334f83 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "vega10_ip_offset.h" int vega10_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 556f854e3551..587e33f5dcce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "soc15_hw_ip.h" #include "vega20_ip_offset.h" int vega20_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 78e5cdc0c058..5f8c8786cac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -689,50 +689,16 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) return -EINVAL; } -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_asic_baco_capability) { - *cap = false; - return -ENOENT; - } - - return pp_funcs->get_asic_baco_capability(pp_handle, cap); -} - -int smu7_asic_baco_reset(struct amdgpu_device *adev) -{ - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state) - return -ENOENT; - - /* enter BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 1)) - return -EIO; - - /* exit BACO state */ - if (pp_funcs->set_asic_baco_state(pp_handle, 0)) - return -EIO; - - dev_info(adev->dev, "GPU BACO reset\n"); - - return 0; -} - /** - * vi_asic_pci_config_reset - soft reset GPU + * vi_asic_reset - soft reset GPU * * @adev: amdgpu_device pointer * - * Use PCI Config method to reset the GPU. - * + * Look up which blocks are hung and attempt + * to reset them. * Returns 0 for success. */ -static int vi_asic_pci_config_reset(struct amdgpu_device *adev) +static int vi_asic_reset(struct amdgpu_device *adev) { int r; @@ -748,47 +714,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; - - switch (adev->asic_type) { - case CHIP_FIJI: - case CHIP_TONGA: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_TOPAZ: - smu7_asic_get_baco_capability(adev, &baco_reset); - break; - default: - baco_reset = false; - break; - } - - if (baco_reset) - return AMD_RESET_METHOD_BACO; - else - return AMD_RESET_METHOD_LEGACY; -} - -/** - * vi_asic_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * - * Look up which blocks are hung and attempt - * to reset them. - * Returns 0 for success. 
- */ -static int vi_asic_reset(struct amdgpu_device *adev) -{ - int r; - - if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) - r = smu7_asic_baco_reset(adev); - else - r = vi_asic_pci_config_reset(adev); - - return r; + return AMD_RESET_METHOD_LEGACY; } static u32 vi_get_config_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 40d4174913a4..8de0772f986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -31,7 +31,4 @@ void vi_srbm_select(struct amdgpu_device *adev, int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); -int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap); -int smu7_asic_baco_reset(struct amdgpu_device *adev); - #endif diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba93cfe0..177d1e5329a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -33,9 +33,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; const struct kfd2kgd_calls *f2g = dev->kfd2kgd; - unsigned int vmid; - uint16_t pasid; - bool ret; + unsigned int vmid, pasid; /* This workaround is due to HW/FW limitation on Hawaii that * VMID and PASID are not written into ih_ring_entry @@ -50,13 +48,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, *tmp_ihre = *ihre; vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); - ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); + pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); tmp_ihre->ring_id &= 0x000000ff; tmp_ihre->ring_id |= vmid << 8; tmp_ihre->ring_id |= pasid << 16; - return ret && (pasid != 0) && + return (pasid != 0) && vmid >= dev->vm_info.first_vmid_kfd && vmid <= dev->vm_info.last_vmid_kfd; } diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d3400da6ab64..901fe3590165 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -905,7 +905,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x7a5d0000, 0x807c817c, 0x807aff7a, 0x00000080, 0xbf0a717c, 0xbf85fff8, - 0xbf820142, 0xbef4037e, + 0xbf820141, 0xbef4037e, 0x8775ff7f, 0x0000ffff, 0x8875ff75, 0x00040000, 0xbef60380, 0xbef703ff, @@ -967,7 +967,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x725d0000, 0xe0304080, 0x725d0100, 0xe0304100, 0x725d0200, 0xe0304180, - 0x725d0300, 0xbf820032, + 0x725d0300, 0xbf820031, 0xbef603ff, 0x01000000, 0xbef20378, 0x8078ff78, 0x00000400, 0xbefc0384, @@ -992,84 +992,83 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x725d0000, 0xe0304100, 0x725d0100, 0xe0304200, 0x725d0200, 0xe0304300, - 0x725d0300, 0xbf8c3f70, - 0xb9782a05, 0x80788178, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0x8f788978, 0xbf820001, - 0x8f788a78, 0xb9721e06, - 0x8f728a72, 0x80787278, - 0x8078ff78, 0x00000200, - 0x80f8ff78, 0x00000050, - 0xbef603ff, 0x01000000, - 0xbefc03ff, 0x0000006c, - 0x80f89078, 0xf429003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc847c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0x80f8a078, 0xf42d003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc887c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0x80f8c078, 0xf431003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe803100, 
0xbe823102, - 0xbe843104, 0xbe863106, - 0xbe883108, 0xbe8a310a, - 0xbe8c310c, 0xbe8e310e, - 0xbf06807c, 0xbf84fff0, - 0xb9782a05, 0x80788178, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0x8f788978, 0xbf820001, - 0x8f788a78, 0xb9721e06, - 0x8f728a72, 0x80787278, - 0x8078ff78, 0x00000200, - 0xbef603ff, 0x01000000, - 0xf4211bfa, 0xf0000000, - 0x80788478, 0xf4211b3a, + 0x725d0300, 0xb9782a05, + 0x80788178, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb9721e06, 0x8f728a72, + 0x80787278, 0x8078ff78, + 0x00000200, 0x80f8ff78, + 0x00000050, 0xbef603ff, + 0x01000000, 0xbefc03ff, + 0x0000006c, 0x80f89078, + 0xf429003a, 0xf0000000, + 0xbf8cc07f, 0x80fc847c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0x80f8a078, + 0xf42d003a, 0xf0000000, + 0xbf8cc07f, 0x80fc887c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0x80f8c078, + 0xf431003a, 0xf0000000, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0xbe883108, + 0xbe8a310a, 0xbe8c310c, + 0xbe8e310e, 0xbf06807c, + 0xbf84fff0, 0xb9782a05, + 0x80788178, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb9721e06, 0x8f728a72, + 0x80787278, 0x8078ff78, + 0x00000200, 0xbef603ff, + 0x01000000, 0xf4211bfa, 0xf0000000, 0x80788478, - 0xf4211b7a, 0xf0000000, - 0x80788478, 0xf4211eba, + 0xf4211b3a, 0xf0000000, + 0x80788478, 0xf4211b7a, 0xf0000000, 0x80788478, - 0xf4211efa, 0xf0000000, - 0x80788478, 0xf4211c3a, + 0xf4211eba, 0xf0000000, + 0x80788478, 0xf4211efa, 0xf0000000, 0x80788478, - 0xf4211c7a, 0xf0000000, - 0x80788478, 0xf4211e7a, + 0xf4211c3a, 0xf0000000, + 0x80788478, 0xf4211c7a, 0xf0000000, 0x80788478, - 0xf4211cfa, 0xf0000000, - 0x80788478, 0xf4211bba, + 0xf4211e7a, 0xf0000000, + 0x80788478, 0xf4211cfa, 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef815, 0xbef2036d, - 0x876dff72, 0x0000ffff, - 0xbefc036f, 0xbefe037a, - 0xbeff037b, 0x876f71ff, - 0x000003ff, 0xb9ef4803, - 0xb9f9f816, 0x876f71ff, - 0xfffff800, 0x906f8b6f, - 0xb9efa2c3, 0xb9f3f801, - 0x876fff72, 0xfc000000, - 0x906f9a6f, 0x8f6f906f, - 0xbef30380, 0x88736f73, - 0x876fff72, 0x02000000, - 0x906f996f, 0x8f6f8f6f, + 0xb9eef814, 0xf4211bba, + 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef815, + 0xbef2036d, 0x876dff72, + 0x0000ffff, 0xbefc036f, + 0xbefe037a, 0xbeff037b, + 0x876f71ff, 0x000003ff, + 0xb9ef4803, 0xb9f9f816, + 0x876f71ff, 0xfffff800, + 0x906f8b6f, 0xb9efa2c3, + 0xb9f3f801, 0x876fff72, + 0xfc000000, 0x906f9a6f, + 0x8f6f906f, 0xbef30380, 0x88736f73, 0x876fff72, - 0x01000000, 0x906f986f, - 0x8f6f996f, 0x88736f73, - 0x876fff70, 0x00800000, - 0x906f976f, 0xb9f3f807, - 0x87fe7e7e, 0x87ea6a6a, - 0xb9f0f802, 0xbf8a0000, - 0xbe80226c, 0xbf810000, + 0x02000000, 0x906f996f, + 0x8f6f8f6f, 0x88736f73, + 0x876fff72, 0x01000000, + 0x906f986f, 0x8f6f996f, + 0x88736f73, 0x876fff70, + 0x00800000, 0x906f976f, + 0xb9f3f807, 0x87fe7e7e, + 0x87ea6a6a, 0xb9f0f802, + 0xbf8a0000, 0xbe80226c, + 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_arcturus_hex[] = { 0xbf820001, 0xbf8202c4, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 4433bda2ce25..cdaa523ce6be 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -758,7 +758,6 @@ L_RESTORE_V0: buffer_load_dword 
v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 - s_waitcnt vmcnt(0) /* restore SGPRs */ //will be 2+8+16*6 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 9af45d07515b..1d3cd5c50d5f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -282,7 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } - pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", + pr_debug("Creating queue for PASID %d on gpu 0x%x\n", p->pasid, dev->id); @@ -332,7 +332,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, int retval; struct kfd_ioctl_destroy_queue_args *args = data; - pr_debug("Destroying queue id %d for pasid 0x%x\n", + pr_debug("Destroying queue id %d for pasid %d\n", args->queue_id, p->pasid); @@ -378,7 +378,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, properties.queue_percent = args->queue_percentage; properties.priority = args->queue_priority; - pr_debug("Updating queue id %d for pasid 0x%x\n", + pr_debug("Updating queue id %d for pasid %d\n", args->queue_id, p->pasid); mutex_lock(&p->mutex); @@ -855,7 +855,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; - dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid); + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); args->num_of_nodes = 0; @@ -913,7 +913,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, uint32_t nodes = 0; int ret; - dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid); + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); if (args->num_of_nodes == 0) { /* Return number of nodes, so that user space can alloacate @@ -1128,7 +1128,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, mutex_unlock(&p->mutex); if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && - pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) + pdd->qpd.vmid != 0) dev->kfd2kgd->set_scratch_backing_va( dev->kgd, args->va_addr, pdd->qpd.vmid); @@ -1801,7 +1801,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) } else goto err_i1; - dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); if (IS_ERR(process)) { @@ -1856,8 +1856,7 @@ err_i1: kfree(kdata); if (retcode) - dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n", - nr, arg, retcode); + dev_dbg(kfd_device, "ret = %d\n", retcode); return retcode; } @@ -1878,7 +1877,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("pasid 0x%x mapping mmio page\n" + pr_debug("Process %d mapping mmio page\n" " target user address == 0x%08llX\n" " physical address == 0x%08llX\n" " vm_flags == 0x%04lX\n" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index de9f68d5c312..66387caf966e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -138,7 +138,6 @@ 
static struct kfd_gpu_cache_info carrizo_cache_info[] = { /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info -#define renoir_cache_info carrizo_cache_info /* TODO - check & update Navi10 cache details */ #define navi10_cache_info carrizo_cache_info @@ -671,13 +670,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = raven_cache_info; num_of_cache_types = ARRAY_SIZE(raven_cache_info); break; - case CHIP_RENOIR: - pcache_info = renoir_cache_info; - num_of_cache_types = ARRAY_SIZE(renoir_cache_info); - break; case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; @@ -710,7 +703,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info, cu_info, mem_available, - cu_info->cu_bitmap[i % 4][j + i / 4], + cu_info->cu_bitmap[i][j], ct, cu_processor_id, k); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index d59f2cd056c6..a3441b0e385b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -761,7 +761,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) { int status = 0; unsigned int vmid; - uint16_t queried_pasid; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; @@ -783,18 +782,19 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) */ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->kgd, vmid, &queried_pasid); - - if (status && queried_pasid == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", - vmid, p->pasid); - break; + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid)) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid + (dev->kgd, vmid) == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid %d\n", + vmid, p->pasid); + break; + } } } if (vmid > last_vmid_to_scan) { - pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); + pr_err("Didn't find vmid for pasid %d\n", p->pasid); return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 9bfa50633654..9d4af961c5d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { if (pmgr->pasid != 0) { - pr_debug("H/W debugger is already active using pasid 0x%x\n", + pr_debug("H/W debugger is already active using pasid %d\n", pmgr->pasid); return -EBUSY; } @@ -117,7 +117,7 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { /* Is the requests coming from the already registered process? */ if (pmgr->pasid != p->pasid) { - pr_debug("H/W debugger is not registered by calling pasid 0x%x\n", + pr_debug("H/W debugger is not registered by calling pasid %d\n", p->pasid); return -EINVAL; } @@ -134,7 +134,7 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, { /* Is the requests coming from the already registered process? 
*/ if (pmgr->pasid != wac_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", + pr_debug("H/W debugger support was not registered for requester pasid %d\n", wac_info->process->pasid); return -EINVAL; } @@ -147,7 +147,7 @@ long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, { /* Is the requests coming from the already registered process? */ if (pmgr->pasid != adw_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", + pr_debug("H/W debugger support was not registered for requester pasid %d\n", adw_info->process->pasid); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4fa8834ce7cb..0dc1084b5e82 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -39,41 +39,6 @@ */ static atomic_t kfd_locked = ATOMIC_INIT(0); -#ifdef CONFIG_DRM_AMDGPU_CIK -extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; -#endif -extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; -extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; -extern const struct kfd2kgd_calls arcturus_kfd2kgd; -extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; - -static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { -#ifdef KFD_SUPPORT_IOMMU_V2 -#ifdef CONFIG_DRM_AMDGPU_CIK - [CHIP_KAVERI] = &gfx_v7_kfd2kgd, -#endif - [CHIP_CARRIZO] = &gfx_v8_kfd2kgd, - [CHIP_RAVEN] = &gfx_v9_kfd2kgd, -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - [CHIP_HAWAII] = &gfx_v7_kfd2kgd, -#endif - [CHIP_TONGA] = &gfx_v8_kfd2kgd, - [CHIP_FIJI] = &gfx_v8_kfd2kgd, - [CHIP_POLARIS10] = &gfx_v8_kfd2kgd, - [CHIP_POLARIS11] = &gfx_v8_kfd2kgd, - [CHIP_POLARIS12] = &gfx_v8_kfd2kgd, - [CHIP_VEGAM] = &gfx_v8_kfd2kgd, - [CHIP_VEGA10] = &gfx_v9_kfd2kgd, - [CHIP_VEGA12] = &gfx_v9_kfd2kgd, - [CHIP_VEGA20] = &gfx_v9_kfd2kgd, - [CHIP_RENOIR] = &gfx_v9_kfd2kgd, - [CHIP_ARCTURUS] = &arcturus_kfd2kgd, - [CHIP_NAVI10] = &gfx_v10_kfd2kgd, - [CHIP_NAVI12] = &gfx_v10_kfd2kgd, - [CHIP_NAVI14] = &gfx_v10_kfd2kgd, -}; - #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, @@ -386,24 +351,6 @@ static const struct kfd_device_info arcturus_device_info = { .num_sdma_queues_per_engine = 8, }; -static const struct kfd_device_info renoir_device_info = { - .asic_family = CHIP_RENOIR, - .asic_name = "renoir", - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = true, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 1, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 2, -}; - static const struct kfd_device_info navi10_device_info = { .asic_family = CHIP_NAVI10, .asic_name = "navi10", @@ -422,64 +369,133 @@ static const struct kfd_device_info navi10_device_info = { .num_sdma_queues_per_engine = 8, }; -static const struct kfd_device_info navi12_device_info = { - .asic_family = CHIP_NAVI12, - .asic_name = "navi12", - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - 
.num_sdma_queues_per_engine = 8, +struct kfd_deviceid { + unsigned short did; + const struct kfd_device_info *device_info; }; -static const struct kfd_device_info navi14_device_info = { - .asic_family = CHIP_NAVI14, - .asic_name = "navi14", - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 8, - .ih_ring_entry_size = 8 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_v9, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .needs_iommu_device = false, - .supports_cwsr = true, - .needs_pci_atomics = false, - .num_sdma_engines = 2, - .num_xgmi_sdma_engines = 0, - .num_sdma_queues_per_engine = 8, -}; - -/* For each entry, [0] is regular and [1] is virtualisation device. */ -static const struct kfd_device_info *kfd_supported_devices[][2] = { +static const struct kfd_deviceid supported_devices[] = { #ifdef KFD_SUPPORT_IOMMU_V2 - [CHIP_KAVERI] = {&kaveri_device_info, NULL}, - [CHIP_CARRIZO] = {&carrizo_device_info, NULL}, - [CHIP_RAVEN] = {&raven_device_info, NULL}, + { 0x1304, &kaveri_device_info }, /* Kaveri */ + { 0x1305, &kaveri_device_info }, /* Kaveri */ + { 0x1306, &kaveri_device_info }, /* Kaveri */ + { 0x1307, &kaveri_device_info }, /* Kaveri */ + { 0x1309, &kaveri_device_info }, /* Kaveri */ + { 0x130A, &kaveri_device_info }, /* Kaveri */ + { 0x130B, &kaveri_device_info }, /* Kaveri */ + { 0x130C, &kaveri_device_info }, /* Kaveri */ + { 0x130D, &kaveri_device_info }, /* Kaveri */ + { 0x130E, &kaveri_device_info }, /* Kaveri */ + { 0x130F, &kaveri_device_info }, /* Kaveri */ + { 0x1310, &kaveri_device_info }, /* Kaveri */ + { 0x1311, &kaveri_device_info }, /* Kaveri */ + { 0x1312, &kaveri_device_info }, /* Kaveri */ + { 0x1313, &kaveri_device_info }, /* Kaveri */ + { 0x1315, &kaveri_device_info }, /* Kaveri */ + { 0x1316, &kaveri_device_info }, /* Kaveri */ + { 0x1317, &kaveri_device_info }, /* Kaveri */ + { 0x1318, &kaveri_device_info }, /* Kaveri */ + { 0x131B, &kaveri_device_info }, /* Kaveri */ + { 0x131C, &kaveri_device_info }, /* Kaveri */ + { 0x131D, &kaveri_device_info }, /* Kaveri */ + { 0x9870, &carrizo_device_info }, /* Carrizo */ + { 0x9874, &carrizo_device_info }, /* Carrizo */ + { 0x9875, &carrizo_device_info }, /* Carrizo */ + { 0x9876, &carrizo_device_info }, /* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ + { 0x15DD, &raven_device_info }, /* Raven */ + { 0x15D8, &raven_device_info }, /* Raven */ #endif - [CHIP_HAWAII] = {&hawaii_device_info, NULL}, - [CHIP_TONGA] = {&tonga_device_info, NULL}, - [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info}, - [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info}, - [CHIP_POLARIS11] = {&polaris11_device_info, NULL}, - [CHIP_POLARIS12] = {&polaris12_device_info, NULL}, - [CHIP_VEGAM] = {&vegam_device_info, NULL}, - [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info}, - [CHIP_VEGA12] = {&vega12_device_info, NULL}, - [CHIP_VEGA20] = {&vega20_device_info, NULL}, - [CHIP_RENOIR] = {&renoir_device_info, NULL}, - [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info}, - [CHIP_NAVI10] = {&navi10_device_info, NULL}, - [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, - [CHIP_NAVI14] = {&navi14_device_info, NULL}, + { 0x67A0, &hawaii_device_info }, /* Hawaii */ + { 0x67A1, &hawaii_device_info }, /* Hawaii */ + { 0x67A2, &hawaii_device_info }, /* Hawaii */ + { 0x67A8, &hawaii_device_info }, /* Hawaii */ + { 0x67A9, &hawaii_device_info }, /* Hawaii */ + { 0x67AA, &hawaii_device_info }, /* Hawaii */ + { 0x67B0, 
&hawaii_device_info }, /* Hawaii */ + { 0x67B1, &hawaii_device_info }, /* Hawaii */ + { 0x67B8, &hawaii_device_info }, /* Hawaii */ + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ + { 0x6920, &tonga_device_info }, /* Tonga */ + { 0x6921, &tonga_device_info }, /* Tonga */ + { 0x6928, &tonga_device_info }, /* Tonga */ + { 0x6929, &tonga_device_info }, /* Tonga */ + { 0x692B, &tonga_device_info }, /* Tonga */ + { 0x6938, &tonga_device_info }, /* Tonga */ + { 0x6939, &tonga_device_info }, /* Tonga */ + { 0x7300, &fiji_device_info }, /* Fiji */ + { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ + { 0x67C0, &polaris10_device_info }, /* Polaris10 */ + { 0x67C1, &polaris10_device_info }, /* Polaris10 */ + { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ + { 0x67C8, &polaris10_device_info }, /* Polaris10 */ + { 0x67C9, &polaris10_device_info }, /* Polaris10 */ + { 0x67CA, &polaris10_device_info }, /* Polaris10 */ + { 0x67CC, &polaris10_device_info }, /* Polaris10 */ + { 0x67CF, &polaris10_device_info }, /* Polaris10 */ + { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x6FDF, &polaris10_device_info }, /* Polaris10 */ + { 0x67E0, &polaris11_device_info }, /* Polaris11 */ + { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ + { 0x67E7, &polaris11_device_info }, /* Polaris11 */ + { 0x67E8, &polaris11_device_info }, /* Polaris11 */ + { 0x67E9, &polaris11_device_info }, /* Polaris11 */ + { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6980, &polaris12_device_info }, /* Polaris12 */ + { 0x6981, &polaris12_device_info }, /* Polaris12 */ + { 0x6985, &polaris12_device_info }, /* Polaris12 */ + { 0x6986, &polaris12_device_info }, /* Polaris12 */ + { 0x6987, &polaris12_device_info }, /* Polaris12 */ + { 0x6995, &polaris12_device_info }, /* Polaris12 */ + { 0x6997, &polaris12_device_info }, /* Polaris12 */ + { 0x699F, &polaris12_device_info }, /* Polaris12 */ + { 0x694C, &vegam_device_info }, /* VegaM */ + { 0x694E, &vegam_device_info }, /* VegaM */ + { 0x694F, &vegam_device_info }, /* VegaM */ + { 0x6860, &vega10_device_info }, /* Vega10 */ + { 0x6861, &vega10_device_info }, /* Vega10 */ + { 0x6862, &vega10_device_info }, /* Vega10 */ + { 0x6863, &vega10_device_info }, /* Vega10 */ + { 0x6864, &vega10_device_info }, /* Vega10 */ + { 0x6867, &vega10_device_info }, /* Vega10 */ + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x6869, &vega10_device_info }, /* Vega10 */ + { 0x686A, &vega10_device_info }, /* Vega10 */ + { 0x686B, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x686D, &vega10_device_info }, /* Vega10 */ + { 0x686E, &vega10_device_info }, /* Vega10 */ + { 0x686F, &vega10_device_info }, /* Vega10 */ + { 0x687F, &vega10_device_info }, /* Vega10 */ + { 0x69A0, &vega12_device_info }, /* Vega12 */ + { 0x69A1, &vega12_device_info }, /* Vega12 */ + { 0x69A2, &vega12_device_info }, /* Vega12 */ + { 0x69A3, &vega12_device_info }, /* Vega12 */ + { 0x69AF, &vega12_device_info }, /* Vega12 */ + { 0x66a0, &vega20_device_info }, /* Vega20 */ + { 0x66a1, &vega20_device_info }, /* Vega20 */ + { 0x66a2, &vega20_device_info }, /* 
Vega20 */ + { 0x66a3, &vega20_device_info }, /* Vega20 */ + { 0x66a4, &vega20_device_info }, /* Vega20 */ + { 0x66a7, &vega20_device_info }, /* Vega20 */ + { 0x66af, &vega20_device_info }, /* Vega20 */ + { 0x738C, &arcturus_device_info }, /* Arcturus */ + { 0x7388, &arcturus_device_info }, /* Arcturus */ + { 0x738E, &arcturus_device_info }, /* Arcturus */ + { 0x7390, &arcturus_device_info }, /* Arcturus vf */ + { 0x7310, &navi10_device_info }, /* Navi10 */ + { 0x7312, &navi10_device_info }, /* Navi10 */ + { 0x7318, &navi10_device_info }, /* Navi10 */ + { 0x731a, &navi10_device_info }, /* Navi10 */ + { 0x731f, &navi10_device_info }, /* Navi10 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -488,25 +504,32 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, unsigned int asic_type, bool vf) +static const struct kfd_device_info *lookup_device_info(unsigned short did) { - struct kfd_dev *kfd; - const struct kfd_device_info *device_info; - const struct kfd2kgd_calls *f2g; + size_t i; - if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2) - || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) { - dev_err(kfd_device, "asic_type %d out of range\n", asic_type); - return NULL; /* asic_type out of range */ + for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { + if (supported_devices[i].did == did) { + WARN_ON(!supported_devices[i].device_info); + return supported_devices[i].device_info; + } } - device_info = kfd_supported_devices[asic_type][vf]; - f2g = kfd2kgd_funcs[asic_type]; + dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", + did); - if (!device_info || !f2g) { - dev_err(kfd_device, "%s %s not supported in kfd\n", - amdgpu_asic_name[asic_type], vf ? 
"VF" : ""); + return NULL; +} + +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) +{ + struct kfd_dev *kfd; + const struct kfd_device_info *device_info = + lookup_device_info(pdev->device); + + if (!device_info) { + dev_err(kfd_device, "kgd2kfd_probe failed\n"); return NULL; } @@ -570,12 +593,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) } bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; - kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_MEC1); kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, @@ -730,6 +751,9 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) return 0; kgd2kfd_suspend(kfd); + /* hold dqm->lock to prevent further execution*/ + dqm_lock(kfd->dqm); + kfd_signal_reset_event(kfd); return 0; } @@ -747,6 +771,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) if (!kfd->init_complete) return 0; + dqm_unlock(kfd->dqm); + ret = kfd_resume(kfd); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 984c2f2b24b6..d985e31fcc1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -195,30 +195,20 @@ static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - int allocated_vmid = -1, i; + int bit, allocated_vmid; - for (i = dqm->dev->vm_info.first_vmid_kfd; - i <= dqm->dev->vm_info.last_vmid_kfd; i++) { - if (!dqm->vmid_pasid[i]) { - allocated_vmid = i; - break; - } - } + if (dqm->vmid_bitmap == 0) + return -ENOMEM; - if (allocated_vmid < 0) { - pr_err("no more vmid to allocate\n"); - return -ENOSPC; - } - - pr_debug("vmid allocated: %d\n", allocated_vmid); - - dqm->vmid_pasid[allocated_vmid] = q->process->pasid; - - set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); + bit = ffs(dqm->vmid_bitmap) - 1; + dqm->vmid_bitmap &= ~(1 << bit); + allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; + pr_debug("vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; + set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); program_sh_mem_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() @@ -230,9 +220,8 @@ static int allocate_vmid(struct device_queue_manager *dqm, /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd)); - if (dqm->dev->kfd2kgd->set_scratch_backing_va) - dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, - qpd->sh_hidden_private_base, qpd->vmid); + dqm->dev->kfd2kgd->set_scratch_backing_va( + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); return 0; } @@ -259,6 +248,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { + int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; + /* On GFX v7, CP doesn't flush TC at dequeue */ if (q->device->device_info->asic_family == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) @@ -268,8 +259,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm, /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); - dqm->vmid_pasid[qpd->vmid] = 0; + dqm->vmid_bitmap |= (1 << bit); qpd->vmid = 0; q->properties.vmid = 0; } @@ -340,10 +331,6 @@ static int 
create_queue_nocpsch(struct device_queue_manager *dqm, mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (q->properties.is_active) { - if (!dqm->sched_running) { - WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); - goto add_queue_to_list; - } if (WARN(q->process->mm != current->mm, "should only run in user thread")) @@ -355,7 +342,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, goto out_free_mqd; } -add_queue_to_list: list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) @@ -463,11 +449,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_doorbell(qpd, q); - if (!dqm->sched_running) { - WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); - return 0; - } - retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -543,12 +524,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { - - if (!dqm->sched_running) { - WARN_ONCE(1, "Update non-HWS queue while stopped\n"); - goto out_unlock; - } - retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -604,7 +579,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID 0x%x queues\n", + pr_info_ratelimited("Evicting PASID %u queues\n", pdd->process->pasid); /* Mark all queues as evicted. Deactivate all active queues on @@ -618,11 +593,6 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - dqm->queue_count--; - - if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) - continue; - retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -631,6 +601,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, * maintain a consistent eviction state */ ret = retval; + dqm->queue_count--; } out: @@ -650,7 +621,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID 0x%x queues\n", + pr_info_ratelimited("Evicting PASID %u queues\n", pdd->process->pasid); /* Mark all queues as evicted. 
Deactivate all active queues on @@ -696,7 +667,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID 0x%x queues\n", + pr_info_ratelimited("Restoring PASID %u queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -731,11 +702,6 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - dqm->queue_count++; - - if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) - continue; - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, mm); if (retval && !ret) @@ -743,6 +709,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, * maintain a consistent eviction state */ ret = retval; + dqm->queue_count++; } qpd->evicted = 0; out: @@ -772,7 +739,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID 0x%x queues\n", + pr_info_ratelimited("Restoring PASID %u queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -912,8 +879,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[pipe] |= 1 << queue; } - memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); - + dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -936,20 +902,12 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { init_interrupts(dqm); - - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - return pm_init(&dqm->packets, dqm); - dqm->sched_running = true; - - return 0; + return pm_init(&dqm->packets, dqm); } static int stop_nocpsch(struct device_queue_manager *dqm) { - if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - pm_uninit(&dqm->packets); - dqm->sched_running = false; - + pm_uninit(&dqm->packets); return 0; } @@ -1100,7 +1058,6 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; - dqm->sched_running = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1116,7 +1073,6 @@ static int stop_cpsch(struct device_queue_manager *dqm) { dqm_lock(dqm); unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); - dqm->sched_running = false; dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); @@ -1303,10 +1259,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) { int retval; - if (!dqm->sched_running) - return 0; if (dqm->queue_count <= 0 || dqm->processes_count <= 0) return 0; + if (dqm->active_runlist) return 0; @@ -1328,8 +1283,6 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, { int retval = 0; - if (!dqm->sched_running) - return 0; if (dqm->is_hws_hang) return -EIO; if (!dqm->active_runlist) @@ -1723,8 +1676,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) struct kfd_dev *dev = dqm->dev; struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * - (dev->device_info->num_sdma_engines + - dev->device_info->num_xgmi_sdma_engines) * + dev->device_info->num_sdma_engines * dev->device_info->num_sdma_queues_per_engine + 
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; @@ -1834,13 +1786,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - case CHIP_RENOIR: case CHIP_ARCTURUS: device_queue_manager_init_v9(&dqm->asic_ops); break; case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: @@ -1934,12 +1883,6 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; - if (!dqm->sched_running) { - seq_printf(m, " Device is stopped\n"); - - return 0; - } - r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); @@ -1974,8 +1917,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) } } - for (pipe = 0; pipe < get_num_sdma_engines(dqm) + - get_num_xgmi_sdma_engines(dqm); pipe++) { + for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { for (queue = 0; queue < dqm->dev->device_info->num_sdma_queues_per_engine; queue++) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a8c37e6da027..90db2c9275f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -32,8 +32,6 @@ #include "kfd_mqd_manager.h" -#define VMID_NUM 16 - struct device_process_node { struct qcm_process_device *qpd; struct list_head list; @@ -187,8 +185,7 @@ struct device_queue_manager { unsigned int *allocated_queues; uint64_t sdma_bitmap; uint64_t xgmi_sdma_bitmap; - /* the pasid mapping for each kfd vmid */ - uint16_t vmid_pasid[VMID_NUM]; + unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; uint64_t fence_gpu_addr; @@ -201,7 +198,6 @@ struct device_queue_manager { bool is_hws_hang; struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; - bool sched_running; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 908081c85de1..d674d4b3340f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -852,8 +852,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, if (type == KFD_EVENT_TYPE_MEMORY) { dev_warn(kfd_device, - "Sending SIGSEGV to process %d (pasid 0x%x)", - p->lead_thread->pid, p->pasid); + "Sending SIGSEGV to HSA Process with PID %d ", + p->lead_thread->pid); send_sig(SIGSEGV, p->lead_thread, 0); } @@ -861,13 +861,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, if (send_signal) { if (send_sigterm) { dev_warn(kfd_device, - "Sending SIGTERM to process %d (pasid 0x%x)", - p->lead_thread->pid, p->pasid); + "Sending SIGTERM to HSA Process with PID %d ", + p->lead_thread->pid); send_sig(SIGTERM, p->lead_thread, 0); } else { dev_err(kfd_device, - "Process %d (pasid 0x%x) got unhandled exception", - p->lead_thread->pid, p->pasid); + "HSA Process (PID %d) got unhandled exception", + p->lead_thread->pid); } } } @@ -936,8 +936,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, /* Workaround on Raven to not kill the process when memory is freed * before IOMMU is able to finish processing all the excessive PPRs */ - if (dev->device_info->asic_family != CHIP_RAVEN && - dev->device_info->asic_family != CHIP_RENOIR) { + if (dev->device_info->asic_family != CHIP_RAVEN) { mutex_lock(&p->event_mutex); /* Lookup events by type and signal them */ 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index bb77b8890e77..9dc4bff8085e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -369,13 +369,8 @@ int kfd_init_apertures(struct kfd_process *process) /*Iterating over all devices*/ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { - if (!dev || kfd_devcgroup_check_permission(dev)) { - /* Skip non GPU devices and devices to which the - * current process have no access to. Access can be - * limited by placing the process in a specific - * cgroup hierarchy - */ - id++; + if (!dev) { + id++; /* Skip non GPU devices */ continue; } @@ -410,11 +405,8 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - case CHIP_RENOIR: case CHIP_ARCTURUS: case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index e05d75ecda21..3ef67d2e0d9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -54,7 +54,8 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, memcpy(patched_ihre, ih_ring_entry, dev->device_info->ih_ring_entry_size); - pasid = dev->dqm->vmid_pasid[vmid]; + pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid( + dev->kgd, vmid); /* Patch the pasid field */ patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3]) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index bc47f6a44456..c56ac47cd318 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -62,11 +62,6 @@ int kfd_interrupt_init(struct kfd_dev *kfd) } kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); - if (unlikely(!kfd->ih_wq)) { - kfifo_free(&kfd->ih_fifo); - dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n"); - return -ENOMEM; - } spin_lock_init(&kfd->interrupt_lock); INIT_WORK(&kfd->interrupt_work, interrupt_wq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 193e2835bd4d..5f35df23fb18 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -160,7 +160,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) if (!p) return; - pr_debug("Unbinding process 0x%x from IOMMU\n", pasid); + pr_debug("Unbinding process %d from IOMMU\n", pasid); mutex_lock(kfd_get_dbgmgr_mutex()); @@ -194,7 +194,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, struct kfd_dev *dev; dev_warn_ratelimited(kfd_device, - "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X", + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", PCI_BUS_NUM(pdev->devfn), PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), @@ -235,7 +235,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd) err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, p->lead_thread); if (err < 0) { - pr_err("Unexpected pasid 0x%x binding failure\n", + pr_err("Unexpected pasid %d binding failure\n", p->pasid); mutex_unlock(&p->mutex); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 11d244891393..8b4564f71a7a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -330,13 +330,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - case CHIP_RENOIR: case CHIP_ARCTURUS: kernel_queue_init_v9(&kq->ops_asic_specific); break; case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: kernel_queue_init_v10(&kq->ops_asic_specific); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f4b7f7e6c40e..986ff52d5750 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -82,7 +82,7 @@ static void kfd_exit(void) kfd_chardev_exit(); } -int kgd2kfd_init(void) +int kgd2kfd_init() { return kfd_init(); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 4a236b2c2354..9cd3eb2d90bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -69,13 +69,35 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, struct queue_properties *q) { - struct kfd_mem_obj *mqd_mem_obj; + int retval; + struct kfd_mem_obj *mqd_mem_obj = NULL; - if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd), - &mqd_mem_obj)) + /* From V9, for CWSR, the control stack is located on the next page + * boundary after the mqd, we will use the gtt allocation function + * instead of sub-allocation function. + */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!mqd_mem_obj) + return NULL; + retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE), + &(mqd_mem_obj->gtt_mem), + &(mqd_mem_obj->gpu_addr), + (void *)&(mqd_mem_obj->cpu_ptr), true); + } else { + retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd), + &mqd_mem_obj); + } + + if (retval) { + kfree(mqd_mem_obj); return NULL; + } return mqd_mem_obj; + } static void init_mqd(struct mqd_manager *mm, void **mqd, @@ -228,7 +250,14 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, static void free_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + struct kfd_dev *kfd = mm->dev; + + if (mqd_mem_obj->gtt_mem) { + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); + } else { + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } } static bool is_occupied(struct mqd_manager *mm, void *mqd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 83ef4b3dd2fb..2c8624c5b42c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -239,13 +239,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - case CHIP_RENOIR: case CHIP_ARCTURUS: pm->pmf = &kfd_v9_pm_funcs; break; case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: pm->pmf = &kfd_v10_pm_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 060a9e8b301e..c89326125d71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -36,10 +36,6 @@ #include #include #include -#include -#include -#include -#include 
#include #include "amd_shared.h" @@ -183,6 +179,10 @@ enum cache_policy { cache_policy_noncoherent }; +#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11) +#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \ + (chip) <= CHIP_NAVI10) || \ + (chip) == CHIP_HAWAII) #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) struct kfd_event_interrupt_class { @@ -230,7 +230,6 @@ struct kfd_dev { const struct kfd_device_info *device_info; struct pci_dev *pdev; - struct drm_device *ddev; unsigned int id; /* topology stub index */ @@ -688,7 +687,7 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - uint16_t pasid; + unsigned int pasid; unsigned int doorbell_index; /* @@ -1041,21 +1040,6 @@ bool kfd_is_locked(void); void kfd_inc_compute_active(struct kfd_dev *dev); void kfd_dec_compute_active(struct kfd_dev *dev); -/* Cgroup Support */ -/* Check with device cgroup if @kfd device is accessible */ -static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) -{ -#if defined(CONFIG_CGROUP_DEVICE) - struct drm_device *ddev = kfd->ddev; - - return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, - ddev->render->index, - DEVCG_ACC_WRITE | DEVCG_ACC_READ); -#else - return 0; -#endif -} - /* Debugfs */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 10f9af5784f2..40e3fc0c6942 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -416,7 +416,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) { - pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", + pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", pdd->dev->id, p->pasid); if (pdd->drm_file) { @@ -687,8 +687,6 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, struct kfd_dev *dev) { unsigned int i; - int range_start = dev->shared_resources.non_cp_doorbells_start; - int range_end = dev->shared_resources.non_cp_doorbells_end; if (!KFD_IS_SOC15(dev->device_info->asic_family)) return 0; @@ -700,16 +698,14 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, return -ENOMEM; /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. 
*/ - pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end); - pr_debug("reserved doorbell 0x%03x - 0x%03x\n", - range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, - range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); - for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { - if (i >= range_start && i <= range_end) { + if (i >= dev->shared_resources.non_cp_doorbells_start + && i <= dev->shared_resources.non_cp_doorbells_end) { set_bit(i, qpd->doorbell_bitmap); set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, + i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); } } @@ -1024,7 +1020,7 @@ static void evict_process_worker(struct work_struct *work) */ flush_delayed_work(&p->restore_work); - pr_debug("Started evicting pasid 0x%x\n", p->pasid); + pr_debug("Started evicting pasid %d\n", p->pasid); ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); @@ -1033,9 +1029,9 @@ static void evict_process_worker(struct work_struct *work) queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - pr_debug("Finished evicting pasid 0x%x\n", p->pasid); + pr_debug("Finished evicting pasid %d\n", p->pasid); } else - pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); + pr_err("Failed to evict queues of pasid %d\n", p->pasid); } static void restore_process_worker(struct work_struct *work) @@ -1050,7 +1046,7 @@ static void restore_process_worker(struct work_struct *work) * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, restore_work); - pr_debug("Started restoring pasid 0x%x\n", p->pasid); + pr_debug("Started restoring pasid %d\n", p->pasid); /* Setting last_restore_timestamp before successful restoration. 
* Otherwise this would have to be set by KGD (restore_process_bos) @@ -1066,7 +1062,7 @@ static void restore_process_worker(struct work_struct *work) ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, &p->ef); if (ret) { - pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", + pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); @@ -1076,9 +1072,9 @@ static void restore_process_worker(struct work_struct *work) ret = kfd_process_restore_queues(p); if (!ret) - pr_debug("Finished restoring pasid 0x%x\n", p->pasid); + pr_debug("Finished restoring pasid %d\n", p->pasid); else - pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); + pr_err("Failed to restore queues of pasid %d\n", p->pasid); } void kfd_suspend_all_processes(void) @@ -1092,7 +1088,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->restore_work); if (kfd_process_evict_queues(p)) - pr_err("Failed to suspend process 0x%x\n", p->pasid); + pr_err("Failed to suspend process %d\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); p->ef = NULL; @@ -1175,7 +1171,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - seq_printf(m, "Process %d PASID 0x%x:\n", + seq_printf(m, "Process %d PASID %d:\n", p->lead_thread->tgid, p->pasid); mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2659d226c056..7e6c3ee82f5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -53,7 +53,7 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, pr_debug("The new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_info("Cannot open more queues for process with pasid 0x%x\n", + pr_info("Cannot open more queues for process with pasid %d\n", pqm->process->pasid); return -ENOMEM; } @@ -298,7 +298,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n", + pr_err("Pasid %d DQM create queue %d failed. 
ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } @@ -377,7 +377,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", + pr_err("Pasid %d destroy queue %d failed, ret %d\n", pqm->process->pasid, pqn->q->properties.queue_id, retval); if (retval != -ETIME) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 69bd0628fdc6..7551761f2aa9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -269,8 +269,6 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; iolink = container_of(attr, struct kfd_iolink_properties, attr); - if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) - return -EPERM; sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type); sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj); sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min); @@ -307,8 +305,6 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; mem = container_of(attr, struct kfd_mem_properties, attr); - if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) - return -EPERM; sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type); sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes); sysfs_show_32bit_prop(buffer, "flags", mem->flags); @@ -338,8 +334,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; cache = container_of(attr, struct kfd_cache_properties, attr); - if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) - return -EPERM; sysfs_show_32bit_prop(buffer, "processor_id_low", cache->processor_id_low); sysfs_show_32bit_prop(buffer, "level", cache->cache_level); @@ -420,8 +414,6 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (strcmp(attr->name, "gpu_id") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_gpuid); - if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) - return -EPERM; return sysfs_show_32bit_val(buffer, dev->gpu_id); } @@ -429,15 +421,11 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev = container_of(attr, struct kfd_topology_device, attr_name); - if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) - return -EPERM; return sysfs_show_str_val(buffer, dev->node_props.name); } dev = container_of(attr, struct kfd_topology_device, attr_props); - if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) - return -EPERM; sysfs_show_32bit_prop(buffer, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, "simd_count", @@ -1110,9 +1098,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; - struct kfd_mem_properties *mem; - struct kfd_cache_properties *cache; - struct kfd_iolink_properties *iolink; down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) { @@ -1126,13 +1111,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; - - list_for_each_entry(mem, &dev->mem_props, list) - mem->gpu = dev->gpu; - list_for_each_entry(cache, &dev->cache_props, list) - cache->gpu = dev->gpu; - 
list_for_each_entry(iolink, &dev->io_link_props, list) - iolink->gpu = dev->gpu; break; } } @@ -1339,11 +1317,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - case CHIP_RENOIR: case CHIP_ARCTURUS: case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 15843e0fc756..d4718d58d0f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -102,7 +102,6 @@ struct kfd_mem_properties { uint32_t flags; uint32_t width; uint32_t mem_clk_max; - struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; @@ -124,7 +123,6 @@ struct kfd_cache_properties { uint32_t cache_latency; uint32_t cache_type; uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; - struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; @@ -143,7 +141,6 @@ struct kfd_iolink_properties { uint32_t max_bandwidth; uint32_t rec_transfer_size; uint32_t flags; - struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 313183b80032..71991a28a775 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -23,16 +23,16 @@ config DRM_AMD_DC_DCN2_0 depends on DRM_AMD_DC && X86 depends on DRM_AMD_DC_DCN1_0 help - Choose this option if you want to have - Navi support for display engine + Choose this option if you want to have + Navi support for display engine config DRM_AMD_DC_DCN2_1 - bool "DCN 2.1 family" - depends on DRM_AMD_DC && X86 - depends on DRM_AMD_DC_DCN2_0 - help - Choose this option if you want to have - Renoir support for display engine + bool "DCN 2.1 family" + depends on DRM_AMD_DC && X86 + depends on DRM_AMD_DC_DCN2_0 + help + Choose this option if you want to have + Renoir support for display engine config DRM_AMD_DC_DSC_SUPPORT bool "DSC support" @@ -41,16 +41,8 @@ config DRM_AMD_DC_DSC_SUPPORT depends on DRM_AMD_DC_DCN1_0 depends on DRM_AMD_DC_DCN2_0 help - Choose this option if you want to have - Dynamic Stream Compression support - -config DRM_AMD_DC_HDCP - bool "Enable HDCP support in DC" - depends on DRM_AMD_DC - help - Choose this option - if you want to support - HDCP authentication + Choose this option if you want to have + Dynamic Stream Compression support config DEBUG_KERNEL_DC bool "Enable kgdb break in DC" diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 36b3d6a5d04d..496cee000f10 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -34,19 +34,12 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/info_packet subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/power -ifdef CONFIG_DRM_AMD_DC_HDCP -subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/hdcp -endif #TODO: remove when Timing Sync feature is complete subdir-ccflags-y += -DBUILD_FEATURE_TIMING_SYNC=0 DAL_LIBS = amdgpu_dm dc modules/freesync modules/color modules/info_packet modules/power -ifdef CONFIG_DRM_AMD_DC_HDCP -DAL_LIBS += modules/hdcp -endif - AMD_DAL = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/,$(DAL_LIBS))) 
include $(AMD_DAL) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 9a3b7bf8ab0b..94911871eb9b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -31,10 +31,6 @@ ifneq ($(CONFIG_DRM_AMD_DC),) AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o endif -ifdef CONFIG_DRM_AMD_DC_HDCP -AMDGPUDM += amdgpu_dm_hdcp.o -endif - ifneq ($(CONFIG_DEBUG_FS),) AMDGPUDM += amdgpu_dm_crc.o amdgpu_dm_debugfs.o endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a6173d727e3..2fca601b43f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -37,9 +37,6 @@ #include "amdgpu_ucode.h" #include "atom.h" #include "amdgpu_dm.h" -#ifdef CONFIG_DRM_AMD_DC_HDCP -#include "amdgpu_dm_hdcp.h" -#endif #include "amdgpu_pm.h" #include "amd_shared.h" @@ -70,7 +67,6 @@ #include #include #include -#include #if defined(CONFIG_DRM_AMD_DC_DCN1_0) #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" @@ -147,12 +143,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_cursor_update(struct drm_plane *plane, struct drm_plane_state *old_plane_state); -static void amdgpu_dm_set_psr_caps(struct dc_link *link); -static bool amdgpu_dm_psr_enable(struct dc_stream_state *stream); -static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); -static bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); - - /* * dm_vblank_get_counter * @@ -273,13 +263,6 @@ static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; } -/** - * dm_pflip_high_irq() - Handle pageflip interrupt - * @interrupt_params: ignored - * - * Handles the pageflip interrupt by notifying all interested parties - * that the pageflip has been completed. - */ static void dm_pflip_high_irq(void *interrupt_params) { struct amdgpu_crtc *amdgpu_crtc; @@ -424,13 +407,6 @@ static void dm_vupdate_high_irq(void *interrupt_params) } } -/** - * dm_crtc_high_irq() - Handles CRTC interrupt - * @interrupt_params: ignored - * - * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK - * event handler. 
- */ static void dm_crtc_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -670,18 +646,11 @@ void amdgpu_dm_audio_eld_notify(struct amdgpu_device *adev, int pin) static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct dc_callback_init init_params; -#endif - adev->dm.ddev = adev->ddev; adev->dm.adev = adev; /* Zero all the fields */ memset(&init_data, 0, sizeof(init_data)); -#ifdef CONFIG_DRM_AMD_DC_HDCP - memset(&init_params, 0, sizeof(init_params)); -#endif mutex_init(&adev->dm.dc_lock); mutex_init(&adev->dm.audio_lock); @@ -728,9 +697,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_MULTI_MON_PP_MCLK_SWITCH_MASK) init_data.flags.multi_mon_pp_mclk_switch = true; - if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK) - init_data.flags.disable_fractional_pwm = true; - init_data.flags.power_down_display_on_boot = true; #ifdef CONFIG_DRM_AMD_DC_DCN2_0 @@ -747,8 +713,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - dc_hardware_init(adev->dm.dc); - adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { DRM_ERROR( @@ -759,18 +723,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) amdgpu_dm_init_color_mod(); -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) { - adev->dm.hdcp_workqueue = hdcp_create_workqueue(&adev->psp, &init_params.cp_psp, adev->dm.dc); - - if (!adev->dm.hdcp_workqueue) - DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n"); - else - DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); - - dc_init_callbacks(adev->dm.dc, &init_params); - } -#endif if (amdgpu_dm_initialize_drm_device(adev)) { DRM_ERROR( "amdgpu: failed to initialize sw for display support.\n"); @@ -812,16 +764,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) amdgpu_dm_destroy_drm_device(&adev->dm); -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->dm.hdcp_workqueue) { - hdcp_destroy(adev->dm.hdcp_workqueue); - adev->dm.hdcp_workqueue = NULL; - } - - if (adev->dm.dc) - dc_deinit_callbacks(adev->dm.dc); -#endif - /* DC Destroy TODO: Replace destroy DAL */ if (adev->dm.dc) dc_destroy(&adev->dm.dc); @@ -955,29 +897,27 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; - struct drm_connector_list_iter iter; int ret = 0; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n", - aconnector, - aconnector->base.base.id); + aconnector, aconnector->base.base.id); ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true); if (ret < 0) { DRM_ERROR("DM_MST: Failed to start MST\n"); - aconnector->dc_link->type = - dc_connection_single; - break; + ((struct dc_link *)aconnector->dc_link)->type = dc_connection_single; + return ret; + } } - } } - drm_connector_list_iter_end(&iter); + drm_modeset_unlock(&dev->mode_config.connection_mutex); return ret; } @@ -1000,11 +940,6 @@ static int dm_late_init(void *handle) params.backlight_lut_array_size = 16; 
params.backlight_lut_array = linear_lut; - /* Min backlight level after ABM reduction, Don't allow below 1% - * 0xFFFF x 0.01 = 0x28F - */ - params.min_abm_backlight = 0x28F; - /* todo will enable for navi10 */ if (adev->asic_type <= CHIP_RAVEN) { ret = dmcu_load_iram(dmcu, params); @@ -1020,13 +955,14 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct drm_dp_mst_topology_mgr *mgr; int ret; bool need_hotplug = false; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + + list_for_each_entry(connector, &dev->mode_config.connector_list, + head) { aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_port) @@ -1037,14 +973,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) if (suspend) { drm_dp_mst_topology_mgr_suspend(mgr); } else { - ret = drm_dp_mst_topology_mgr_resume(mgr, true); + ret = drm_dp_mst_topology_mgr_resume(mgr); if (ret < 0) { drm_dp_mst_topology_mgr_set_mst(mgr, false); need_hotplug = true; } } } - drm_connector_list_iter_end(&iter); + + drm_modeset_unlock(&dev->mode_config.connection_mutex); if (need_hotplug) drm_kms_helper_hotplug_event(dev); @@ -1052,7 +989,7 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) /** * dm_hw_init() - Initialize DC device - * @handle: The base driver device containing the amdgpu_dm device. + * @handle: The base driver device containing the amdpgu_dm device. * * Initialize the &struct amdgpu_display_manager device. This involves calling * the initializers of each DM component, then populating the struct with them. @@ -1082,7 +1019,7 @@ static int dm_hw_init(void *handle) /** * dm_hw_fini() - Teardown DC device - * @handle: The base driver device containing the amdgpu_dm device. + * @handle: The base driver device containing the amdpgu_dm device. * * Teardown components within &struct amdgpu_display_manager that require * cleanup. 
This involves cleaning up the DRM device, DC, and any modules that @@ -1226,7 +1163,6 @@ static int dm_resume(void *handle) struct amdgpu_display_manager *dm = &adev->dm; struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; - struct drm_connector_list_iter iter; struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; struct dm_crtc_state *dm_new_crtc_state; @@ -1249,18 +1185,17 @@ static int dm_resume(void *handle) /* program HPD filter */ dc_resume(dm->dc); + /* On resume we need to rewrite the MSTM control bits to enamble MST*/ + s3_handle_mst(ddev, false); + /* * early enable HPD Rx IRQ, should be done before set mode as short * pulse interrupts are used for MST */ amdgpu_dm_irq_resume_early(adev); - /* On resume we need to rewrite the MSTM control bits to enable MST*/ - s3_handle_mst(ddev, false); - /* Do detection*/ - drm_connector_list_iter_begin(ddev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &ddev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); /* @@ -1288,7 +1223,6 @@ static int dm_resume(void *handle) amdgpu_dm_update_connector_after_detect(aconnector); mutex_unlock(&aconnector->hpd_lock); } - drm_connector_list_iter_end(&iter); /* Force mode set in atomic commit */ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) @@ -1504,11 +1438,6 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; aconnector->edid = NULL; -#ifdef CONFIG_DRM_AMD_DC_HDCP - /* Set CP to DESIRED if it was ENABLED, so we can re-enable it again on hotplug */ - if (connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) - connector->state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; -#endif } mutex_unlock(&dev->mode_config.mutex); @@ -1523,9 +1452,6 @@ static void handle_hpd_irq(void *param) struct drm_connector *connector = &aconnector->base; struct drm_device *dev = connector->dev; enum dc_connection_type new_connection_type = dc_connection_none; -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct amdgpu_device *adev = dev->dev_private; -#endif /* * In case of failure or MST no need to update connector status or notify the OS @@ -1533,10 +1459,6 @@ static void handle_hpd_irq(void *param) */ mutex_lock(&aconnector->hpd_lock); -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) - hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index); -#endif if (aconnector->fake_enable) aconnector->fake_enable = false; @@ -1655,12 +1577,6 @@ static void handle_hpd_rx_irq(void *param) struct dc_link *dc_link = aconnector->dc_link; bool is_mst_root_connector = aconnector->mst_mgr.mst_state; enum dc_connection_type new_connection_type = dc_connection_none; -#ifdef CONFIG_DRM_AMD_DC_HDCP - union hpd_irq_data hpd_irq_data; - struct amdgpu_device *adev = dev->dev_private; - - memset(&hpd_irq_data, 0, sizeof(hpd_irq_data)); -#endif /* * TODO:Temporary add mutex to protect hpd interrupt not have a gpio @@ -1670,12 +1586,7 @@ static void handle_hpd_rx_irq(void *param) if (dc_link->type != dc_connection_mst_branch) mutex_lock(&aconnector->hpd_lock); - -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL) && -#else if (dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL) && -#endif !is_mst_root_connector) { /* Downstream Port status changed. 
*/ if (!dc_link_detect_sink(dc_link, &new_connection_type)) @@ -1710,10 +1621,6 @@ static void handle_hpd_rx_irq(void *param) drm_kms_helper_hotplug_event(dev); } } -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) - hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index); -#endif if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) || (dc_link->type == dc_connection_mst_branch)) dm_handle_hpd_rx_irq(aconnector); @@ -2427,8 +2334,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } else if (dc_link_detect(link, DETECT_REASON_BOOT)) { amdgpu_dm_update_connector_after_detect(aconnector); register_backlight_device(dm, link); - if (amdgpu_dc_feature_mask & DC_PSR_MASK) - amdgpu_dm_set_psr_caps(link); } @@ -3406,12 +3311,8 @@ static void fill_stream_properties_from_drm_display_mode( { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct hdmi_vendor_infoframe hv_frame; - struct hdmi_avi_infoframe avi_frame; - memset(&hv_frame, 0, sizeof(hv_frame)); - memset(&avi_frame, 0, sizeof(avi_frame)); + memset(timing_out, 0, sizeof(struct dc_crtc_timing)); timing_out->h_border_left = 0; timing_out->h_border_right = 0; @@ -3421,9 +3322,6 @@ static void fill_stream_properties_from_drm_display_mode( if (drm_mode_is_420_only(info, mode_in) && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; - else if (drm_mode_is_420_also(info, mode_in) - && aconnector->force_yuv420_output) - timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444) && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444; @@ -3448,13 +3346,6 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->flags.VSYNC_POSITIVE_POLARITY = 1; } - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { - drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in); - timing_out->vic = avi_frame.video_code; - drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); - timing_out->hdmi_vic = hv_frame.vic; - } - timing_out->h_addressable = mode_in->crtc_hdisplay; timing_out->h_total = mode_in->crtc_htotal; timing_out->h_sync_width = @@ -3675,9 +3566,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream->dm_stream_context = aconnector; - stream->timing.flags.LTE_340MCSC_SCRAMBLE = - drm_connector->display_info.hdmi.scdc.scrambling.low_rates; - list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { /* Search for preferred mode */ if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) { @@ -3733,9 +3621,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, dc_link_get_link_cap(aconnector->dc_link)); if (dsc_caps.is_dsc_supported) - if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0], + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc, &dsc_caps, - aconnector->dc_link->ctx->dc->debug.dsc_min_slice_height_override, link_bandwidth_kbps, &stream->timing, &stream->timing.dsc_cfg)) @@ -3752,18 +3639,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, update_stream_signal(stream, sink); - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket, 
false, false); - if (stream->link->psr_feature_enabled) { - struct dc *core_dc = stream->link->ctx->dc; - - if (dc_is_dmcu_initialized(core_dc)) { - struct dmcu *dmcu = core_dc->res_pool->dmcu; - - stream->psr_version = dmcu->dmcu_version.psr_version; - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket); - } - } finish: dc_sink_release(sink); @@ -5213,10 +5088,6 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, drm_connector_attach_vrr_capable_property( &aconnector->base); -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) - drm_connector_attach_content_protection_property(&aconnector->base, false); -#endif } } @@ -5459,53 +5330,6 @@ is_scaling_state_different(const struct dm_connector_state *dm_state, return false; } -#ifdef CONFIG_DRM_AMD_DC_HDCP -static bool is_content_protection_different(struct drm_connector_state *state, - const struct drm_connector_state *old_state, - const struct drm_connector *connector, struct hdcp_workqueue *hdcp_w) -{ - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - - /* CP is being re enabled, ignore this */ - if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED && - state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) { - state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED; - return false; - } - - /* S3 resume case, since old state will always be 0 (UNDESIRED) and the restored state will be ENABLED */ - if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED && - state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) - state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; - - /* Check if something is connected/enabled, otherwise we start hdcp but nothing is connected/enabled - * hot-plug, headless s3, dpms - */ - if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED && connector->dpms == DRM_MODE_DPMS_ON && - aconnector->dc_sink != NULL) - return true; - - if (old_state->content_protection == state->content_protection) - return false; - - if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) - return true; - - return false; -} - -static void update_content_protection(struct drm_connector_state *state, const struct drm_connector *connector, - struct hdcp_workqueue *hdcp_w) -{ - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - - if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) - hdcp_add_display(hdcp_w, aconnector->dc_link->link_index, aconnector); - else if (state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) - hdcp_remove_display(hdcp_w, aconnector->dc_link->link_index, aconnector->base.index); - -} -#endif static void remove_stream(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dc_stream_state *stream) @@ -5847,7 +5671,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); bool pflip_present = false; - bool swizzle = true; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; struct dc_plane_info plane_infos[MAX_SURFACES]; @@ -5893,9 +5716,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_plane = dm_new_plane_state->dc_state; - if (dc_plane && !dc_plane->tiling_info.gfx9.swizzle) - swizzle = false; - bundle->surface_updates[planes_count].surface = dc_plane; if (new_pcrtc_state->color_mgmt_changed) { 
bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; @@ -6050,7 +5870,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, /* Update the planes if changed or disable if we don't have any. */ if ((planes_count || acrtc_state->active_planes == 0) && acrtc_state->stream) { - bundle->stream_update.stream = acrtc_state->stream; if (new_pcrtc_state->mode_changed) { bundle->stream_update.src = acrtc_state->stream->src; bundle->stream_update.dst = acrtc_state->stream->dst; @@ -6086,29 +5905,14 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &acrtc_state->vrr_params.adjust); spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - mutex_lock(&dm->dc_lock); - if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); + mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream(dm->dc, bundle->surface_updates, planes_count, acrtc_state->stream, &bundle->stream_update, dc_state); - - if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && - acrtc_state->stream->psr_version && - !acrtc_state->stream->link->psr_feature_enabled) - amdgpu_dm_link_setup_psr(acrtc_state->stream); - else if ((acrtc_state->update_type == UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_feature_enabled && - !acrtc_state->stream->link->psr_allow_active && - swizzle) { - amdgpu_dm_psr_enable(acrtc_state->stream); - } - mutex_unlock(&dm->dc_lock); } @@ -6417,13 +6221,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) crtc->hwmode = new_crtc_state->mode; } else if (modereset_required(new_crtc_state)) { DRM_DEBUG_DRIVER("Atomic commit: RESET. crtc id %d:[%p]\n", acrtc->crtc_id, acrtc); - /* i.e. reset mode */ - if (dm_old_crtc_state->stream) { - if (dm_old_crtc_state->stream->link->psr_allow_active) - amdgpu_dm_psr_disable(dm_old_crtc_state->stream); + /* i.e. 
reset mode */ + if (dm_old_crtc_state->stream) remove_stream(adev, acrtc, dm_old_crtc_state->stream); - } } } /* for_each_crtc_in_state() */ @@ -6453,30 +6254,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->otg_inst = status->primary_otg_inst; } } -#ifdef CONFIG_DRM_AMD_DC_HDCP - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - - new_crtc_state = NULL; - - if (acrtc) - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - - if (dm_new_crtc_state && dm_new_crtc_state->stream == NULL && - connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) { - hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index); - new_con_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; - continue; - } - - if (is_content_protection_different(new_con_state, old_con_state, connector, adev->dm.hdcp_workqueue)) - update_content_protection(new_con_state, connector, adev->dm.hdcp_workqueue); - } -#endif /* Handle connector state changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { @@ -6516,10 +6293,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!scaling_changed && !abm_changed && !hdr_changed) continue; - stream_update.stream = dm_new_crtc_state->stream; if (scaling_changed) { update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, - dm_new_con_state, dm_new_crtc_state->stream); + dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); stream_update.src = dm_new_crtc_state->stream->src; stream_update.dst = dm_new_crtc_state->stream->dst; @@ -7388,7 +7164,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, status = dc_stream_get_status_from_state(old_dm_state->context, new_dm_crtc_state->stream); - stream_update.stream = new_dm_crtc_state->stream; + /* * TODO: DC modifies the surface during this call so we need * to lock here - find a way to do this without locking. @@ -7799,92 +7575,3 @@ update: freesync_capable); } -static void amdgpu_dm_set_psr_caps(struct dc_link *link) -{ - uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; - - if (!(link->connector_signal & SIGNAL_TYPE_EDP)) - return; - if (link->type == dc_connection_none) - return; - if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, - dpcd_data, sizeof(dpcd_data))) { - link->psr_feature_enabled = dpcd_data[0] ? 
true:false; - DRM_INFO("PSR support:%d\n", link->psr_feature_enabled); - } -} - -/* - * amdgpu_dm_link_setup_psr() - configure psr link - * @stream: stream state - * - * Return: true if success - */ -static bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) -{ - struct dc_link *link = NULL; - struct psr_config psr_config = {0}; - struct psr_context psr_context = {0}; - struct dc *dc = NULL; - bool ret = false; - - if (stream == NULL) - return false; - - link = stream->link; - dc = link->ctx->dc; - - psr_config.psr_version = dc->res_pool->dmcu->dmcu_version.psr_version; - - if (psr_config.psr_version > 0) { - psr_config.psr_exit_link_training_required = 0x1; - psr_config.psr_frame_capture_indication_req = 0; - psr_config.psr_rfb_setup_time = 0x37; - psr_config.psr_sdp_transmit_line_num_deadline = 0x20; - psr_config.allow_smu_optimizations = 0x0; - - ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context); - - } - DRM_DEBUG_DRIVER("PSR link: %d\n", link->psr_feature_enabled); - - return ret; -} - -/* - * amdgpu_dm_psr_enable() - enable psr f/w - * @stream: stream state - * - * Return: true if success - */ -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) -{ - struct dc_link *link = stream->link; - struct dc_static_screen_events triggers = {0}; - - DRM_DEBUG_DRIVER("Enabling psr...\n"); - - triggers.cursor_update = true; - triggers.overlay_update = true; - triggers.surface_update = true; - - dc_stream_set_static_screen_events(link->ctx->dc, - &stream, 1, - &triggers); - - return dc_link_set_psr_allow_active(link, true, false); -} - -/* - * amdgpu_dm_psr_disable() - disable psr f/w - * @stream: stream state - * - * Return: true if success - */ -static bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) -{ - - DRM_DEBUG_DRIVER("Disabling psr...\n"); - - return dc_link_set_psr_allow_active(stream->link, false, true); -} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 77c5166e6b08..c8c525a2b505 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -108,12 +108,6 @@ struct amdgpu_dm_backlight_caps { * @display_indexes_num: Max number of display streams supported * @irq_handler_list_table_lock: Synchronizes access to IRQ tables * @backlight_dev: Backlight control device - * @backlight_link: Link on which to control backlight - * @backlight_caps: Capabilities of the backlight device - * @freesync_module: Module handling freesync calculations - * @fw_dmcu: Reference to DMCU firmware - * @dmcu_fw_version: Version of the DMCU firmware - * @soc_bounding_box: SOC bounding box values provided by gpu_info FW * @cached_state: Caches device atomic state for suspend/resume * @compressor: Frame buffer compression buffer. 
See &struct dm_comressor_info */ @@ -134,7 +128,7 @@ struct amdgpu_display_manager { u16 display_indexes_num; /** - * @atomic_obj: + * @atomic_obj * * In combination with &dm_atomic_state it helps manage * global atomic state that doesn't map cleanly into existing @@ -231,9 +225,6 @@ struct amdgpu_display_manager { struct amdgpu_dm_backlight_caps backlight_caps; struct mod_freesync *freesync_module; -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct hdcp_workqueue *hdcp_workqueue; -#endif struct drm_atomic_state *cached_state; @@ -243,8 +234,6 @@ struct amdgpu_display_manager { uint32_t dmcu_fw_version; #ifdef CONFIG_DRM_AMD_DC_DCN2_0 /** - * @soc_bounding_box: - * * gpu_info FW provided soc bounding box struct or 0 if not * available in FW */ @@ -298,7 +287,6 @@ struct amdgpu_dm_connector { uint32_t debugfs_dpcd_address; uint32_t debugfs_dpcd_size; #endif - bool force_yuv420_output; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 2233d293a707..b43bb7f90e4e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -210,8 +210,6 @@ static int __set_legacy_tf(struct dc_transfer_func *func, res = mod_color_calculate_regamma_params(func, gamma, true, has_rom, NULL); - dc_gamma_release(&gamma); - return res ? 0 : -ENOMEM; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index eaad9099bc0b..a549c7c717dd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -122,16 +122,11 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, } /* Configure dithering */ - if (!dm_need_crc_dither(source)) { + if (!dm_need_crc_dither(source)) dc_stream_set_dither_option(stream_state, DITHER_OPTION_TRUN8); - dc_stream_set_dyn_expansion(stream_state->ctx->dc, stream_state, - DYN_EXPANSION_DISABLE); - } else { + else dc_stream_set_dither_option(stream_state, DITHER_OPTION_DEFAULT); - dc_stream_set_dyn_expansion(stream_state->ctx->dc, stream_state, - DYN_EXPANSION_AUTO); - } unlock: mutex_unlock(&adev->dm.dc_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index bdb37e611015..f3dfb2887ae0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -942,52 +942,6 @@ static const struct { {"aux_dpcd_data", &dp_dpcd_data_debugfs_fops} }; -/* - * Force YUV420 output if available from the given mode - */ -static int force_yuv420_output_set(void *data, u64 val) -{ - struct amdgpu_dm_connector *connector = data; - - connector->force_yuv420_output = (bool)val; - - return 0; -} - -/* - * Check if YUV420 is forced when available from the given mode - */ -static int force_yuv420_output_get(void *data, u64 *val) -{ - struct amdgpu_dm_connector *connector = data; - - *val = connector->force_yuv420_output; - - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(force_yuv420_output_fops, force_yuv420_output_get, - force_yuv420_output_set, "%llu\n"); - -/* - * Read PSR state - */ -static int psr_get(void *data, u64 *val) -{ - struct amdgpu_dm_connector *connector = data; - struct dc_link *link = connector->dc_link; - uint32_t psr_state = 0; - - dc_link_get_psr_state(link, &psr_state); - - 
*val = psr_state; - - return 0; -} - - -DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n"); - void connector_debugfs_init(struct amdgpu_dm_connector *connector) { int i; @@ -1001,12 +955,6 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) dp_debugfs_entries[i].fops); } } - if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) - debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops); - - debugfs_create_file_unsafe("force_yuv420_output", 0644, dir, connector, - &force_yuv420_output_fops); - } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 11e5784aa62a..ee1dc75f5ddc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -97,10 +97,11 @@ enum dc_edid_status dm_helpers_parse_edid_caps( (struct edid *) edid->raw_edid); sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); - if (sad_count < 0) - DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) + if (sad_count <= 0) { + DRM_INFO("SADs count is: %d, don't need to read it\n", + sad_count); return result; + } edid_caps->audio_mode_count = sad_count < DC_MAX_AUDIO_DESC_COUNT ? sad_count : DC_MAX_AUDIO_DESC_COUNT; for (i = 0; i < edid_caps->audio_mode_count; ++i) { @@ -281,7 +282,7 @@ void dm_helpers_dp_mst_clear_payload_allocation_table( * Polls for ACT (allocation change trigger) handled and sends * ALLOCATE_PAYLOAD message. */ -enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( +bool dm_helpers_dp_mst_poll_for_allocation_change_trigger( struct dc_context *ctx, const struct dc_stream_state *stream) { @@ -292,19 +293,19 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_port) - return ACT_FAILED; + return false; mst_mgr = &aconnector->mst_port->mst_mgr; if (!mst_mgr->mst_state) - return ACT_FAILED; + return false; ret = drm_dp_check_act_status(mst_mgr); if (ret) - return ACT_FAILED; + return false; - return ACT_SUCCESS; + return true; } bool dm_helpers_dp_mst_send_payload_allocation( diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 64445c4cc4c2..fa5d503d379c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -732,10 +732,8 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); @@ -753,7 +751,6 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) true); } } - drm_connector_list_iter_end(&iter); } /** @@ -768,10 +765,8 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; - struct drm_connector_list_iter iter; - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct amdgpu_dm_connector *amdgpu_dm_connector = 
to_amdgpu_dm_connector(connector); const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; @@ -784,5 +779,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) false); } } - drm_connector_list_iter_end(&iter); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 49cf39711dfc..5ec14efd4d8c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -113,7 +113,6 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = -EIO; break; case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - case AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE: result = -EBUSY; break; case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: @@ -124,14 +123,31 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, return result; } +static enum drm_connector_status +dm_dp_mst_detect(struct drm_connector *connector, bool force) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *master = aconnector->mst_port; + + enum drm_connector_status status = + drm_dp_mst_detect_port( + connector, + &master->mst_mgr, + aconnector->port); + + return status; +} + static void dm_dp_mst_connector_destroy(struct drm_connector *connector) { struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); struct amdgpu_encoder *amdgpu_encoder = amdgpu_dm_connector->mst_encoder; - kfree(amdgpu_dm_connector->edid); - amdgpu_dm_connector->edid = NULL; + if (amdgpu_dm_connector->edid) { + kfree(amdgpu_dm_connector->edid); + amdgpu_dm_connector->edid = NULL; + } drm_encoder_cleanup(&amdgpu_encoder->base); kfree(amdgpu_encoder); @@ -161,6 +177,7 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) } static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { + .detect = dm_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = dm_dp_mst_connector_destroy, .reset = amdgpu_dm_connector_funcs_reset, @@ -228,29 +245,17 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) return ret; } -static struct drm_encoder * -dm_mst_atomic_best_encoder(struct drm_connector *connector, - struct drm_connector_state *connector_state) +static struct drm_encoder *dm_mst_best_encoder(struct drm_connector *connector) { - return &to_amdgpu_dm_connector(connector)->mst_encoder->base; -} + struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); -static int -dm_dp_mst_detect(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, bool force) -{ - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_dm_connector *master = aconnector->mst_port; - - return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr, - aconnector->port); + return &amdgpu_dm_connector->mst_encoder->base; } static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs = { .get_modes = dm_dp_mst_get_modes, .mode_valid = amdgpu_dm_connector_mode_valid, - .atomic_best_encoder = dm_mst_atomic_best_encoder, - .detect_ctx = dm_dp_mst_detect, + .best_encoder = dm_mst_best_encoder, }; static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder) @@ -412,10 +417,6 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_dp_aux_register(&aconnector->dm_dp_aux.aux); drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux, &aconnector->base); - - if 
(aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP) - return; - aconnector->mst_mgr.cbs = &dm_mst_cbs; drm_dp_mst_topology_mgr_init( &aconnector->mst_mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 55a520a63712..f4cfa0caeba8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -345,7 +345,7 @@ bool dm_pp_get_clock_levels_by_type( /* Error in pplib. Provide default values. */ return true; } - } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) { + } else if (adev->smu.funcs && adev->smu.funcs->get_clock_by_type) { if (smu_get_clock_by_type(&adev->smu, dc_to_pp_clock_type(clk_type), &pp_clks)) { @@ -365,7 +365,7 @@ bool dm_pp_get_clock_levels_by_type( validation_clks.memory_max_clock = 80000; validation_clks.level = 0; } - } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_max_high_clocks) { + } else if (adev->smu.funcs && adev->smu.funcs->get_max_high_clocks) { if (smu_get_max_high_clocks(&adev->smu, &validation_clks)) { DRM_INFO("DM_PPLIB: Warning: using default validation clocks!\n"); validation_clks.engine_max_clock = 72000; @@ -506,8 +506,8 @@ bool dm_pp_apply_clock_for_voltage_request( ret = adev->powerplay.pp_funcs->display_clock_voltage_request( adev->powerplay.pp_handle, &pp_clock_request); - else if (adev->smu.ppt_funcs && - adev->smu.ppt_funcs->display_clock_voltage_request) + else if (adev->smu.funcs && + adev->smu.funcs->display_clock_voltage_request) ret = smu_display_clock_voltage_request(&adev->smu, &pp_clock_request); if (ret) @@ -527,7 +527,7 @@ bool dm_pp_get_static_clocks( ret = adev->powerplay.pp_funcs->get_current_clocks( adev->powerplay.pp_handle, &pp_clk_info); - else if (adev->smu.ppt_funcs) + else if (adev->smu.funcs) ret = smu_get_current_clocks(&adev->smu, &pp_clk_info); if (ret) return false; @@ -589,9 +589,10 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges) pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, &wm_with_clock_ranges); - else + else if (adev->smu.funcs && + adev->smu.funcs->set_watermarks_for_clock_ranges) smu_set_watermarks_for_clock_ranges(&adev->smu, - &wm_with_clock_ranges); + &wm_with_clock_ranges); } void pp_rv_set_pme_wa_enable(struct pp_smu *pp) @@ -603,7 +604,7 @@ void pp_rv_set_pme_wa_enable(struct pp_smu *pp) if (pp_funcs && pp_funcs->notify_smu_enable_pwe) pp_funcs->notify_smu_enable_pwe(pp_handle); - else if (adev->smu.ppt_funcs) + else if (adev->smu.funcs) smu_notify_smu_enable_pwe(&adev->smu); } @@ -664,6 +665,7 @@ enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp, { const struct dc_context *ctx = pp->dm; struct amdgpu_device *adev = ctx->driver_context; + struct smu_context *smu = &adev->smu; struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges; struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks = wm_with_clock_ranges.wm_dmif_clocks_ranges; @@ -706,7 +708,15 @@ enum pp_smu_status pp_nv_set_wm_ranges(struct pp_smu *pp, ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000; } - smu_set_watermarks_for_clock_ranges(&adev->smu, &wm_with_clock_ranges); + if (!smu->funcs) + return PP_SMU_RESULT_UNSUPPORTED; + + /* 0: successful or smu.funcs->set_watermarks_for_clock_ranges = NULL; + * 1: fail + */ + if (smu_set_watermarks_for_clock_ranges(&adev->smu, + &wm_with_clock_ranges)) + return PP_SMU_RESULT_UNSUPPORTED; return PP_SMU_RESULT_OK; } @@ 
-717,10 +727,10 @@ enum pp_smu_status pp_nv_set_pme_wa_enable(struct pp_smu *pp) struct amdgpu_device *adev = ctx->driver_context; struct smu_context *smu = &adev->smu; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; - /* 0: successful or smu.ppt_funcs->set_azalia_d3_pme = NULL; 1: fail */ + /* 0: successful or smu.funcs->set_azalia_d3_pme = NULL; 1: fail */ if (smu_set_azalia_d3_pme(smu)) return PP_SMU_RESULT_FAIL; @@ -733,10 +743,10 @@ enum pp_smu_status pp_nv_set_display_count(struct pp_smu *pp, int count) struct amdgpu_device *adev = ctx->driver_context; struct smu_context *smu = &adev->smu; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; - /* 0: successful or smu.ppt_funcs->set_display_count = NULL; 1: fail */ + /* 0: successful or smu.funcs->set_display_count = NULL; 1: fail */ if (smu_set_display_count(smu, count)) return PP_SMU_RESULT_FAIL; @@ -749,10 +759,10 @@ enum pp_smu_status pp_nv_set_min_deep_sleep_dcfclk(struct pp_smu *pp, int mhz) struct amdgpu_device *adev = ctx->driver_context; struct smu_context *smu = &adev->smu; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; - /* 0: successful or smu.ppt_funcs->set_deep_sleep_dcefclk = NULL;1: fail */ + /* 0: successful or smu.funcs->set_deep_sleep_dcefclk = NULL;1: fail */ if (smu_set_deep_sleep_dcefclk(smu, mhz)) return PP_SMU_RESULT_FAIL; @@ -767,13 +777,13 @@ enum pp_smu_status pp_nv_set_hard_min_dcefclk_by_freq( struct smu_context *smu = &adev->smu; struct pp_display_clock_request clock_req; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; clock_req.clock_type = amd_pp_dcef_clock; clock_req.clock_freq_in_khz = mhz * 1000; - /* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL + /* 0: successful or smu.funcs->display_clock_voltage_request = NULL * 1: fail */ if (smu_display_clock_voltage_request(smu, &clock_req)) @@ -789,13 +799,13 @@ enum pp_smu_status pp_nv_set_hard_min_uclk_by_freq(struct pp_smu *pp, int mhz) struct smu_context *smu = &adev->smu; struct pp_display_clock_request clock_req; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; clock_req.clock_type = amd_pp_mem_clock; clock_req.clock_freq_in_khz = mhz * 1000; - /* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL + /* 0: successful or smu.funcs->display_clock_voltage_request = NULL * 1: fail */ if (smu_display_clock_voltage_request(smu, &clock_req)) @@ -825,7 +835,7 @@ enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp, struct smu_context *smu = &adev->smu; struct pp_display_clock_request clock_req; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; switch (clock_id) { @@ -843,7 +853,7 @@ enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp, } clock_req.clock_freq_in_khz = mhz * 1000; - /* 0: successful or smu.ppt_funcs->display_clock_voltage_request = NULL + /* 0: successful or smu.funcs->display_clock_voltage_request = NULL * 1: fail */ if (smu_display_clock_voltage_request(smu, &clock_req)) @@ -859,13 +869,13 @@ enum pp_smu_status pp_nv_get_maximum_sustainable_clocks( struct amdgpu_device *adev = ctx->driver_context; struct smu_context *smu = &adev->smu; - if (!smu->ppt_funcs) + if (!smu->funcs) return PP_SMU_RESULT_UNSUPPORTED; - if (!smu->ppt_funcs->get_max_sustainable_clocks_by_dc) + if (!smu->funcs->get_max_sustainable_clocks_by_dc) return PP_SMU_RESULT_UNSUPPORTED; - if (!smu_get_max_sustainable_clocks_by_dc(smu, max_clocks)) + if 
(!smu->funcs->get_max_sustainable_clocks_by_dc(smu, max_clocks)) return PP_SMU_RESULT_OK; return PP_SMU_RESULT_FAIL; @@ -884,97 +894,13 @@ enum pp_smu_status pp_nv_get_uclk_dpm_states(struct pp_smu *pp, if (!smu->ppt_funcs->get_uclk_dpm_states) return PP_SMU_RESULT_UNSUPPORTED; - if (!smu_get_uclk_dpm_states(smu, + if (!smu->ppt_funcs->get_uclk_dpm_states(smu, clock_values_in_khz, num_states)) return PP_SMU_RESULT_OK; return PP_SMU_RESULT_FAIL; } -#ifdef CONFIG_DRM_AMD_DC_DCN2_1 -enum pp_smu_status pp_rn_get_dpm_clock_table( - struct pp_smu *pp, struct dpm_clocks *clock_table) -{ - const struct dc_context *ctx = pp->dm; - struct amdgpu_device *adev = ctx->driver_context; - struct smu_context *smu = &adev->smu; - - if (!smu->ppt_funcs) - return PP_SMU_RESULT_UNSUPPORTED; - - if (!smu->ppt_funcs->get_dpm_clock_table) - return PP_SMU_RESULT_UNSUPPORTED; - - if (!smu_get_dpm_clock_table(smu, clock_table)) - return PP_SMU_RESULT_OK; - - return PP_SMU_RESULT_FAIL; -} - -enum pp_smu_status pp_rn_set_wm_ranges(struct pp_smu *pp, - struct pp_smu_wm_range_sets *ranges) -{ - const struct dc_context *ctx = pp->dm; - struct amdgpu_device *adev = ctx->driver_context; - struct smu_context *smu = &adev->smu; - struct dm_pp_wm_sets_with_clock_ranges_soc15 wm_with_clock_ranges; - struct dm_pp_clock_range_for_dmif_wm_set_soc15 *wm_dce_clocks = - wm_with_clock_ranges.wm_dmif_clocks_ranges; - struct dm_pp_clock_range_for_mcif_wm_set_soc15 *wm_soc_clocks = - wm_with_clock_ranges.wm_mcif_clocks_ranges; - int32_t i; - - if (!smu->ppt_funcs) - return PP_SMU_RESULT_UNSUPPORTED; - - wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets; - wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets; - - for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) { - if (ranges->reader_wm_sets[i].wm_inst > 3) - wm_dce_clocks[i].wm_set_id = WM_SET_A; - else - wm_dce_clocks[i].wm_set_id = - ranges->reader_wm_sets[i].wm_inst; - - wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz = - ranges->reader_wm_sets[i].min_drain_clk_mhz; - - wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz = - ranges->reader_wm_sets[i].max_drain_clk_mhz; - - wm_dce_clocks[i].wm_min_mem_clk_in_khz = - ranges->reader_wm_sets[i].min_fill_clk_mhz; - - wm_dce_clocks[i].wm_max_mem_clk_in_khz = - ranges->reader_wm_sets[i].max_fill_clk_mhz; - } - - for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) { - if (ranges->writer_wm_sets[i].wm_inst > 3) - wm_soc_clocks[i].wm_set_id = WM_SET_A; - else - wm_soc_clocks[i].wm_set_id = - ranges->writer_wm_sets[i].wm_inst; - wm_soc_clocks[i].wm_min_socclk_clk_in_khz = - ranges->writer_wm_sets[i].min_fill_clk_mhz; - - wm_soc_clocks[i].wm_max_socclk_clk_in_khz = - ranges->writer_wm_sets[i].max_fill_clk_mhz; - - wm_soc_clocks[i].wm_min_mem_clk_in_khz = - ranges->writer_wm_sets[i].min_drain_clk_mhz; - - wm_soc_clocks[i].wm_max_mem_clk_in_khz = - ranges->writer_wm_sets[i].max_drain_clk_mhz; - } - - smu_set_watermarks_for_clock_ranges(&adev->smu, &wm_with_clock_ranges); - - return PP_SMU_RESULT_OK; -} -#endif - void dm_pp_get_funcs( struct dc_context *ctx, struct pp_smu_funcs *funcs) @@ -1019,15 +945,6 @@ void dm_pp_get_funcs( funcs->nv_funcs.set_pstate_handshake_support = pp_nv_set_pstate_handshake_support; break; #endif - -#ifdef CONFIG_DRM_AMD_DC_DCN2_1 - case DCN_VERSION_2_1: - funcs->ctx.ver = PP_SMU_VER_RN; - funcs->rn_funcs.pp_smu.dm = ctx; - funcs->rn_funcs.set_wm_ranges = pp_rn_set_wm_ranges; - funcs->rn_funcs.get_dpm_clock_table = pp_rn_get_dpm_clock_table; - break; -#endif default: DRM_ERROR("smu 
version is not supported !\n"); break; diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index a160512a2f04..627982cb15d2 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -48,10 +48,6 @@ DC_LIBS += dce110 DC_LIBS += dce100 DC_LIBS += dce80 -ifdef CONFIG_DRM_AMD_DC_HDCP -DC_LIBS += hdcp -endif - AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS))) include $(AMD_DC) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 823843cd2613..221e0f56389f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2543,6 +2543,7 @@ static enum bp_result construct_integrated_info( /* Sort voltage table from low to high*/ if (result == BP_RESULT_OK) { + struct clock_voltage_caps temp = {0, 0}; uint32_t i; uint32_t j; @@ -2552,8 +2553,10 @@ static enum bp_result construct_integrated_info( info->disp_clk_voltage[j].max_supported_clk < info->disp_clk_voltage[j-1].max_supported_clk) { /* swap j and j - 1*/ - swap(info->disp_clk_voltage[j - 1], - info->disp_clk_voltage[j]); + temp = info->disp_clk_voltage[j-1]; + info->disp_clk_voltage[j-1] = + info->disp_clk_voltage[j]; + info->disp_clk_voltage[j] = temp; } } } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7873abea4112..dff65c0fe82f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1613,6 +1613,8 @@ static enum bp_result construct_integrated_info( struct atom_common_table_header *header; struct atom_data_revision revision; + + struct clock_voltage_caps temp = {0, 0}; uint32_t i; uint32_t j; @@ -1642,8 +1644,10 @@ static enum bp_result construct_integrated_info( info->disp_clk_voltage[j-1].max_supported_clk ) { /* swap j and j - 1*/ - swap(info->disp_clk_voltage[j - 1], - info->disp_clk_voltage[j]); + temp = info->disp_clk_voltage[j-1]; + info->disp_clk_voltage[j-1] = + info->disp_clk_voltage[j]; + info->disp_clk_voltage[j] = temp; } } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 8828dd9c3783..c43797bea413 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -65,31 +65,6 @@ int clk_mgr_helper_get_active_display_cnt( return display_count; } -void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr) -{ - struct dc_link *edp_link = get_edp_link(dc); - - if (dc->hwss.exit_optimized_pwr_state) - dc->hwss.exit_optimized_pwr_state(dc, dc->current_state); - - if (edp_link) { - clk_mgr->psr_allow_active_cache = edp_link->psr_allow_active; - dc_link_set_psr_allow_active(edp_link, false, false); - } - -} - -void clk_mgr_optimize_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr) -{ - struct dc_link *edp_link = get_edp_link(dc); - - if (edp_link) - dc_link_set_psr_allow_active(edp_link, clk_mgr->psr_allow_active_cache, false); - - if (dc->hwss.optimize_pwr_state) - dc->hwss.optimize_pwr_state(dc, dc->current_state); - -} struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index 26db1c5d4e4d..c5c8c4901eed 
100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -147,7 +147,7 @@ int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base) /* Calculate the current DFS clock, in kHz.*/ dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + * clk_mgr->dentist_vco_freq_khz) / target_div; return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz); } @@ -239,7 +239,7 @@ int dce_set_clock( /* Make sure requested clock isn't lower than minimum threshold*/ if (requested_clk_khz > 0) requested_clk_khz = max(requested_clk_khz, - clk_mgr_dce->base.dentist_vco_freq_khz / 64); + clk_mgr_dce->dentist_vco_freq_khz / 64); /* Prepare to program display clock*/ pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10; @@ -276,11 +276,11 @@ static void dce_clock_read_integrated_info(struct clk_mgr_internal *clk_mgr_dce) int i; if (bp->integrated_info) - clk_mgr_dce->base.dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; - if (clk_mgr_dce->base.dentist_vco_freq_khz == 0) { - clk_mgr_dce->base.dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; - if (clk_mgr_dce->base.dentist_vco_freq_khz == 0) - clk_mgr_dce->base.dentist_vco_freq_khz = 3600000; + clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) { + clk_mgr_dce->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) + clk_mgr_dce->dentist_vco_freq_khz = 3600000; } /*update the maximum display clock for each power state*/ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c index a6c46e903ff9..7c746ef1e32e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c @@ -81,7 +81,7 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz) /* Make sure requested clock isn't lower than minimum threshold*/ if (requested_clk_khz > 0) requested_clk_khz = max(requested_clk_khz, - clk_mgr_dce->base.dentist_vco_freq_khz / 62); + clk_mgr_dce->dentist_vco_freq_khz / 62); dce_clk_params.target_clock_frequency = requested_clk_khz; dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; @@ -135,7 +135,7 @@ int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz) /* Make sure requested clock isn't lower than minimum threshold*/ if (requested_clk_khz > 0) requested_clk_khz = max(requested_clk_khz, - clk_mgr->base.dentist_vco_freq_khz / 62); + clk_mgr->dentist_vco_freq_khz / 62); dce_clk_params.target_clock_frequency = requested_clk_khz; dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index 3fab9296918a..47f529ce280a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -139,9 +139,6 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base, ASSERT(clk_mgr->pp_smu); - if (dc->work_arounds.skip_clock_update) - return; - pp_smu = &clk_mgr->pp_smu->rv_funcs; display_count = clk_mgr_helper_get_active_display_cnt(dc, context); @@ -269,11 +266,11 @@ void rv1_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_ clk_mgr->base.dprefclk_khz = 600000; if 
(bp->integrated_info) - clk_mgr->base.dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; - if (bp->fw_info_valid && clk_mgr->base.dentist_vco_freq_khz == 0) { - clk_mgr->base.dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; - if (clk_mgr->base.dentist_vco_freq_khz == 0) - clk_mgr->base.dentist_vco_freq_khz = 3600000; + clk_mgr->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; + if (bp->fw_info_valid && clk_mgr->dentist_vco_freq_khz == 0) { + clk_mgr->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; + if (clk_mgr->dentist_vco_freq_khz == 0) + clk_mgr->dentist_vco_freq_khz = 3600000; } if (!debug->disable_dfs_bypass && bp->integrated_info) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 25d7b7c6681c..3e8ac303bd52 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -104,39 +104,84 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, { int i; - clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz; for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { int dpp_inst, dppclk_khz; - /* Loop index will match dpp->inst if resource exists, - * and we want to avoid dependency on dpp object - */ - dpp_inst = i; - dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + if (!context->res_ctx.pipe_ctx[i].plane_state) + continue; + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; clk_mgr->dccg->funcs->update_dpp_dto( - clk_mgr->dccg, dpp_inst, dppclk_khz); + clk_mgr->dccg, dpp_inst, dppclk_khz, false); } } -void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr) +static void update_global_dpp_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) { int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz; - int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz; + * clk_mgr->dentist_vco_freq_khz / khz; uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider); - uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); - REG_UPDATE(DENTIST_DISPCLK_CNTL, - DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); -// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); } +static void update_display_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) +{ + int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->dentist_vco_freq_khz / khz; + + uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); + + REG_UPDATE(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); +} + +static void request_voltage_and_program_disp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct pp_smu_funcs_nv *pp_smu = NULL; + bool going_up = clk_mgr->base.clks.dispclk_khz < khz; + + if (dc->res_pool->pp_smu) + pp_smu = &dc->res_pool->pp_smu->nv_funcs; + + clk_mgr->base.clks.dispclk_khz = khz; + + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) + 
pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); + + update_display_clk(clk_mgr, khz); + + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); +} + +static void request_voltage_and_program_global_dpp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct pp_smu_funcs_nv *pp_smu = NULL; + bool going_up = clk_mgr->base.clks.dppclk_khz < khz; + + if (dc->res_pool->pp_smu) + pp_smu = &dc->res_pool->pp_smu->nv_funcs; + + clk_mgr->base.clks.dppclk_khz = khz; + clk_mgr->dccg->ref_dppclk = khz; + + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); + + update_global_dpp_clk(clk_mgr, khz); + + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); +} void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, @@ -147,12 +192,11 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, struct dc *dc = clk_mgr_base->ctx->dc; struct pp_smu_funcs_nv *pp_smu = NULL; int display_count; - bool update_dppclk = false; bool update_dispclk = false; bool enter_display_off = false; - bool dpp_clock_lowered = false; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; bool force_reset = false; + int i; if (dc->work_arounds.skip_clock_update) return; @@ -207,10 +251,12 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; + clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; if (pp_smu && pp_smu->set_pstate_handshake_support) pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support); } + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; @@ -218,40 +264,50 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dramclk_khz / 1000); } - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { - if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) - dpp_clock_lowered = true; - clk_mgr->base.clks.dppclk_khz = new_clocks->dppclk_khz; + if (dc->config.forced_clocks == false) { + // First update display clock + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) + request_voltage_and_program_disp_clk(clk_mgr_base, new_clocks->dispclk_khz); - if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); + // Updating DPP clock requires some more logic + if (!safe_to_lower) { + // For pre-programming, we need to make sure any DPP clock that will go up has to go up - update_dppclk = true; - } + // First raise the global reference if needed + if (new_clocks->dppclk_khz > 
clk_mgr_base->clks.dppclk_khz) + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); + // Then raise any dividers that need raising + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + int dpp_inst, dppclk_khz; - update_dispclk = true; - } + if (!context->res_ctx.pipe_ctx[i].plane_state) + continue; - if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { - if (dpp_clock_lowered) { - // if clock is being lowered, increase DTO before lowering refclk - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); - dcn20_update_clocks_update_dentist(clk_mgr); + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, true); + } } else { - // if clock is being raised, increase refclk before lowering DTO - if (update_dppclk || update_dispclk) - dcn20_update_clocks_update_dentist(clk_mgr); - // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + // For post-programming, we can lower ref clk if needed, and unconditionally set all the DTOs + + if (new_clocks->dppclk_khz < clk_mgr_base->clks.dppclk_khz) + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); + + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + int dpp_inst, dppclk_khz; + + if (!context->res_ctx.pipe_ctx[i].plane_state) + continue; + + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, false); + } } } - if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { /*update dmcu for wait_loop count*/ @@ -264,8 +320,6 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr, struct dc_state *context, bool safe_to_lower) { - struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; /* Min fclk = 1.2GHz since all the extra scemi logic seems to run off of it */ int fclk_adj = new_clocks->fclk_khz > 1200000 ? new_clocks->fclk_khz : 1200000; @@ -303,18 +357,14 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr, clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; } - /* Both fclk and ref_dppclk run on the same scemi clock. - * So take the higher value since the DPP DTO is typically programmed - * such that max dppclk is 1:1 with ref_dppclk. + /* Both fclk and dppclk ref are run on the same scemi clock so we + * need to keep the same value for both */ if (clk_mgr->clks.fclk_khz > clk_mgr->clks.dppclk_khz) clk_mgr->clks.dppclk_khz = clk_mgr->clks.fclk_khz; if (clk_mgr->clks.dppclk_khz > clk_mgr->clks.fclk_khz) clk_mgr->clks.fclk_khz = clk_mgr->clks.dppclk_khz; - // Both fclk and ref_dppclk run on the same scemi clock. 
- clk_mgr_int->dccg->ref_dppclk = clk_mgr->clks.fclk_khz; - dm_set_dcn_clocks(clk_mgr->ctx, &clk_mgr->clks); } @@ -359,36 +409,12 @@ void dcn2_get_clock(struct clk_mgr *clk_mgr, } } -static bool dcn2_are_clock_states_equal(struct dc_clocks *a, - struct dc_clocks *b) -{ - if (a->dispclk_khz != b->dispclk_khz) - return false; - else if (a->dppclk_khz != b->dppclk_khz) - return false; - else if (a->dcfclk_khz != b->dcfclk_khz) - return false; - else if (a->socclk_khz != b->socclk_khz) - return false; - else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) - return false; - else if (a->phyclk_khz != b->phyclk_khz) - return false; - else if (a->dramclk_khz != b->dramclk_khz) - return false; - else if (a->p_state_change_support != b->p_state_change_support) - return false; - - return true; -} - static struct clk_mgr_funcs dcn2_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn2_update_clocks, .init_clocks = dcn2_init_clocks, .enable_pme_wa = dcn2_enable_pme_wa, .get_clock = dcn2_get_clock, - .are_clock_states_equal = dcn2_are_clock_states_equal, }; @@ -416,7 +442,7 @@ void dcn20_clk_mgr_construct( if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) { dcn2_funcs.update_clocks = dcn2_update_clocks_fpga; - clk_mgr->base.dentist_vco_freq_khz = 3850000; + clk_mgr->dentist_vco_freq_khz = 3850000; } else { /* DFS Slice 2 should be used for DPREFCLK */ @@ -440,15 +466,15 @@ void dcn20_clk_mgr_construct( pll_req = dc_fixpt_mul_int(pll_req, 100000); /* integer part is now VCO frequency in kHz */ - clk_mgr->base.dentist_vco_freq_khz = dc_fixpt_floor(pll_req); + clk_mgr->dentist_vco_freq_khz = dc_fixpt_floor(pll_req); /* in case we don't get a value from the register, use default */ - if (clk_mgr->base.dentist_vco_freq_khz == 0) - clk_mgr->base.dentist_vco_freq_khz = 3850000; + if (clk_mgr->dentist_vco_freq_khz == 0) + clk_mgr->dentist_vco_freq_khz = 3850000; /* Calculate the DPREFCLK in kHz.*/ clk_mgr->base.dprefclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + * clk_mgr->dentist_vco_freq_khz) / target_div; } //Integrated_info table does not exist on dGPU projects so should not be referenced //anywhere in code for dGPUs. 
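The dcn20_clk_mgr.c hunks above move the DENTIST VCO frequency back from clk_mgr->base into struct clk_mgr_internal and split dppclk/dispclk programming into per-clock helpers built around the same divider arithmetic. A minimal sketch of that arithmetic is below; the helper names and the explicit scale_factor/vco parameters are assumptions for illustration, not the driver's API, and the real code additionally maps the integer divider through dentist_get_did_from_divider() before writing DENTIST_DISPCLK_CNTL.

/* Sketch only: the divider/clock relation visible in the hunks above,
 * with the scale factor and VCO frequency passed in explicitly rather
 * than read from clk_mgr state. Plain integer division, as in the driver.
 */
int dentist_divider_for_target_khz(int scale_factor, int vco_freq_khz,
                                   int target_clk_khz)
{
        /* divider chosen so that: target = scale_factor * vco / divider */
        return scale_factor * vco_freq_khz / target_clk_khz;
}

int dentist_clk_khz_for_divider(int scale_factor, int vco_freq_khz,
                                int divider)
{
        /* inverse relation, e.g. deriving DPREFCLK from a read-back divider */
        return scale_factor * vco_freq_khz / divider;
}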
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h index c9fd824f3c23..ac31a9787305 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h @@ -50,5 +50,4 @@ void dcn2_get_clock(struct clk_mgr *clk_mgr, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); -void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr); #endif //__DCN20_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 790a2d211bd6..787f94d815f4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -52,45 +52,6 @@ #define REG(reg_name) \ (CLK_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) - -/* TODO: evaluate how to lower or disable all dcn clocks in screen off case */ -int rn_get_active_display_cnt_wa( - struct dc *dc, - struct dc_state *context) -{ - int i, display_count; - bool hdmi_present = false; - - display_count = 0; - for (i = 0; i < context->stream_count; i++) { - const struct dc_stream_state *stream = context->streams[i]; - - if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) - hdmi_present = true; - } - - for (i = 0; i < dc->link_count; i++) { - const struct dc_link *link = dc->links[i]; - - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. - */ - /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */ - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL || - link->link_enc->funcs->is_dig_enabled(link->link_enc)) - display_count++; - } - - /* WA for hang on HDMI after display off back back on*/ - if (display_count == 0 && hdmi_present) - display_count = 1; - - return display_count; -} - void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) @@ -101,36 +62,17 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, int display_count; bool update_dppclk = false; bool update_dispclk = false; + bool enter_display_off = false; bool dpp_clock_lowered = false; - struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - if (dc->work_arounds.skip_clock_update) - return; + display_count = clk_mgr_helper_get_active_display_cnt(dc, context); - /* - * if it is safe to lower, but we are already in the lower state, we don't have to do anything - * also if safe to lower is false, we just go in the higher state - */ - if (safe_to_lower) { - /* check that we're not already in lower */ - if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { + if (display_count == 0) + enter_display_off = true; - display_count = rn_get_active_display_cnt_wa(dc, context); - /* if we can go lower, go lower */ - if (display_count == 0) { - rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER); - /* update power state */ - clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; - } - } - } else { - /* check that we're not already in D0 */ - if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) { - rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_MISSION_MODE); - /* update power state */ - clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE; - } + if (enter_display_off == safe_to_lower) { + 
rn_vbios_smu_set_display_count(clk_mgr, display_count); } if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) { @@ -171,8 +113,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, // if clock is being raised, increase refclk before lowering DTO if (update_dppclk || update_dispclk) rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); - // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) + if (update_dppclk) dcn20_update_clocks_update_dpp_dto(clk_mgr, context); } @@ -378,7 +319,7 @@ void rn_get_clk_states(struct clk_mgr *clk_mgr_base, struct clk_states *s) rn_dump_clk_registers(&sb, clk_mgr_base, &log_info); - s->dprefclk_khz = sb.dprefclk * 1000; + s->dprefclk_khz = sb.dprefclk; } void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -388,96 +329,12 @@ void rn_enable_pme_wa(struct clk_mgr *clk_mgr_base) rn_vbios_smu_enable_pme_wa(clk_mgr); } -void rn_init_clocks(struct clk_mgr *clk_mgr) -{ - memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); - // Assumption is that boot state always supports pstate - clk_mgr->clks.p_state_change_support = true; - clk_mgr->clks.prev_p_state_change_support = true; - clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; -} - -void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_range_sets *ranges) -{ - int i, num_valid_sets; - - num_valid_sets = 0; - - for (i = 0; i < WM_SET_COUNT; i++) { - /* skip empty entries, the smu array has no holes*/ - if (!bw_params->wm_table.entries[i].valid) - continue; - - ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst; - ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;; - /* We will not select WM based on dcfclk, so leave it as unconstrained */ - ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - /* fclk wil be used to select WM*/ - - if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) { - if (i == 0) - ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0; - else { - /* add 1 to make it non-overlapping with next lvl */ - ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1; - } - ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - - } else { - /* unconstrained for memory retraining */ - ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - /* Modify previous watermark range to cover up to max */ - ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - } - num_valid_sets++; - } - - ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */ - ranges->num_reader_wm_sets = num_valid_sets; - - /* modify the min and max to make sure we cover the whole range*/ - ranges->reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->reader_wm_sets[ranges->num_reader_wm_sets - 1].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - 
ranges->reader_wm_sets[ranges->num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - /* This is for writeback only, does not matter currently as no writeback support*/ - ranges->num_writer_wm_sets = 1; - ranges->writer_wm_sets[0].wm_inst = WM_A; - ranges->writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - ranges->writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - -} - -static void rn_notify_wm_ranges(struct clk_mgr *clk_mgr_base) -{ - struct dc_debug_options *debug = &clk_mgr_base->ctx->dc->debug; - struct pp_smu_wm_range_sets ranges = {0}; - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct pp_smu_funcs *pp_smu = clk_mgr->pp_smu; - - if (!debug->disable_pplib_wm_range) { - build_watermark_ranges(clk_mgr_base->bw_params, &ranges); - - /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ - if (pp_smu && pp_smu->rn_funcs.set_wm_ranges) - pp_smu->rn_funcs.set_wm_ranges(&pp_smu->rn_funcs.pp_smu, &ranges); - } - -} - static struct clk_mgr_funcs dcn21_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = rn_update_clocks, - .init_clocks = rn_init_clocks, + .init_clocks = dcn2_init_clocks, .enable_pme_wa = rn_enable_pme_wa, - /* .dump_clk_registers = rn_dump_clk_registers, */ - .notify_wm_ranges = rn_notify_wm_ranges + /* .dump_clk_registers = rn_dump_clk_registers */ }; struct clk_bw_params rn_bw_params = { @@ -548,50 +405,80 @@ struct clk_bw_params rn_bw_params = { } }; -static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) +void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_range_sets *ranges) +{ + int i, num_valid_sets; + + num_valid_sets = 0; + + for (i = 0; i < WM_SET_COUNT; i++) { + /* skip empty entries, the smu array has no holes*/ + if (!bw_params->wm_table.entries[i].valid) + continue; + + ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst; + ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type;; + /* We will not select WM based on dcfclk, so leave it as unconstrained */ + ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + /* fclk wil be used to select WM*/ + + if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) { + if (i == 0) + ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0; + else { + /* add 1 to make it non-overlapping with next lvl */ + ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1; + } + ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + + } else { + /* unconstrained for memory retraining */ + ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + /* Modify previous watermark range to cover up to max */ + ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + } + num_valid_sets++; + } + + 
ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */ + ranges->num_reader_wm_sets = num_valid_sets; + + /* modify the min and max to make sure we cover the whole range*/ + ranges->reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->reader_wm_sets[ranges->num_reader_wm_sets - 1].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges->reader_wm_sets[ranges->num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + /* This is for writeback only, does not matter currently as no writeback support*/ + ranges->num_writer_wm_sets = 1; + ranges->writer_wm_sets[0].wm_inst = WM_A; + ranges->writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges->writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + +} + +void clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct hw_asic_id *asic_id) { int i; - for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) { - if (clock_table->DcfClocks[i].Vol == voltage) - return clock_table->DcfClocks[i].Freq; - } - - ASSERT(0); - return 0; -} - -static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct hw_asic_id *asic_id) -{ - int i, j = 0; - - j = -1; - ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL); - /* Find lowest DPM, FCLK is filled in reverse order*/ - - for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) { - if (clock_table->FClocks[i].Freq != 0) { - j = i; + for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) { + if (clock_table->FClocks[i].Freq == 0) break; - } - } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } - - bw_params->clk_table.num_entries = j + 1; - - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq; - bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol; - bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol); + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i].Freq; + bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[i].Freq; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[i].Freq; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i].Freq; + bw_params->clk_table.entries[i].voltage = clock_table->FClocks[i].Vol; } + bw_params->clk_table.num_entries = i; bw_params->vram_type = asic_id->vram_type; bw_params->num_channels = asic_id->vram_width / DDR4_DRAM_WIDTH; @@ -599,7 +486,7 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; - if (i >= bw_params->clk_table.num_entries) { + if (clock_table->FClocks[i].Freq == 0) { bw_params->wm_table.entries[i].valid = false; continue; } @@ -647,42 +534,57 @@ void rn_clk_mgr_construct( if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) { dcn21_funcs.update_clocks = dcn2_update_clocks_fpga; - 
clk_mgr->base.dentist_vco_freq_khz = 3600000; + clk_mgr->dentist_vco_freq_khz = 3600000; clk_mgr->base.dprefclk_khz = 600000; } else { struct clk_log_info log_info = {0}; /* TODO: Check we get what we expect during bringup */ - clk_mgr->base.dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr); + clk_mgr->dentist_vco_freq_khz = get_vco_frequency_from_reg(clk_mgr); /* in case we don't get a value from the register, use default */ - if (clk_mgr->base.dentist_vco_freq_khz == 0) - clk_mgr->base.dentist_vco_freq_khz = 3600000; + if (clk_mgr->dentist_vco_freq_khz == 0) + clk_mgr->dentist_vco_freq_khz = 3600000; rn_dump_clk_registers(&s, &clk_mgr->base, &log_info); - /* Convert dprefclk units from MHz to KHz */ - /* Value already divided by 10, some resolution lost */ - clk_mgr->base.dprefclk_khz = s.dprefclk * 1000; + clk_mgr->base.dprefclk_khz = s.dprefclk; + + if (clk_mgr->base.dprefclk_khz != 600000) { + clk_mgr->base.dprefclk_khz = 600000; + ASSERT(1); //TODO: Renoir follow up. + } /* in case we don't get a value from the register, use default */ - if (clk_mgr->base.dprefclk_khz == 0) { - ASSERT(clk_mgr->base.dprefclk_khz == 600000); + if (clk_mgr->base.dprefclk_khz == 0) clk_mgr->base.dprefclk_khz = 600000; - } } dce_clock_read_ss_info(clk_mgr); clk_mgr->base.bw_params = &rn_bw_params; - if (pp_smu && pp_smu->rn_funcs.get_dpm_clock_table) { + if (pp_smu) { pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table); - rn_clk_mgr_helper_populate_bw_params(clk_mgr->base.bw_params, &clock_table, &ctx->asic_id); + clk_mgr_helper_populate_bw_params(clk_mgr->base.bw_params, &clock_table, &ctx->asic_id); } - if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && clk_mgr->smu_ver >= 0x00371500) { - /* enable powerfeatures when displaycount goes to 0 */ - rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(clk_mgr, !debug->disable_48mhz_pwrdwn); + /* + * Notify SMU which set of WM should be selected for different ranges of fclk + * On Renoir there is a maximumum of 4 DF pstates supported, could be less + * depending on DDR speed and fused maximum fclk. 
+ */ + if (!debug->disable_pplib_wm_range) { + struct pp_smu_wm_range_sets ranges = {0}; + + build_watermark_ranges(clk_mgr->base.bw_params, &ranges); + + /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ + if (pp_smu && pp_smu->rn_funcs.set_wm_ranges) + pp_smu->rn_funcs.set_wm_ranges(&pp_smu->rn_funcs.pp_smu, &ranges); } + + /* enable powerfeatures when displaycount goes to 0 */ + if (!debug->disable_48mhz_pwrdwn) + rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index e4322fa5475b..aadec06fde10 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -26,13 +26,11 @@ #ifndef __RN_CLK_MGR_H__ #define __RN_CLK_MGR_H__ -#include "clk_mgr.h" -#include "dm_pp_smu.h" - struct rn_clk_registers { uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */ }; + void rn_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr, struct pp_smu_funcs *pp_smu, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index cb7c0e8b7e1b..50984c1811bb 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -33,7 +33,7 @@ #include "mp/mp_12_0_0_sh_mask.h" #define REG(reg_name) \ - (MP0_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + (MP1_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) #define FN(reg_name, field) \ FD(reg_name##__##field) @@ -84,12 +84,16 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis int actual_dispclk_set_mhz = -1; struct dc *core_dc = clk_mgr->base.ctx->dc; struct dmcu *dmcu = core_dc->res_pool->dmcu; + uint32_t clk = requested_dispclk_khz / 1000; + + if (clk <= 100) + clk = 101; /* Unit of SMU msg parameter is Mhz */ actual_dispclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDispclkFreq, - requested_dispclk_khz / 1000); + clk); if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { @@ -120,7 +124,7 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque { int actual_dcfclk_set_mhz = -1; - if (clk_mgr->smu_ver < 0x370c00) + if (clk_mgr->smu_ver < 0xFFFFFFFF) return actual_dcfclk_set_mhz; actual_dcfclk_set_mhz = rn_vbios_smu_send_msg_with_param( @@ -135,7 +139,7 @@ int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int { int actual_min_ds_dcfclk_mhz = -1; - if (clk_mgr->smu_ver < 0x370c00) + if (clk_mgr->smu_ver < 0xFFFFFFFF) return actual_min_ds_dcfclk_mhz; actual_min_ds_dcfclk_mhz = rn_vbios_smu_send_msg_with_param( @@ -158,35 +162,33 @@ int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_ { int actual_dppclk_set_mhz = -1; + uint32_t clk = requested_dpp_khz / 1000; + + if (clk <= 100) + clk = 101; + actual_dppclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDppclkFreq, - requested_dpp_khz / 1000); + clk); return actual_dppclk_set_mhz * 1000; } -void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, enum dcn_pwr_state state) +void rn_vbios_smu_set_display_count(struct clk_mgr_internal *clk_mgr, int display_count) { - int disp_count; - - if (state == 
DCN_PWR_STATE_LOW_POWER) - disp_count = 0; - else - disp_count = 1; - rn_vbios_smu_send_msg_with_param( - clk_mgr, - VBIOSSMC_MSG_SetDisplayCount, - disp_count); + clk_mgr, + VBIOSSMC_MSG_SetDisplayCount, + display_count); } -void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable) +void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr) { rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown, - enable); + 0); } void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h index ccc01879c9d4..da3a49487c6d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h @@ -33,8 +33,8 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz); void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz); -void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count); -void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable); +void rn_vbios_smu_set_display_count(struct clk_mgr_internal *clk_mgr, int display_count); +void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr); void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); #endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 32f31bf91915..5d1adeda4d90 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -194,7 +194,7 @@ static bool create_links( } } - if (dc->config.force_enum_edp || !should_destory_link) { + if (!should_destory_link) { dc->links[dc->link_count] = link; link->dc = dc; ++dc->link_count; @@ -411,27 +411,6 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, return false; } -void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream, - enum dc_dynamic_expansion option) -{ - /* OPP FMT dyn expansion updates*/ - int i = 0; - struct pipe_ctx *pipe_ctx; - - for (i = 0; i < MAX_PIPES; i++) { - if (dc->current_state->res_ctx.pipe_ctx[i].stream - == stream) { - pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; - pipe_ctx->stream_res.opp->dyn_expansion = option; - pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( - pipe_ctx->stream_res.opp, - COLOR_SPACE_YCBCR601, - stream->timing.display_color_depth, - stream->signal); - } - } -} - void dc_stream_set_dither_option(struct dc_stream_state *stream, enum dc_dither_option option) { @@ -601,10 +580,6 @@ static bool construct(struct dc *dc, #ifdef CONFIG_DRM_AMD_DC_DCN2_0 // Allocate memory for the vm_helper dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL); - if (!dc->vm_helper) { - dm_error("%s: failed to create dc->vm_helper\n", __func__); - goto fail; - } #endif memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides)); @@ -786,13 +761,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) #if defined(CONFIG_DRM_AMD_DC_DCN2_0) 
disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); #endif - if (dc->hwss.apply_ctx_for_surface) - dc->hwss.apply_ctx_for_surface(dc, old_stream, 0, dangling_context); + dc->hwss.apply_ctx_for_surface(dc, old_stream, 0, dangling_context); } -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - if (dc->hwss.program_front_end_for_ctx) - dc->hwss.program_front_end_for_ctx(dc, dangling_context); -#endif } current_ctx = dc->current_state; @@ -815,6 +785,9 @@ struct dc *dc_create(const struct dc_init_data *init_params) if (false == construct(dc, init_params)) goto construct_fail; + /*TODO: separate HW and SW initialization*/ + dc->hwss.init_hw(dc); + full_pipe_count = dc->res_pool->pipe_count; if (dc->res_pool->underlay_pipe_index != NO_UNDERLAY_PIPE) full_pipe_count--; @@ -847,24 +820,9 @@ alloc_fail: return NULL; } -void dc_hardware_init(struct dc *dc) -{ - dc->hwss.init_hw(dc); -} - void dc_init_callbacks(struct dc *dc, const struct dc_callback_init *init_params) { -#ifdef CONFIG_DRM_AMD_DC_HDCP - dc->ctx->cp_psp = init_params->cp_psp; -#endif -} - -void dc_deinit_callbacks(struct dc *dc) -{ -#ifdef CONFIG_DRM_AMD_DC_HDCP - memset(&dc->ctx->cp_psp, 0, sizeof(dc->ctx->cp_psp)); -#endif } void dc_destroy(struct dc **dc) @@ -943,11 +901,15 @@ static void program_timing_sync( /* set first pipe with plane as master */ for (j = 0; j < group_size; j++) { + struct pipe_ctx *temp; + if (pipe_set[j]->plane_state) { if (j == 0) break; - swap(pipe_set[0], pipe_set[j]); + temp = pipe_set[0]; + pipe_set[0] = pipe_set[j]; + pipe_set[j] = temp; break; } } @@ -1004,87 +966,40 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, struct dc_crtc_timing *crtc_timing) { struct timing_generator *tg; - struct stream_encoder *se = NULL; - - struct dc_crtc_timing hw_crtc_timing = {0}; - struct dc_link *link = sink->link; - unsigned int i, enc_inst, tg_inst = 0; - - // Seamless port only support single DP and EDP so far - if (sink->sink_signal != SIGNAL_TYPE_DISPLAY_PORT && - sink->sink_signal != SIGNAL_TYPE_EDP) - return false; + unsigned int enc_inst, tg_inst; /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; + /* Check for which front end is used by this encoder. + * Note the inst is 1 indexed, where 0 is undefined. + * Note that DIG_FE can source from different OTG but our + * current implementation always map 1-to-1, so this code makes + * the same assumption and doesn't check OTG source. 
+ */ enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); - if (enc_inst == ENGINE_ID_UNKNOWN) + /* Instance should be within the range of the pool */ + if (enc_inst >= dc->res_pool->pipe_count) return false; - for (i = 0; i < dc->res_pool->stream_enc_count; i++) { - if (dc->res_pool->stream_enc[i]->id == enc_inst) { - - se = dc->res_pool->stream_enc[i]; - - tg_inst = dc->res_pool->stream_enc[i]->funcs->dig_source_otg( - dc->res_pool->stream_enc[i]); - break; - } - } - - // tg_inst not found - if (i == dc->res_pool->stream_enc_count) + if (enc_inst >= dc->res_pool->stream_enc_count) return false; + tg_inst = dc->res_pool->stream_enc[enc_inst]->funcs->dig_source_otg( + dc->res_pool->stream_enc[enc_inst]); + if (tg_inst >= dc->res_pool->timing_generator_count) return false; tg = dc->res_pool->timing_generators[tg_inst]; - if (!tg->funcs->get_hw_timing) + if (!tg->funcs->is_matching_timing) return false; - if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing)) - return false; - - if (crtc_timing->h_total != hw_crtc_timing.h_total) - return false; - - if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left) - return false; - - if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable) - return false; - - if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right) - return false; - - if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch) - return false; - - if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width) - return false; - - if (crtc_timing->v_total != hw_crtc_timing.v_total) - return false; - - if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top) - return false; - - if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable) - return false; - - if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom) - return false; - - if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch) - return false; - - if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width) + if (!tg->funcs->is_matching_timing(tg, crtc_timing)) return false; if (dc_is_dp_signal(link->connector_signal)) { @@ -1097,20 +1012,6 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; - if (!se->funcs->dp_get_pixel_format) - return false; - - if (!se->funcs->dp_get_pixel_format( - se, - &hw_crtc_timing.pixel_encoding, - &hw_crtc_timing.display_color_depth)) - return false; - - if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth) - return false; - - if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding) - return false; } return true; @@ -1172,20 +1073,15 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* re-program planes for existing stream, in case we need to * free up plane resource for later use */ - if (dc->hwss.apply_ctx_for_surface) - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->mode_changed) - continue; + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->mode_changed) + continue; - dc->hwss.apply_ctx_for_surface( - dc, context->streams[i], - context->stream_status[i].plane_count, - context); /* use new pipe config in new context */ - } -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - if (dc->hwss.program_front_end_for_ctx) - dc->hwss.program_front_end_for_ctx(dc, context); -#endif + dc->hwss.apply_ctx_for_surface( + dc, context->streams[i], + context->stream_status[i].plane_count, + context); /* use new pipe config in new context */ + } /* Program hardware */ for (i 
= 0; i < dc->res_pool->pipe_count; i++) { @@ -1204,21 +1100,16 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } /* Program all planes within new context*/ -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - if (dc->hwss.program_front_end_for_ctx) - dc->hwss.program_front_end_for_ctx(dc, context); -#endif for (i = 0; i < context->stream_count; i++) { const struct dc_link *link = context->streams[i]->link; if (!context->streams[i]->mode_changed) continue; - if (dc->hwss.apply_ctx_for_surface) - dc->hwss.apply_ctx_for_surface( - dc, context->streams[i], - context->stream_status[i].plane_count, - context); + dc->hwss.apply_ctx_for_surface( + dc, context->streams[i], + context->stream_status[i].plane_count, + context); /* * enable stereo @@ -1245,9 +1136,15 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); + if (!dc->optimize_seamless_boot) + /* pplib is notified if disp_num changed */ + dc->hwss.optimize_bandwidth(dc, context); + for (i = 0; i < context->stream_count; i++) context->streams[i]->mode_changed = false; + memset(&context->commit_hints, 0, sizeof(context->commit_hints)); + dc_release_state(dc->current_state); dc->current_state = context; @@ -1595,15 +1492,20 @@ static enum surface_update_type det_surface_update(const struct dc *dc, enum surface_update_type overall_type = UPDATE_TYPE_FAST; union surface_update_flags *update_flags = &u->surface->update_flags; + update_flags->raw = 0; // Reset all flags + if (u->flip_addr) update_flags->bits.addr_update = 1; - if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) { - update_flags->raw = 0xFFFFFFFF; + if (!is_surface_in_context(context, u->surface)) { + update_flags->bits.new_plane = 1; return UPDATE_TYPE_FULL; } - update_flags->raw = 0; // Reset all flags + if (u->surface->force_full_update) { + update_flags->bits.full_update = 1; + return UPDATE_TYPE_FULL; + } type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); @@ -1661,43 +1563,40 @@ static enum surface_update_type check_update_surfaces_for_stream( enum surface_update_type overall_type = UPDATE_TYPE_FAST; if (stream_status == NULL || stream_status->plane_count != surface_count) - overall_type = UPDATE_TYPE_FULL; + return UPDATE_TYPE_FULL; /* some stream updates require passive update */ if (stream_update) { - union stream_update_flags *su_flags = &stream_update->stream->update_flags; + if ((stream_update->src.height != 0) && + (stream_update->src.width != 0)) + return UPDATE_TYPE_FULL; - if ((stream_update->src.height != 0 && stream_update->src.width != 0) || - (stream_update->dst.height != 0 && stream_update->dst.width != 0)) - su_flags->bits.scaling = 1; + if ((stream_update->dst.height != 0) && + (stream_update->dst.width != 0)) + return UPDATE_TYPE_FULL; if (stream_update->out_transfer_func) - su_flags->bits.out_tf = 1; + return UPDATE_TYPE_FULL; if (stream_update->abm_level) - su_flags->bits.abm_level = 1; + return UPDATE_TYPE_FULL; if (stream_update->dpms_off) - su_flags->bits.dpms_off = 1; - - if (stream_update->gamut_remap) - su_flags->bits.gamut_remap = 1; + return UPDATE_TYPE_FULL; #if defined(CONFIG_DRM_AMD_DC_DCN2_0) if (stream_update->wb_update) - su_flags->bits.wb_update = 1; + return UPDATE_TYPE_FULL; #endif - if (su_flags->raw != 0) - overall_type = UPDATE_TYPE_FULL; - - if (stream_update->output_csc_transform || stream_update->output_color_space) - su_flags->bits.out_csc = 1; } for (i = 0 
; i < surface_count; i++) { enum surface_update_type type = det_surface_update(dc, &updates[i]); + if (type == UPDATE_TYPE_FULL) + return type; + elevate_update_type(&overall_type, type); } @@ -1719,29 +1618,16 @@ enum surface_update_type dc_check_update_surfaces_for_stream( int i; enum surface_update_type type; - if (stream_update) - stream_update->stream->update_flags.raw = 0; for (i = 0; i < surface_count; i++) updates[i].surface->update_flags.raw = 0; type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status); - if (type == UPDATE_TYPE_FULL) { - if (stream_update) - stream_update->stream->update_flags.raw = 0xFFFFFFFF; + if (type == UPDATE_TYPE_FULL) for (i = 0; i < surface_count; i++) updates[i].surface->update_flags.raw = 0xFFFFFFFF; - } - if (type == UPDATE_TYPE_FAST) { - // If there's an available clock comparator, we use that. - if (dc->clk_mgr->funcs->are_clock_states_equal) { - if (!dc->clk_mgr->funcs->are_clock_states_equal(&dc->clk_mgr->clks, &dc->current_state->bw_ctx.bw.dcn.clk)) - dc->optimized_required = true; - // Else we fallback to mem compare. - } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { - dc->optimized_required = true; - } - } + if (type == UPDATE_TYPE_FAST && memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) + dc->optimized_required = true; return type; } @@ -1982,7 +1868,6 @@ static void commit_planes_do_stream_update(struct dc *dc, struct dc_state *context) { int j; - bool should_program_abm; // Stream updates for (j = 0; j < dc->res_pool->pipe_count; j++) { @@ -2063,21 +1948,14 @@ static void commit_planes_do_stream_update(struct dc *dc, } if (stream_update->abm_level && pipe_ctx->stream_res.abm) { - should_program_abm = true; - - // if otg funcs defined check if blanked before programming - if (pipe_ctx->stream_res.tg->funcs->is_blanked) - if (pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg)) - should_program_abm = false; - - if (should_program_abm) { - if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) { - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm); - } else { + if (pipe_ctx->stream_res.tg->funcs->is_blanked) { + // if otg funcs defined check if blanked before programming + if (!pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg)) pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); - } - } + } else + pipe_ctx->stream_res.abm->funcs->set_abm_level( + pipe_ctx->stream_res.abm, stream->abm_level); } } } @@ -2122,13 +2000,7 @@ static void commit_planes_for_stream(struct dc *dc, * In case of turning off screen, no need to program front end a second time. * just return after program blank. 
*/ - if (dc->hwss.apply_ctx_for_surface) - dc->hwss.apply_ctx_for_surface(dc, stream, 0, context); -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - if (dc->hwss.program_front_end_for_ctx) - dc->hwss.program_front_end_for_ctx(dc, context); -#endif - + dc->hwss.apply_ctx_for_surface(dc, stream, 0, context); return; } @@ -2188,15 +2060,10 @@ static void commit_planes_for_stream(struct dc *dc, stream_status = stream_get_status(context, pipe_ctx->stream); - if (dc->hwss.apply_ctx_for_surface) - dc->hwss.apply_ctx_for_surface( + dc->hwss.apply_ctx_for_surface( dc, pipe_ctx->stream, stream_status->plane_count, context); } } -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) - dc->hwss.program_front_end_for_ctx(dc, context); -#endif // Update Type FAST, Surface updates if (update_type == UPDATE_TYPE_FAST) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cc94c1a73daa..ca20b150afcc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -79,6 +79,7 @@ static void destruct(struct dc_link *link) int i; if (link->hpd_gpio != NULL) { + dal_gpio_close(link->hpd_gpio); dal_gpio_destroy_irq(&link->hpd_gpio); link->hpd_gpio = NULL; } @@ -519,7 +520,7 @@ static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *lin } -static void read_current_link_settings_on_detect(struct dc_link *link) +static void read_edp_current_link_settings_on_detect(struct dc_link *link) { union lane_count_set lane_count_set = { {0} }; uint8_t link_bw_set; @@ -554,23 +555,17 @@ static void read_current_link_settings_on_detect(struct dc_link *link) &link_bw_set, sizeof(link_bw_set)); if (link_bw_set == 0) { - if (link->connector_signal == SIGNAL_TYPE_EDP) { - /* If standard link rates are not being used, - * Read DPCD 00115h to find the edp link rate set used - */ - core_link_read_dpcd(link, DP_LINK_RATE_SET, - &link_rate_set, sizeof(link_rate_set)); + /* If standard link rates are not being used, + * Read DPCD 00115h to find the link rate set used + */ + core_link_read_dpcd(link, DP_LINK_RATE_SET, + &link_rate_set, sizeof(link_rate_set)); - // edp_supported_link_rates_count = 0 for DP - if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - link->cur_link_settings.link_rate = - link->dpcd_caps.edp_supported_link_rates[link_rate_set]; - link->cur_link_settings.link_rate_set = link_rate_set; - link->cur_link_settings.use_link_rate_set = true; - } - } else { - // Link Rate not found. Seamless boot may not work. - ASSERT(false); + if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + link->cur_link_settings.link_rate = + link->dpcd_caps.edp_supported_link_rates[link_rate_set]; + link->cur_link_settings.link_rate_set = link_rate_set; + link->cur_link_settings.use_link_rate_set = true; } } else { link->cur_link_settings.link_rate = link_bw_set; @@ -685,7 +680,7 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid) return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0); } -static bool wait_for_alt_mode(struct dc_link *link) +bool wait_for_alt_mode(struct dc_link *link) { /** @@ -743,8 +738,7 @@ static bool wait_for_alt_mode(struct dc_link *link) * This does not create remote sinks but will trigger DM * to start MST detection if a branch is detected. 
*/ -static bool dc_link_detect_helper(struct dc_link *link, - enum dc_detect_reason reason) +bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) { struct dc_sink_init_data sink_init_data = { 0 }; struct display_sink_capability sink_caps = { 0 }; @@ -759,8 +753,6 @@ static bool dc_link_detect_helper(struct dc_link *link, struct dpcd_caps prev_dpcd_caps; bool same_dpcd = true; enum dc_connection_type new_connection_type = dc_connection_none; - bool perform_dp_seamless_boot = false; - DC_LOGGER_INIT(link->ctx->logger); if (dc_is_virtual_signal(link->connector_signal)) @@ -817,15 +809,15 @@ static bool dc_link_detect_helper(struct dc_link *link, } case SIGNAL_TYPE_EDP: { - read_current_link_settings_on_detect(link); + read_edp_current_link_settings_on_detect(link); detect_edp_sink_caps(link); - sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX; + sink_caps.transaction_type = + DDC_TRANSACTION_TYPE_I2C_OVER_AUX; sink_caps.signal = SIGNAL_TYPE_EDP; break; } case SIGNAL_TYPE_DISPLAY_PORT: { - /* wa HPD high coming too early*/ if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) { @@ -873,24 +865,12 @@ static bool dc_link_detect_helper(struct dc_link *link, * empty which leads to allocate_mst_payload() has "0" * pbn_per_slot value leading to exception on dc_fixpt_div() */ - dp_verify_mst_link_cap(link); - + link->verified_link_cap = link->reported_link_cap; if (prev_sink != NULL) dc_sink_release(prev_sink); return false; } - // For seamless boot, to skip verify link cap, we read UEFI settings and set them as verified. - if (reason == DETECT_REASON_BOOT && - dc_ctx->dc->config.power_down_display_on_boot == false && - link->link_status.link_active == true) - perform_dp_seamless_boot = true; - - if (perform_dp_seamless_boot) { - read_current_link_settings_on_detect(link); - link->verified_link_cap = link->reported_link_cap; - } - break; } @@ -975,11 +955,10 @@ static bool dc_link_detect_helper(struct dc_link *link, * two link trainings */ - // verify link cap for SST non-seamless boot - if (!perform_dp_seamless_boot) - dp_verify_link_cap_with_retries(link, - &link->reported_link_cap, - LINK_TRAINING_MAX_VERIFY_RETRY); + /* deal with non-mst cases */ + dp_verify_link_cap_with_retries(link, + &link->reported_link_cap, + LINK_TRAINING_MAX_VERIFY_RETRY); } else { // If edid is the same, then discard new sink and revert back to original sink if (same_edid) { @@ -1068,23 +1047,6 @@ static bool dc_link_detect_helper(struct dc_link *link, dc_sink_release(prev_sink); return true; - -} - -bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) -{ - const struct dc *dc = link->dc; - bool ret; - - /* get out of low power state */ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); - - ret = dc_link_detect_helper(link, reason); - - /* Go back to power optimized state */ - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); - - return ret; } bool dc_link_get_hpd_state(struct dc_link *dc_link) @@ -1530,7 +1492,7 @@ static enum dc_status enable_link_dp( pipe_ctx->stream_res.pix_clk_params.requested_sym_clk = link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; - if (state->clk_mgr && !apply_seamless_boot_optimization) + if (!apply_seamless_boot_optimization) state->clk_mgr->funcs->update_clocks(state->clk_mgr, state, false); dp_enable_link_phy( @@ -2207,10 +2169,8 @@ static void disable_link(struct dc_link *link, enum signal_type signal) dp_set_fec_ready(link, false); } #endif - } else { - if (signal != SIGNAL_TYPE_VIRTUAL) - 
link->link_enc->funcs->disable_output(link->link_enc, signal); - } + } else + link->link_enc->funcs->disable_output(link->link_enc, signal); if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { /* MST disable link only when no stream use the link */ @@ -2257,7 +2217,7 @@ static bool dp_active_dongle_validate_timing( break; } - if (dpcd_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || + if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || dongle_caps->extendedCapValid == false) return true; @@ -2421,206 +2381,17 @@ bool dc_link_set_abm_disable(const struct dc_link *link) return true; } -bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool wait) +bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait) { struct dc *core_dc = link->ctx->dc; struct dmcu *dmcu = core_dc->res_pool->dmcu; - - - if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_feature_enabled) - dmcu->funcs->set_psr_enable(dmcu, allow_active, wait); - - link->psr_allow_active = allow_active; + if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_enabled) + dmcu->funcs->set_psr_enable(dmcu, enable, wait); return true; } -bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state) -{ - struct dc *core_dc = link->ctx->dc; - struct dmcu *dmcu = core_dc->res_pool->dmcu; - - if (dmcu != NULL && link->psr_feature_enabled) - dmcu->funcs->get_psr_state(dmcu, psr_state); - - return true; -} - -static inline enum physical_phy_id -transmitter_to_phy_id(enum transmitter transmitter_value) -{ - switch (transmitter_value) { - case TRANSMITTER_UNIPHY_A: - return PHYLD_0; - case TRANSMITTER_UNIPHY_B: - return PHYLD_1; - case TRANSMITTER_UNIPHY_C: - return PHYLD_2; - case TRANSMITTER_UNIPHY_D: - return PHYLD_3; - case TRANSMITTER_UNIPHY_E: - return PHYLD_4; - case TRANSMITTER_UNIPHY_F: - return PHYLD_5; - case TRANSMITTER_NUTMEG_CRT: - return PHYLD_6; - case TRANSMITTER_TRAVIS_CRT: - return PHYLD_7; - case TRANSMITTER_TRAVIS_LCD: - return PHYLD_8; - case TRANSMITTER_UNIPHY_G: - return PHYLD_9; - case TRANSMITTER_COUNT: - return PHYLD_COUNT; - case TRANSMITTER_UNKNOWN: - return PHYLD_UNKNOWN; - default: - WARN_ONCE(1, "Unknown transmitter value %d\n", - transmitter_value); - return PHYLD_UNKNOWN; - } -} - -bool dc_link_setup_psr(struct dc_link *link, - const struct dc_stream_state *stream, struct psr_config *psr_config, - struct psr_context *psr_context) -{ - struct dc *core_dc; - struct dmcu *dmcu; - int i; - /* updateSinkPsrDpcdConfig*/ - union dpcd_psr_configuration psr_configuration; - - psr_context->controllerId = CONTROLLER_ID_UNDEFINED; - - if (!link) - return false; - - core_dc = link->ctx->dc; - dmcu = core_dc->res_pool->dmcu; - - if (!dmcu) - return false; - - - memset(&psr_configuration, 0, sizeof(psr_configuration)); - - psr_configuration.bits.ENABLE = 1; - psr_configuration.bits.CRC_VERIFICATION = 1; - psr_configuration.bits.FRAME_CAPTURE_INDICATION = - psr_config->psr_frame_capture_indication_req; - - /* Check for PSR v2*/ - if (psr_config->psr_version == 0x2) { - /* For PSR v2 selective update. - * Indicates whether sink should start capturing - * immediately following active scan line, - * or starting with the 2nd active scan line. - */ - psr_configuration.bits.LINE_CAPTURE_INDICATION = 0; - /*For PSR v2, determines whether Sink should generate - * IRQ_HPD when CRC mismatch is detected. 
- */ - psr_configuration.bits.IRQ_HPD_WITH_CRC_ERROR = 1; - } - - dm_helpers_dp_write_dpcd( - link->ctx, - link, - 368, - &psr_configuration.raw, - sizeof(psr_configuration.raw)); - - psr_context->channel = link->ddc->ddc_pin->hw_info.ddc_channel; - psr_context->transmitterId = link->link_enc->transmitter; - psr_context->engineId = link->link_enc->preferred_engine; - - for (i = 0; i < MAX_PIPES; i++) { - if (core_dc->current_state->res_ctx.pipe_ctx[i].stream - == stream) { - /* dmcu -1 for all controller id values, - * therefore +1 here - */ - psr_context->controllerId = - core_dc->current_state->res_ctx. - pipe_ctx[i].stream_res.tg->inst + 1; - break; - } - } - - /* Hardcoded for now. Can be Pcie or Uniphy (or Unknown)*/ - psr_context->phyType = PHY_TYPE_UNIPHY; - /*PhyId is associated with the transmitter id*/ - psr_context->smuPhyId = - transmitter_to_phy_id(link->link_enc->transmitter); - - psr_context->crtcTimingVerticalTotal = stream->timing.v_total; - psr_context->vsyncRateHz = div64_u64(div64_u64((stream-> - timing.pix_clk_100hz * 100), - stream->timing.v_total), - stream->timing.h_total); - - psr_context->psrSupportedDisplayConfig = true; - psr_context->psrExitLinkTrainingRequired = - psr_config->psr_exit_link_training_required; - psr_context->sdpTransmitLineNumDeadline = - psr_config->psr_sdp_transmit_line_num_deadline; - psr_context->psrFrameCaptureIndicationReq = - psr_config->psr_frame_capture_indication_req; - - psr_context->skipPsrWaitForPllLock = 0; /* only = 1 in KV */ - - psr_context->numberOfControllers = - link->dc->res_pool->timing_generator_count; - - psr_context->rfb_update_auto_en = true; - - /* 2 frames before enter PSR. */ - psr_context->timehyst_frames = 2; - /* half a frame - * (units in 100 lines, i.e. a value of 1 represents 100 lines) - */ - psr_context->hyst_lines = stream->timing.v_total / 2 / 100; - psr_context->aux_repeats = 10; - - psr_context->psr_level.u32all = 0; - -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - /*skip power down the single pipe since it blocks the cstate*/ - if (ASICREV_IS_RAVEN(link->ctx->asic_id.hw_internal_rev)) - psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true; -#endif - - /* SMU will perform additional powerdown sequence. - * For unsupported ASICs, set psr_level flag to skip PSR - * static screen notification to SMU. 
- * (Always set for DAL2, did not check ASIC) - */ - psr_context->allow_smu_optimizations = psr_config->allow_smu_optimizations; - - /* Complete PSR entry before aborting to prevent intermittent - * freezes on certain eDPs - */ - psr_context->psr_level.bits.DISABLE_PSR_ENTRY_ABORT = 1; - - /* Controls additional delay after remote frame capture before - * continuing power down, default = 0 - */ - psr_context->frame_delay = 0; - - link->psr_feature_enabled = dmcu->funcs->setup_psr(dmcu, link, psr_context); - - /* psr_enabled == 0 indicates setup_psr did not succeed, but this - * should not happen since firmware should be running at this point - */ - if (link->psr_feature_enabled == 0) - ASSERT(0); - - return true; - -} - const struct dc_link_status *dc_link_get_status(const struct dc_link *link) { return &link->link_status; @@ -2739,7 +2510,7 @@ static void update_mst_stream_alloc_table( /* convert link_mst_stream_alloc_table to dm dp_mst_stream_alloc_table * because stream_encoder is not exposed to dm */ -enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) +static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; @@ -2750,7 +2521,6 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; uint8_t i; - enum act_return_status ret; DC_LOGGER_INIT(link->ctx->logger); /* enable_link_dp_mst already check link->enabled_stream_count @@ -2798,16 +2568,14 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) &link->mst_stream_alloc_table); /* send down message */ - ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( + dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, stream); - if (ret != ACT_LINK_LOST) { - dm_helpers_dp_mst_send_payload_allocation( - stream->ctx, - stream, - true); - } + dm_helpers_dp_mst_send_payload_allocation( + stream->ctx, + stream, + true); /* slot X.Y for only current stream */ pbn_per_slot = get_pbn_per_slot(stream); @@ -2899,24 +2667,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) return DC_OK; } -#if defined(CONFIG_DRM_AMD_DC_HDCP) -static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) -{ - struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp; - if (cp_psp && cp_psp->funcs.update_stream_config) { - struct cp_psp_stream_config config; - - memset(&config, 0, sizeof(config)); - - config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst; - config.stream_enc_inst = (uint8_t) pipe_ctx->stream_res.stream_enc->id; - config.link_enc_inst = pipe_ctx->stream->link->link_enc_hw_inst; - config.dpms_off = dpms_off; - config.dm_stream_ctx = pipe_ctx->stream->dm_stream_context; - cp_psp->funcs.update_stream_config(cp_psp->handle, &config); - } -} -#endif void core_link_enable_stream( struct dc_state *state, @@ -2927,10 +2677,6 @@ void core_link_enable_stream( enum dc_status status; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); - if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment) && - dc_is_virtual_signal(pipe_ctx->stream->signal)) - return; - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) { stream->link->link_enc->funcs->setup( stream->link->link_enc, @@ -2981,9 +2727,6 @@ void core_link_enable_stream( /* Do not touch link on seamless boot optimization. 
*/ if (pipe_ctx->stream->apply_seamless_boot_optimization) { pipe_ctx->stream->dpms_off = false; -#if defined(CONFIG_DRM_AMD_DC_HDCP) - update_psp_stream_config(pipe_ctx, false); -#endif return; } @@ -2991,9 +2734,6 @@ void core_link_enable_stream( if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && apply_edp_fast_boot_optimization) { pipe_ctx->stream->dpms_off = false; -#if defined(CONFIG_DRM_AMD_DC_HDCP) - update_psp_stream_config(pipe_ctx, false); -#endif return; } @@ -3027,15 +2767,6 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); - /* This second call is needed to reconfigure the DIG - * as a workaround for the incorrect value being applied - * from transmitter control. - */ - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) - stream->link->link_enc->funcs->setup( - stream->link->link_enc, - pipe_ctx->stream->signal); - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || @@ -3055,16 +2786,13 @@ void core_link_enable_stream( #endif if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) - dc_link_allocate_mst_payload(pipe_ctx); + allocate_mst_payload(pipe_ctx); core_dc->hwss.unblank_stream(pipe_ctx, &pipe_ctx->stream->link->cur_link_settings); if (dc_is_dp_signal(pipe_ctx->stream->signal)) enable_stream_features(pipe_ctx); -#if defined(CONFIG_DRM_AMD_DC_HDCP) - update_psp_stream_config(pipe_ctx, false); -#endif } #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT else { // if (IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) @@ -3082,14 +2810,6 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx) struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->sink->link; - if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment) && - dc_is_virtual_signal(pipe_ctx->stream->signal)) - return; - -#if defined(CONFIG_DRM_AMD_DC_HDCP) - update_psp_stream_config(pipe_ctx, true); -#endif - core_dc->hwss.blank_stream(pipe_ctx); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 7f904d55c1bc..505967b48e14 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -374,7 +374,6 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor( enum display_dongle_type *dongle = &sink_cap->dongle_type; uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE]; bool is_type2_dongle = false; - int retry_count = 2; struct dp_hdmi_dongle_signature_data *dongle_signature; /* Assume we have no valid DP passive dongle connected */ @@ -387,24 +386,13 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor( DP_HDMI_DONGLE_ADDRESS, type2_dongle_buf, sizeof(type2_dongle_buf))) { - /* Passive HDMI dongles can sometimes fail here without retrying*/ - while (retry_count > 0) { - if (i2c_read(ddc, - DP_HDMI_DONGLE_ADDRESS, - type2_dongle_buf, - sizeof(type2_dongle_buf))) - break; - retry_count--; - } - if (retry_count == 0) { - *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE; - sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK; + *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE; + sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK; - CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf), - "DP-DVI passive dongle %dMhz: ", - DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000); - return; - } + CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf), + "DP-DVI passive dongle %dMhz: ", + 
DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000); + return; } /* Check if Type 2 dongle.*/ @@ -508,7 +496,7 @@ bool dal_ddc_service_query_ddc_data( uint8_t *read_buf, uint32_t read_size) { - bool ret = false; + bool ret; uint32_t payload_size = dal_ddc_service_is_in_aux_transaction_mode(ddc) ? DEFAULT_AUX_MAX_DATA_SIZE : EDID_SEGMENT_SIZE; @@ -527,32 +515,34 @@ bool dal_ddc_service_query_ddc_data( /*TODO: len of payload data for i2c and aux is uint8!!!!, * but we want to read 256 over i2c!!!!*/ if (dal_ddc_service_is_in_aux_transaction_mode(ddc)) { - struct aux_payload payload; - bool read_available = true; + struct aux_payload write_payload = { + .i2c_over_aux = true, + .write = true, + .mot = true, + .address = address, + .length = write_size, + .data = write_buf, + .reply = NULL, + .defer_delay = get_defer_delay(ddc), + }; - payload.i2c_over_aux = true; - payload.address = address; - payload.reply = NULL; - payload.defer_delay = get_defer_delay(ddc); + struct aux_payload read_payload = { + .i2c_over_aux = true, + .write = false, + .mot = false, + .address = address, + .length = read_size, + .data = read_buf, + .reply = NULL, + .defer_delay = get_defer_delay(ddc), + }; - if (write_size != 0) { - payload.write = true; - payload.mot = false; - payload.length = write_size; - payload.data = write_buf; + ret = dc_link_aux_transfer_with_retries(ddc, &write_payload); - ret = dal_ddc_submit_aux_command(ddc, &payload); - read_available = ret; - } + if (!ret) + return false; - if (read_size != 0 && read_available) { - payload.write = false; - payload.mot = false; - payload.length = read_size; - payload.data = read_buf; - - ret = dal_ddc_submit_aux_command(ddc, &payload); - } + ret = dc_link_aux_transfer_with_retries(ddc, &read_payload); } else { struct i2c_payloads *payloads = dal_ddc_i2c_payloads_create(ddc->ctx, payloads_num); @@ -583,41 +573,6 @@ bool dal_ddc_service_query_ddc_data( return ret; } -bool dal_ddc_submit_aux_command(struct ddc_service *ddc, - struct aux_payload *payload) -{ - uint8_t retrieved = 0; - bool ret = 0; - - if (!ddc) - return false; - - if (!payload) - return false; - - do { - struct aux_payload current_payload; - bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) > - payload->length ? true : false; - - current_payload.address = payload->address; - current_payload.data = &payload->data[retrieved]; - current_payload.defer_delay = payload->defer_delay; - current_payload.i2c_over_aux = payload->i2c_over_aux; - current_payload.length = is_end_of_payload ? - payload->length - retrieved : DEFAULT_AUX_MAX_DATA_SIZE; - current_payload.mot = !is_end_of_payload; - current_payload.reply = payload->reply; - current_payload.write = payload->write; - - ret = dc_link_aux_transfer_with_retries(ddc, ¤t_payload); - - retrieved += current_payload.length; - } while (retrieved < payload->length && ret == true); - - return ret; -} - /* dc_link_aux_transfer_raw() - Attempt to transfer * the given aux payload. This function does not perform * retries or handle error states. 
The reply is returned @@ -646,20 +601,6 @@ bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, return dce_aux_transfer_with_retries(ddc, payload); } - -enum dc_status dc_link_aux_configure_timeout(struct ddc_service *ddc, - uint32_t timeout) -{ - enum dc_status status = DC_OK; - struct ddc *ddc_pin = ddc->ddc_pin; - - if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout == NULL) - return DC_ERROR_UNEXPECTED; - if (!ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout)) - status = DC_ERROR_UNEXPECTED; - return status; -} - /*test only function*/ void dal_ddc_service_set_ddc_pin( struct ddc_service *ddc_service, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 0f59b68aa4c2..f5742719b5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1409,9 +1409,6 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link) if (link->link_enc->features.flags.bits.IS_HBR3_CAPABLE) max_link_cap.link_rate = LINK_RATE_HIGH3; - if (link->link_enc->funcs->get_max_link_cap) - link->link_enc->funcs->get_max_link_cap(link->link_enc, &max_link_cap); - /* Lower link settings based on sink's link cap */ if (link->reported_link_cap.lane_count < max_link_cap.lane_count) max_link_cap.lane_count = @@ -1656,14 +1653,11 @@ bool dp_verify_link_cap_with_retries( for (i = 0; i < attempts; i++) { int fail_count = 0; - enum dc_connection_type type = dc_connection_none; + enum dc_connection_type type; memset(&link->verified_link_cap, 0, sizeof(struct dc_link_settings)); - if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) { - link->verified_link_cap.lane_count = LANE_COUNT_ONE; - link->verified_link_cap.link_rate = LINK_RATE_LOW; - link->verified_link_cap.link_spread = LINK_SPREAD_DISABLED; + if (!dc_link_detect_sink(link, &type)) { break; } else if (dp_verify_link_cap(link, &link->reported_link_cap, @@ -1676,19 +1670,6 @@ bool dp_verify_link_cap_with_retries( return success; } -bool dp_verify_mst_link_cap( - struct dc_link *link) -{ - struct dc_link_settings max_link_cap = {0}; - - max_link_cap = get_max_link_cap(link); - link->verified_link_cap = get_common_supported_link_settings( - link->reported_link_cap, - max_link_cap); - - return true; -} - static struct dc_link_settings get_common_supported_link_settings( struct dc_link_settings link_setting_a, struct dc_link_settings link_setting_b) @@ -2076,11 +2057,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link) return false; } -static bool handle_hpd_irq_psr_sink(struct dc_link *link) +static bool handle_hpd_irq_psr_sink(const struct dc_link *link) { union dpcd_psr_configuration psr_configuration; - if (!link->psr_feature_enabled) + if (!link->psr_enabled) return false; dm_helpers_dp_read_dpcd( @@ -2119,8 +2100,8 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) sizeof(psr_error_status.raw)); /* PSR error, disable and re-enable PSR */ - dc_link_set_psr_allow_active(link, false, true); - dc_link_set_psr_allow_active(link, true, true); + dc_link_set_psr_enable(link, false, true); + dc_link_set_psr_enable(link, true, true); return true; } else if (psr_sink_psr_status.bits.SINK_SELF_REFRESH_STATUS == @@ -2383,8 +2364,6 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd enum dc_status result; bool status = false; - struct pipe_ctx *pipe_ctx; - int i; if (out_link_loss) 
*out_link_loss = false; @@ -2461,15 +2440,6 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd &link->cur_link_settings, true, LINK_TRAINING_ATTEMPTS); - for (i = 0; i < MAX_PIPES; i++) { - pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link && - pipe_ctx->stream->dpms_off == false && - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - dc_link_allocate_mst_payload(pipe_ctx); - } - } - status = false; if (out_link_loss) *out_link_loss = true; @@ -2575,7 +2545,6 @@ static void get_active_converter_info( uint8_t data, struct dc_link *link) { union dp_downstream_port_present ds_port = { .byte = data }; - memset(&link->dpcd_caps.dongle_caps, 0, sizeof(link->dpcd_caps.dongle_caps)); /* decode converter info*/ if (!ds_port.fields.PORT_PRESENT) { @@ -2722,7 +2691,6 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data, * keep receiver powered all the time.*/ case DP_BRANCH_DEVICE_ID_0010FA: case DP_BRANCH_DEVICE_ID_0080E1: - case DP_BRANCH_DEVICE_ID_00E04C: link->wa_flags.dp_keep_receiver_powered = true; break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index a519dbc5ecb6..79438c4f1e20 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -277,8 +277,7 @@ void dp_retrain_link_dp_test(struct dc_link *link, if (pipes[i].stream != NULL && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe && pipes[i].stream->link != NULL && - pipes[i].stream_res.stream_enc != NULL && - pipes[i].stream->link == link) { + pipes[i].stream_res.stream_enc != NULL) { udelay(100); pipes[i].stream_res.stream_enc->funcs->dp_blank( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 37698305a2dc..8f70295179ff 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -404,9 +404,6 @@ bool resource_are_streams_timing_synchronizable( if (stream1->view_format != stream2->view_format) return false; - if (stream1->ignore_msa_timing_param || stream2->ignore_msa_timing_param) - return false; - return true; } static bool is_dp_and_hdmi_sharable( @@ -951,7 +948,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx) data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); } -static bool are_rects_integer_multiples(struct rect src, struct rect dest) +static bool are_rect_integer_multiples(struct rect src, struct rect dest) { if (dest.width >= src.width && dest.width % src.width == 0 && dest.height >= src.height && dest.height % src.height == 0) @@ -959,38 +956,6 @@ static bool are_rects_integer_multiples(struct rect src, struct rect dest) return false; } - -static void calculate_integer_scaling(struct pipe_ctx *pipe_ctx) -{ - if (!pipe_ctx->plane_state->scaling_quality.integer_scaling) - return; - - //for Centered Mode - if (pipe_ctx->stream->dst.width == pipe_ctx->stream->src.width && - pipe_ctx->stream->dst.height == pipe_ctx->stream->src.height) { - // calculate maximum # of replication of src onto addressable - unsigned int integer_multiple = min( - pipe_ctx->stream->timing.h_addressable / pipe_ctx->stream->src.width, - pipe_ctx->stream->timing.v_addressable / pipe_ctx->stream->src.height); - - //scale dst - pipe_ctx->stream->dst.width = integer_multiple * pipe_ctx->stream->src.width; - 
pipe_ctx->stream->dst.height = integer_multiple * pipe_ctx->stream->src.height; - - //center dst onto addressable - pipe_ctx->stream->dst.x = (pipe_ctx->stream->timing.h_addressable - pipe_ctx->stream->dst.width)/2; - pipe_ctx->stream->dst.y = (pipe_ctx->stream->timing.v_addressable - pipe_ctx->stream->dst.height)/2; - } - - //disable taps if src & dst are integer ratio - if (are_rects_integer_multiples(pipe_ctx->stream->src, pipe_ctx->stream->dst)) { - pipe_ctx->plane_state->scaling_quality.v_taps = 1; - pipe_ctx->plane_state->scaling_quality.h_taps = 1; - pipe_ctx->plane_state->scaling_quality.v_taps_c = 1; - pipe_ctx->plane_state->scaling_quality.h_taps_c = 1; - } -} - bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; @@ -1004,8 +969,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); - calculate_integer_scaling(pipe_ctx); - calculate_scaling_ratios(pipe_ctx); calculate_viewport(pipe_ctx); @@ -1036,6 +999,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + if (res && + plane_state->scaling_quality.integer_scaling && + are_rect_integer_multiples(pipe_ctx->plane_res.scl_data.viewport, + pipe_ctx->plane_res.scl_data.recout)) { + pipe_ctx->plane_res.scl_data.taps.v_taps = 1; + pipe_ctx->plane_res.scl_data.taps.h_taps = 1; + } if (!res) { /* Try 24 bpp linebuffer */ @@ -1570,9 +1540,6 @@ bool dc_is_stream_unchanged( if (!are_stream_backends_same(old_stream, stream)) return false; - if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) - return false; - return true; } @@ -1662,8 +1629,7 @@ static int acquire_first_free_pipe( static struct audio *find_first_free_audio( struct resource_context *res_ctx, const struct resource_pool *pool, - enum engine_id id, - enum dce_version dc_version) + enum engine_id id) { int i, available_audio_count; @@ -1882,28 +1848,28 @@ static int acquire_resource_from_hw_enabled_state( struct dc_stream_state *stream) { struct dc_link *link = stream->link; - unsigned int i, inst, tg_inst = 0; + unsigned int inst, tg_inst; /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return -1; + /* Check for which front end is used by this encoder. + * Note the inst is 1 indexed, where 0 is undefined. + * Note that DIG_FE can source from different OTG but our + * current implementation always map 1-to-1, so this code makes + * the same assumption and doesn't check OTG source. 
+ */ inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); - if (inst == ENGINE_ID_UNKNOWN) - return false; + /* Instance should be within the range of the pool */ + if (inst >= pool->pipe_count) + return -1; - for (i = 0; i < pool->stream_enc_count; i++) { - if (pool->stream_enc[i]->id == inst) { - tg_inst = pool->stream_enc[i]->funcs->dig_source_otg( - pool->stream_enc[i]); - break; - } - } + if (inst >= pool->stream_enc_count) + return -1; - // tg_inst not found - if (i == pool->stream_enc_count) - return false; + tg_inst = pool->stream_enc[inst]->funcs->dig_source_otg(pool->stream_enc[inst]); if (tg_inst >= pool->timing_generator_count) return false; @@ -1999,7 +1965,7 @@ enum dc_status resource_map_pool_resources( dc_is_audio_capable_signal(pipe_ctx->stream->signal) && stream->audio_info.mode_count && stream->audio_info.flags.all) { pipe_ctx->stream_res.audio = find_first_free_audio( - &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version); + &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id); /* * Audio assigned in order first come first get. diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index bb09243758fe..bf1d7bb90e0f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -423,10 +423,10 @@ bool dc_stream_add_writeback(struct dc *dc, if (dwb->funcs->is_enabled(dwb)) { /* writeback pipe already enabled, only need to update */ - dc->hwss.update_writeback(dc, stream_status, wb_info, dc->current_state); + dc->hwss.update_writeback(dc, stream_status, wb_info); } else { /* Enable writeback pipe from scratch*/ - dc->hwss.enable_writeback(dc, stream_status, wb_info, dc->current_state); + dc->hwss.enable_writeback(dc, stream_status, wb_info); } } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0416a17b0897..a82352a87808 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.56" +#define DC_VER "3.2.48" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -111,20 +111,19 @@ struct dc_caps { bool force_dp_tps4_for_cp2520; bool disable_dp_clk_share; bool psp_setup_panel_mode; - bool extended_aux_timeout_support; #ifdef CONFIG_DRM_AMD_DC_DCN2_0 bool hw_3d_lut; #endif struct dc_plane_cap planes[MAX_PLANES]; }; -struct dc_bug_wa { #if defined(CONFIG_DRM_AMD_DC_DCN2_0) +struct dc_bug_wa { bool no_connect_phy_config; bool dedcn20_305_wa; -#endif bool skip_clock_update; }; +#endif struct dc_dcc_surface_param { struct dc_size surface_size; @@ -220,9 +219,7 @@ struct dc_config { bool allow_seamless_boot_optimization; bool power_down_display_on_boot; bool edp_not_connected; - bool force_enum_edp; bool forced_clocks; - bool disable_extended_timeout_support; // Used to disable extended timeout and lttpr feature as well bool multi_mon_pp_mclk_switch; }; @@ -230,7 +227,6 @@ enum visual_confirm { VISUAL_CONFIRM_DISABLE = 0, VISUAL_CONFIRM_SURFACE = 1, VISUAL_CONFIRM_HDR = 2, - VISUAL_CONFIRM_MPCTREE = 4, }; enum dcc_option { @@ -249,19 +245,6 @@ enum wm_report_mode { WM_REPORT_DEFAULT = 0, WM_REPORT_OVERRIDE = 1, }; -enum dtm_pstate{ - dtm_level_p0 = 0,/*highest voltage*/ - dtm_level_p1, - dtm_level_p2, - dtm_level_p3, - dtm_level_p4,/*when active_display_count = 0*/ -}; - -enum dcn_pwr_state { - DCN_PWR_STATE_UNKNOWN = -1, - DCN_PWR_STATE_MISSION_MODE = 0, - 
DCN_PWR_STATE_LOW_POWER = 3, -}; /* * For any clocks that may differ per pipe @@ -269,7 +252,11 @@ enum dcn_pwr_state { */ struct dc_clocks { int dispclk_khz; + int max_supported_dppclk_khz; + int max_supported_dispclk_khz; int dppclk_khz; + int bw_dppclk_khz; /*a copy of dppclk_khz*/ + int bw_dispclk_khz; int dcfclk_khz; int socclk_khz; int dcfclk_deep_sleep_khz; @@ -277,17 +264,12 @@ struct dc_clocks { int phyclk_khz; int dramclk_khz; bool p_state_change_support; - enum dcn_pwr_state pwr_state; + /* * Elements below are not compared for the purposes of * optimization required */ bool prev_p_state_change_support; - enum dtm_pstate dtm_level; - int max_supported_dppclk_khz; - int max_supported_dispclk_khz; - int bw_dppclk_khz; /*a copy of dppclk_khz*/ - int bw_dispclk_khz; }; struct dc_bw_validation_profile { @@ -365,7 +347,6 @@ struct dc_debug_options { bool disable_hubp_power_gate; #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT bool disable_dsc_power_gate; - int dsc_min_slice_height_override; #endif bool disable_pplib_wm_range; enum wm_report_mode pplib_wm_report_mode; @@ -481,7 +462,9 @@ struct dc { struct dc_config config; struct dc_debug_options debug; struct dc_bounding_box_overrides bb_overrides; +#if defined(CONFIG_DRM_AMD_DC_DCN2_0) struct dc_bug_wa work_arounds; +#endif struct dc_context *ctx; #ifdef CONFIG_DRM_AMD_DC_DCN2_0 struct dc_phy_addr_space_config vm_pa_config; @@ -570,16 +553,10 @@ struct dc_init_data { }; struct dc_callback_init { -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct cp_psp cp_psp; -#else uint8_t reserved; -#endif }; struct dc *dc_create(const struct dc_init_data *init_params); -void dc_hardware_init(struct dc *dc); - int dc_get_vmid_use_vector(struct dc *dc); #ifdef CONFIG_DRM_AMD_DC_DCN2_0 void dc_setup_vm_context(struct dc *dc, struct dc_virtual_addr_space_config *va_config, int vmid); @@ -588,7 +565,6 @@ int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_c #endif void dc_init_callbacks(struct dc *dc, const struct dc_callback_init *init_params); -void dc_deinit_callbacks(struct dc *dc); void dc_destroy(struct dc **dc); /******************************************************************************* diff --git a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h index 4f8f576d5fcf..4ef97f65e55d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h @@ -49,8 +49,7 @@ enum aux_channel_operation_result { AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN, AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY, AUX_CHANNEL_OPERATION_FAILED_TIMEOUT, - AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON, - AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE + AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index 0ed2962add5a..6e42209f0e20 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -30,7 +30,6 @@ #define DP_DSC_BRANCH_OVERALL_THROUGHPUT_0 0x0a0 /* DP 1.4a SCR */ #define DP_DSC_BRANCH_OVERALL_THROUGHPUT_1 0x0a1 #define DP_DSC_BRANCH_MAX_LINE_WIDTH 0x0a2 -#include "dc_types.h" struct dc_dsc_bw_range { uint32_t min_kbps; /* Bandwidth if min_target_bpp_x16 is used */ @@ -40,21 +39,13 @@ struct dc_dsc_bw_range { uint32_t stream_kbps; /* Uncompressed stream bandwidth */ }; -struct display_stream_compressor { - const struct dsc_funcs *funcs; -#ifndef AMD_EDID_UTILITY - struct dc_context *ctx; - int inst; -#endif -}; bool dc_dsc_parse_dsc_dpcd(const 
uint8_t *dpcd_dsc_basic_data, const uint8_t *dpcd_dsc_ext_data, struct dsc_dec_dpcd_caps *dsc_sink_caps); bool dc_dsc_compute_bandwidth_range( - const struct display_stream_compressor *dsc, - const uint32_t dsc_min_slice_height_override, + const struct dc *dc, const uint32_t min_kbps, const uint32_t max_kbps, const struct dsc_dec_dpcd_caps *dsc_sink_caps, @@ -62,9 +53,8 @@ bool dc_dsc_compute_bandwidth_range( struct dc_dsc_bw_range *range); bool dc_dsc_compute_config( - const struct display_stream_compressor *dsc, + const struct dc *dc, const struct dsc_dec_dpcd_caps *dsc_sink_caps, - const uint32_t dsc_min_slice_height_override, uint32_t target_bandwidth_kbps, const struct dc_crtc_timing *timing, struct dc_dsc_config *dsc_cfg); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index e0856bb8511f..0b8700a8a94a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -26,8 +26,6 @@ #ifndef DC_HW_TYPES_H #define DC_HW_TYPES_H -#ifndef AMD_EDID_UTILITY - #include "os_types.h" #include "fixed31_32.h" #include "signal_types.h" @@ -126,6 +124,20 @@ struct plane_size { int chroma_pitch; struct rect surface_size; struct rect chroma_size; + + union { + struct { + struct rect surface_size; + int surface_pitch; + } grph; + + struct { + struct rect luma_size; + int luma_pitch; + struct rect chroma_size; + int chroma_pitch; + } video; + }; }; struct dc_plane_dcc_param { @@ -136,6 +148,21 @@ struct dc_plane_dcc_param { int meta_pitch_c; bool independent_64b_blks_c; + + union { + struct { + int meta_pitch; + bool independent_64b_blks; + } grph; + + struct { + int meta_pitch_l; + bool independent_64b_blks_l; + + int meta_pitch_c; + bool independent_64b_blks_c; + } video; + }; }; /*Displayable pixel format in fb*/ @@ -578,11 +605,6 @@ enum dc_quantization_range { QUANTIZATION_RANGE_LIMITED }; -enum dc_dynamic_expansion { - DYN_EXPANSION_AUTO, - DYN_EXPANSION_DISABLE -}; - /* XFM */ /* used in struct dc_plane_state */ @@ -594,8 +616,6 @@ struct scaling_taps { bool integer_scaling; }; -#endif /* AMD_EDID_UTILITY */ - enum dc_timing_standard { DC_TIMING_STANDARD_UNDEFINED, DC_TIMING_STANDARD_DMT, @@ -717,6 +737,30 @@ enum dc_timing_3d_format { TIMING_3D_FORMAT_MAX, }; +enum trigger_delay { + TRIGGER_DELAY_NEXT_PIXEL = 0, + TRIGGER_DELAY_NEXT_LINE, +}; + +enum crtc_event { + CRTC_EVENT_VSYNC_RISING = 0, + CRTC_EVENT_VSYNC_FALLING +}; + +struct crtc_trigger_info { + bool enabled; + struct dc_stream_state *event_source; + enum crtc_event event; + enum trigger_delay delay; +}; + +struct dc_crtc_timing_adjust { + uint32_t v_total_min; + uint32_t v_total_max; + uint32_t v_total_mid; + uint32_t v_total_mid_frame_num; +}; + #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT struct dc_dsc_config { uint32_t num_slices_h; /* Number of DSC slices - horizontal */ @@ -760,33 +804,6 @@ struct dc_crtc_timing { #endif }; -#ifndef AMD_EDID_UTILITY - -enum trigger_delay { - TRIGGER_DELAY_NEXT_PIXEL = 0, - TRIGGER_DELAY_NEXT_LINE, -}; - -enum crtc_event { - CRTC_EVENT_VSYNC_RISING = 0, - CRTC_EVENT_VSYNC_FALLING -}; - -struct crtc_trigger_info { - bool enabled; - struct dc_stream_state *event_source; - enum crtc_event event; - enum trigger_delay delay; -}; - -struct dc_crtc_timing_adjust { - uint32_t v_total_min; - uint32_t v_total_max; - uint32_t v_total_mid; - uint32_t v_total_mid_frame_num; -}; - - /* Passed on init */ enum vram_type { VIDEO_MEMORY_TYPE_GDDR5 = 2, @@ -857,7 +874,5 @@ struct tg_color { uint16_t color_b_cb; }; 
-#endif /* AMD_EDID_UTILITY */ - #endif /* DC_HW_TYPES_H */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index f24fd19ed93d..9ea75db3484e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -126,8 +126,7 @@ struct dc_link { unsigned short chip_caps; unsigned int dpcd_sink_count; enum edp_revision edp_revision; - bool psr_feature_enabled; - bool psr_allow_active; + bool psr_enabled; /* MST record stream using this link */ struct link_flags { @@ -159,18 +158,6 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_ return dc->links[link_index]; } -static inline struct dc_link *get_edp_link(const struct dc *dc) -{ - int i; - - // report any eDP links, even unconnected DDI's - for (i = 0; i < dc->link_count; i++) { - if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) - return dc->links[i]; - } - return NULL; -} - /* Set backlight level of an embedded panel (eDP, LVDS). * backlight_pwm_u16_16 is unsigned 32 bit with 16 bit integer * and 16 bit fractional, where 1.0 is max backlight value. @@ -183,7 +170,7 @@ int dc_link_get_backlight_level(const struct dc_link *dc_link); bool dc_link_set_abm_disable(const struct dc_link *dc_link); -bool dc_link_set_psr_allow_active(struct dc_link *dc_link, bool enable, bool wait); +bool dc_link_set_psr_enable(const struct dc_link *dc_link, bool enable, bool wait); bool dc_link_get_psr_state(const struct dc_link *dc_link, uint32_t *psr_state); @@ -205,7 +192,6 @@ enum dc_detect_reason { bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); bool dc_link_get_hpd_state(struct dc_link *dc_link); -enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx); /* Notify DC about DP RX Interrupt (aka Short Pulse Interrupt). 
* Return: diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index fdb6adc37857..0fa1c26bc20d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -113,21 +113,6 @@ struct periodic_interrupt_config { int lines_offset; }; -union stream_update_flags { - struct { - uint32_t scaling:1; - uint32_t out_tf:1; - uint32_t out_csc:1; - uint32_t abm_level:1; - uint32_t dpms_off:1; - uint32_t gamut_remap:1; -#if defined(CONFIG_DRM_AMD_DC_DCN2_0) - uint32_t wb_update:1; -#endif - } bits; - - uint32_t raw; -}; struct dc_stream_state { // sink is deprecated, new code should not reference @@ -229,14 +214,9 @@ struct dc_stream_state { #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT bool is_dsc_enabled; #endif - union stream_update_flags update_flags; }; -#define ABM_LEVEL_IMMEDIATE_DISABLE 0xFFFFFFFF - struct dc_stream_update { - struct dc_stream_state *stream; - struct rect src; struct rect dst; struct dc_transfer_func *out_transfer_func; @@ -451,9 +431,6 @@ void dc_stream_set_static_screen_events(struct dc *dc, int num_streams, const struct dc_static_screen_events *events); -void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream, - enum dc_dynamic_expansion option); - void dc_stream_set_dither_option(struct dc_stream_state *stream, enum dc_dither_option option); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index d9be8fc3889f..b273735b6a3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -25,11 +25,6 @@ #ifndef DC_TYPES_H_ #define DC_TYPES_H_ -#ifndef AMD_EDID_UTILITY -/* AND EdidUtility only needs a portion - * of this file, including the rest only - * causes additional issues. 
- */ #include "os_types.h" #include "fixed31_32.h" #include "irq_types.h" @@ -38,10 +33,6 @@ #include "dal_types.h" #include "grph_object_defs.h" -#ifdef CONFIG_DRM_AMD_DC_HDCP -#include "dm_cp_psp.h" -#endif - /* forward declarations */ struct dc_plane_state; struct dc_stream_state; @@ -109,9 +100,6 @@ struct dc_context { uint32_t dc_sink_id_count; uint32_t dc_stream_id_count; uint64_t fbc_gpu_addr; -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct cp_psp cp_psp; -#endif }; @@ -171,12 +159,6 @@ enum dc_edid_status { EDID_THE_SAME, }; -enum act_return_status { - ACT_SUCCESS, - ACT_LINK_LOST, - ACT_FAILED -}; - /* audio capability from EDID*/ struct dc_cea_audio_mode { uint8_t format_code; /* ucData[0] [6:3]*/ @@ -757,9 +739,6 @@ struct dc_clock_config { uint32_t current_clock_khz;/*current clock in use*/ }; -#endif /*AMD_EDID_UTILITY*/ -//AMD EDID UTILITY does not need any of the above structures - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT /* DSC DPCD capabilities */ union dsc_slice_caps1 { @@ -831,5 +810,4 @@ struct dsc_dec_dpcd_caps { uint32_t branch_max_line_width; }; #endif - #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index b8a3fc505c9b..58bd131d5b48 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -77,9 +77,6 @@ static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) /* notifyDMCUMsg */ REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, - 1, 80000); - return true; } @@ -404,10 +401,6 @@ static bool dce_abm_init_backlight(struct abm *abm) /* Enable the backlight output */ REG_UPDATE(BL_PWM_CNTL, BL_PWM_EN, 1); - /* Disable fractional pwm if configured */ - REG_UPDATE(BL_PWM_CNTL, BL_PWM_FRACTIONAL_EN, - abm->ctx->dc->config.disable_fractional_pwm ? 
0 : 1); - /* Unlock group 2 backlight registers */ REG_UPDATE(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, 0); @@ -496,6 +489,9 @@ void dce_abm_destroy(struct abm **abm) { struct dce_abm *abm_dce = TO_DCE_ABM(*abm); + if (abm_dce->base.dmcu_is_running == true) + abm_dce->base.funcs->set_abm_immediate_disable(*abm); + kfree(abm_dce); *abm = NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index e472608faf33..c3f9f4185ce8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -42,10 +42,6 @@ #include "reg_helper.h" -#undef FN -#define FN(reg_name, field_name) \ - aux110->shift->field_name, aux110->mask->field_name - #define FROM_AUX_ENGINE(ptr) \ container_of((ptr), struct aux_engine_dce110, base) @@ -59,14 +55,6 @@ enum { AUX_TIMED_OUT_RETRY_COUNTER = 2, AUX_DEFER_RETRY_COUNTER = 6 }; - -#define TIME_OUT_INCREMENT 1016 -#define TIME_OUT_MULTIPLIER_8 8 -#define TIME_OUT_MULTIPLIER_16 16 -#define TIME_OUT_MULTIPLIER_32 32 -#define TIME_OUT_MULTIPLIER_64 64 -#define MAX_TIMEOUT_LENGTH 127 - static void release_engine( struct dce_aux *engine) { @@ -210,7 +198,7 @@ static void submit_channel_request( REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->polling_timeout_period/10); + 10, aux110->timeout_period/10); /* set the delay and the number of bytes to write */ @@ -339,7 +327,7 @@ static enum aux_channel_operation_result get_channel_status( /* poll to make sure that SW_DONE is asserted */ REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1, - 10, aux110->polling_timeout_period/10); + 10, aux110->timeout_period/10); value = REG_READ(AUX_SW_STATUS); /* in case HPD is LOW, exit AUX transaction */ @@ -426,77 +414,20 @@ void dce110_engine_destroy(struct dce_aux **engine) *engine = NULL; } - -static bool dce_aux_configure_timeout(struct ddc_service *ddc, - uint32_t timeout_in_us) -{ - uint32_t multiplier = 0; - uint32_t length = 0; - struct ddc *ddc_pin = ddc->ddc_pin; - struct dce_aux *aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - struct aux_engine_dce110 *aux110 = FROM_AUX_ENGINE(aux_engine); - - /* 1-Update polling timeout period */ - aux110->polling_timeout_period = timeout_in_us * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER; - - /* 2-Update aux timeout period length and multiplier */ - if (timeout_in_us <= TIME_OUT_INCREMENT) { - multiplier = 0; - length = timeout_in_us/TIME_OUT_MULTIPLIER_8; - if (timeout_in_us % TIME_OUT_MULTIPLIER_8 != 0) - length++; - } else if (timeout_in_us <= 2 * TIME_OUT_INCREMENT) { - multiplier = 1; - length = timeout_in_us/TIME_OUT_MULTIPLIER_16; - if (timeout_in_us % TIME_OUT_MULTIPLIER_16 != 0) - length++; - } else if (timeout_in_us <= 4 * TIME_OUT_INCREMENT) { - multiplier = 2; - length = timeout_in_us/TIME_OUT_MULTIPLIER_32; - if (timeout_in_us % TIME_OUT_MULTIPLIER_32 != 0) - length++; - } else if (timeout_in_us > 4 * TIME_OUT_INCREMENT) { - multiplier = 3; - length = timeout_in_us/TIME_OUT_MULTIPLIER_64; - if (timeout_in_us % TIME_OUT_MULTIPLIER_64 != 0) - length++; - } - - length = (length < MAX_TIMEOUT_LENGTH) ? 
length : MAX_TIMEOUT_LENGTH; - - REG_UPDATE_SEQ_2(AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, length, AUX_RX_TIMEOUT_LEN_MUL, multiplier); - - return true; -} - -static struct dce_aux_funcs aux_functions = { - .configure_timeout = NULL, - .destroy = NULL, -}; - struct dce_aux *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110, struct dc_context *ctx, uint32_t inst, uint32_t timeout_period, - const struct dce110_aux_registers *regs, - const struct dce110_aux_registers_mask *mask, - const struct dce110_aux_registers_shift *shift, - bool is_ext_aux_timeout_configurable) + const struct dce110_aux_registers *regs) { aux_engine110->base.ddc = NULL; aux_engine110->base.ctx = ctx; aux_engine110->base.delay = 0; aux_engine110->base.max_defer_write_retry = 0; aux_engine110->base.inst = inst; - aux_engine110->polling_timeout_period = timeout_period; + aux_engine110->timeout_period = timeout_period; aux_engine110->regs = regs; - aux_engine110->mask = mask; - aux_engine110->shift = shift; - aux_engine110->base.funcs = &aux_functions; - if (is_ext_aux_timeout_configurable) - aux_engine110->base.funcs->configure_timeout = &dce_aux_configure_timeout; - return &aux_engine110->base; } @@ -533,10 +464,8 @@ int dce_aux_transfer_raw(struct ddc_service *ddc, memset(&aux_rep, 0, sizeof(aux_rep)); aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - if (!acquire(aux_engine, ddc_pin)) { - *operation_result = AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE; + if (!acquire(aux_engine, ddc_pin)) return -1; - } if (payload->i2c_over_aux) aux_req.type = AUX_TRANSACTION_TYPE_I2C; @@ -546,7 +475,7 @@ int dce_aux_transfer_raw(struct ddc_service *ddc, aux_req.action = i2caux_action_from_payload(payload); aux_req.address = payload->address; - aux_req.delay = 0; + aux_req.delay = payload->defer_delay * 10; aux_req.length = payload->length; aux_req.data = payload->data; @@ -615,15 +544,8 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, case AUX_TRANSACTION_REPLY_AUX_DEFER: case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER: - if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) { + if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) goto fail; - } else { - if ((*payload->reply == AUX_TRANSACTION_REPLY_AUX_DEFER) || - (*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER)) { - if (payload->defer_delay > 0) - msleep(payload->defer_delay); - } - } break; case AUX_TRANSACTION_REPLY_I2C_DEFER: @@ -660,7 +582,6 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, break; case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - case AUX_CHANNEL_OPERATION_FAILED_ENGINE_ACQUIRE: case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: default: goto fail; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index b4b2c79a8073..ed7fec8fe253 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -29,7 +29,6 @@ #include "i2caux_interface.h" #include "inc/hw/aux_engine.h" - #ifdef CONFIG_DRM_AMD_DC_DCN2_0 #define AUX_COMMON_REG_LIST0(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ @@ -37,7 +36,6 @@ SRI(AUX_SW_DATA, DP_AUX, id), \ SRI(AUX_SW_CONTROL, DP_AUX, id), \ SRI(AUX_INTERRUPT_CONTROL, DP_AUX, id), \ - SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \ SRI(AUX_SW_STATUS, DP_AUX, id) #endif @@ -57,7 +55,6 @@ struct dce110_aux_registers { uint32_t AUX_SW_DATA; uint32_t AUX_SW_CONTROL; uint32_t AUX_INTERRUPT_CONTROL; - uint32_t AUX_DPHY_RX_CONTROL1; uint32_t AUX_SW_STATUS; 
uint32_t AUXN_IMPCAL; uint32_t AUXP_IMPCAL; @@ -65,156 +62,6 @@ struct dce110_aux_registers { uint32_t AUX_RESET_MASK; }; -#define DCE_AUX_REG_FIELD_LIST(type)\ - type AUX_EN;\ - type AUX_RESET;\ - type AUX_RESET_DONE;\ - type AUX_REG_RW_CNTL_STATUS;\ - type AUX_SW_USE_AUX_REG_REQ;\ - type AUX_SW_DONE_USING_AUX_REG;\ - type AUX_SW_AUTOINCREMENT_DISABLE;\ - type AUX_SW_DATA_RW;\ - type AUX_SW_INDEX;\ - type AUX_SW_GO;\ - type AUX_SW_DATA;\ - type AUX_SW_REPLY_BYTE_COUNT;\ - type AUX_SW_DONE;\ - type AUX_SW_DONE_ACK;\ - type AUXN_IMPCAL_ENABLE;\ - type AUXP_IMPCAL_ENABLE;\ - type AUXN_IMPCAL_OVERRIDE_ENABLE;\ - type AUXP_IMPCAL_OVERRIDE_ENABLE;\ - type AUX_RX_TIMEOUT_LEN;\ - type AUX_RX_TIMEOUT_LEN_MUL;\ - type AUXN_CALOUT_ERROR_AK;\ - type AUXP_CALOUT_ERROR_AK;\ - type AUX_SW_START_DELAY;\ - type AUX_SW_WR_BYTES - -#define DCE10_AUX_MASK_SH_LIST(mask_sh)\ - AUX_SF(AUX_CONTROL, AUX_EN, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_DATA, mask_sh),\ - AUX_SF(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\ - AUX_SF(AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\ - AUX_SF(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh) - -#define DCE_AUX_MASK_SH_LIST(mask_sh)\ - AUX_SF(AUX_CONTROL, AUX_EN, mask_sh),\ - AUX_SF(AUX_CONTROL, AUX_RESET, mask_sh),\ - AUX_SF(AUX_CONTROL, AUX_RESET_DONE, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\ - AUX_SF(AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\ - AUX_SF(AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\ - AUX_SF(AUX_SW_DATA, AUX_SW_DATA, mask_sh),\ - AUX_SF(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\ - AUX_SF(AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\ - AUX_SF(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh) - -#define DCE12_AUX_MASK_SH_LIST(mask_sh)\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, 
AUX_REG_RW_CNTL_STATUS, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh) - -/* DCN10 MASK */ -#define DCN10_AUX_MASK_SH_LIST(mask_sh)\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_ENABLE, mask_sh),\ - AUX_SF(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, mask_sh),\ - AUX_SF(AUXN_IMPCAL, AUXN_IMPCAL_OVERRIDE_ENABLE, mask_sh) - -/* for all other DCN */ -#define DCN_AUX_MASK_SH_LIST(mask_sh)\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_EN, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET, mask_sh),\ - AUX_SF(DP_AUX0_AUX_CONTROL, AUX_RESET_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_REG_RW_CNTL_STATUS, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_USE_AUX_REG_REQ, mask_sh),\ - AUX_SF(DP_AUX0_AUX_ARB_CONTROL, AUX_SW_DONE_USING_AUX_REG, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_START_DELAY, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_WR_BYTES, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_CONTROL, AUX_SW_GO, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA_RW, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_AUTOINCREMENT_DISABLE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_INDEX, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_DATA, AUX_SW_DATA, 
mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, mask_sh),\ - AUX_SF(DP_AUX0_AUX_SW_STATUS, AUX_SW_DONE, mask_sh),\ - AUX_SF(DP_AUX0_AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, mask_sh),\ - AUX_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\ - AUX_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh) - -#define AUX_SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - enum { /* This is the timeout as defined in DP 1.2a, * 2.3.4 "Detailed uPacket TX AUX CH State Description". */ @@ -250,34 +97,20 @@ struct dce_aux { uint32_t max_defer_write_retry; bool acquire_reset; - struct dce_aux_funcs *funcs; }; -struct dce110_aux_registers_mask { - DCE_AUX_REG_FIELD_LIST(uint32_t); -}; - -struct dce110_aux_registers_shift { - DCE_AUX_REG_FIELD_LIST(uint8_t); -}; - - struct aux_engine_dce110 { struct dce_aux base; const struct dce110_aux_registers *regs; - const struct dce110_aux_registers_mask *mask; - const struct dce110_aux_registers_shift *shift; struct { uint32_t aux_control; uint32_t aux_arb_control; uint32_t aux_sw_data; uint32_t aux_sw_control; uint32_t aux_interrupt_control; - uint32_t aux_dphy_rx_control1; - uint32_t aux_dphy_rx_control0; uint32_t aux_sw_status; } addr; - uint32_t polling_timeout_period; + uint32_t timeout_period; }; struct aux_engine_dce110_init_data { @@ -287,15 +120,12 @@ struct aux_engine_dce110_init_data { const struct dce110_aux_registers *regs; }; -struct dce_aux *dce110_aux_engine_construct(struct aux_engine_dce110 *aux_engine110, +struct dce_aux *dce110_aux_engine_construct( + struct aux_engine_dce110 *aux_engine110, struct dc_context *ctx, uint32_t inst, uint32_t timeout_period, - const struct dce110_aux_registers *regs, - - const struct dce110_aux_registers_mask *mask, - const struct dce110_aux_registers_shift *shift, - bool is_ext_aux_timeout_configurable); + const struct dce110_aux_registers *regs); void dce110_engine_destroy(struct dce_aux **engine); @@ -309,13 +139,4 @@ int dce_aux_transfer_raw(struct ddc_service *ddc, bool dce_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *cmd); - -struct dce_aux_funcs { - bool (*configure_timeout) - (struct ddc_service *ddc, - uint32_t timeout); - void (*destroy) - (struct aux_engine **ptr); -}; - #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index ba995d3f2318..0b86cee4876f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -907,6 +907,9 @@ void dce_dmcu_destroy(struct dmcu **dmcu) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(*dmcu); + if (dmcu_dce->base.dmcu_state == DMCU_RUNNING) + dmcu_dce->base.funcs->set_psr_enable(*dmcu, false, true); + kfree(dmcu_dce); *dmcu = NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index 32d145a0d6fc..ac04d77058f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -679,7 +679,6 @@ struct dce_hwseq_registers { HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN17_PGFSM_PWR_STATUS, mask_sh), \ HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN18_PGFSM_PWR_STATUS, mask_sh), \ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWSEQ_LVTMA_MASK_SH_LIST(mask_sh), \ HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \ HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh) #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 8aa937f496c4..31b698bf9cfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -606,11 +606,11 @@ static void dce_mi_allocate_dmif( } if (dce_mi->wa.single_head_rdreq_dmif_limit) { - uint32_t enable = (total_stream_num > 1) ? 0 : + uint32_t eanble = (total_stream_num > 1) ? 0 : dce_mi->wa.single_head_rdreq_dmif_limit; REG_UPDATE(MC_HUB_RDREQ_DMIF_LIMIT, - ENABLE, enable); + ENABLE, eanble); } } @@ -636,11 +636,11 @@ static void dce_mi_free_dmif( 10, 3500); if (dce_mi->wa.single_head_rdreq_dmif_limit) { - uint32_t enable = (total_stream_num > 1) ? 0 : + uint32_t eanble = (total_stream_num > 1) ? 0 : dce_mi->wa.single_head_rdreq_dmif_limit; REG_UPDATE(MC_HUB_RDREQ_DMIF_LIMIT, - ENABLE, enable); + ENABLE, eanble); } } diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index a5e122c721ec..76d54885374a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -399,37 +399,6 @@ static const struct dc_plane_cap plane_cap = { #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 #endif -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - case TRANSMITTER_UNIPHY_G: - return 6; - break; - default: - ASSERT(0); - return 0; - } -} - static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) @@ -537,14 +506,6 @@ static const struct dce_mem_input_mask mi_masks = { .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK }; -static const struct dce110_aux_registers_shift aux_shift = { - DCE10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE10_AUX_MASK_SH_LIST(_MASK) -}; - static struct mem_input *dce100_mem_input_create( struct dc_context *ctx, uint32_t inst) @@ -610,18 +571,14 @@ struct link_encoder *dce100_link_encoder_create( { struct dce110_link_encoder *enc110 = kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc110) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dce110_link_encoder_construct(enc110, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; @@ -654,10 +611,7 @@ struct dce_aux *dce100_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -1043,8 +997,6 @@ static bool construct( dc->caps.max_cursor_size = 128; dc->caps.dual_link_dvi = true; dc->caps.disable_dp_clk_share = true; - dc->caps.extended_aux_timeout_support = false; - for (i = 0; i < pool->base.pipe_count; i++) { pool->base.timing_generators[i] = dce100_timing_generator_create( diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index f0e837d14000..01a924bf477a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -944,6 +944,7 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) { /* notify audio driver for audio modes of monitor */ struct dc *core_dc; + struct pp_smu_funcs *pp_smu = NULL; struct clk_mgr *clk_mgr; unsigned int i, num_audio = 1; @@ -956,6 +957,9 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true) return; + if (core_dc->res_pool->pp_smu) + pp_smu = core_dc->res_pool->pp_smu; + if (pipe_ctx->stream_res.audio) { for (i = 0; i < MAX_PIPES; i++) { /*current_state not updated yet*/ @@ -980,6 +984,7 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx) { struct dc *dc; + struct pp_smu_funcs *pp_smu = NULL; struct clk_mgr *clk_mgr; if (!pipe_ctx || !pipe_ctx->stream) @@ -996,6 +1001,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream_res.audio) { pipe_ctx->stream_res.audio->enabled = false; + if (dc->res_pool->pp_smu) + pp_smu = dc->res_pool->pp_smu; + if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable( pipe_ctx->stream_res.stream_enc); @@ -1161,9 +1169,8 @@ static void build_audio_output( } } - if (state->clk_mgr && - (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || - pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) { + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || + pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { audio_output->pll_info.dp_dto_source_clock_in_khz = state->clk_mgr->funcs->get_dp_ref_clk_frequency( state->clk_mgr); @@ -1411,7 +1418,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0; - pipe_ctx->stream->link->psr_feature_enabled = false; + pipe_ctx->stream->link->psr_enabled = false; return DC_OK; } @@ -1421,6 +1428,8 @@ static enum dc_status apply_single_controller_ctx_to_hw( static void power_down_encoders(struct dc *dc) { int i; + enum connector_id connector_id; + enum signal_type signal = SIGNAL_TYPE_NONE; /* do not know BIOS back-front mapping, simply blank all. 
It will not * hurt for non-DP @@ -1431,12 +1440,15 @@ static void power_down_encoders(struct dc *dc) } for (i = 0; i < dc->link_count; i++) { - enum signal_type signal = dc->links[i]->connector_signal; + connector_id = dal_graphics_object_id_get_connector_id(dc->links[i]->link_id); + if ((connector_id == CONNECTOR_ID_DISPLAY_PORT) || + (connector_id == CONNECTOR_ID_EDP)) { - if ((signal == SIGNAL_TYPE_EDP) || - (signal == SIGNAL_TYPE_DISPLAY_PORT)) if (!dc->links[i]->wa_flags.dp_keep_receiver_powered) dp_receiver_power_ctrl(dc->links[i], false); + if (connector_id == CONNECTOR_ID_EDP) + signal = SIGNAL_TYPE_EDP; + } dc->links[i]->link_enc->funcs->disable_output( dc->links[i]->link_enc, signal); @@ -1517,6 +1529,18 @@ static struct dc_stream_state *get_edp_stream(struct dc_state *context) return NULL; } +static struct dc_link *get_edp_link(struct dc *dc) +{ + int i; + + // report any eDP links, even unconnected DDI's + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) + return dc->links[i]; + } + return NULL; +} + static struct dc_link *get_edp_link_with_sink( struct dc *dc, struct dc_state *context) @@ -1810,7 +1834,7 @@ static bool should_enable_fbc(struct dc *dc, return false; /* PSR should not be enabled */ - if (pipe_ctx->stream->link->psr_feature_enabled) + if (pipe_ctx->stream->link->psr_enabled) return false; /* Nothing to compress */ @@ -2440,6 +2464,7 @@ static void dce110_program_front_end_for_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx) { struct mem_input *mi = pipe_ctx->plane_res.mi; + struct pipe_ctx *old_pipe = NULL; struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; @@ -2447,6 +2472,9 @@ static void dce110_program_front_end_for_pipe( DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); + if (dc->current_state) + old_pipe = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx]; + memset(&adjust, 0, sizeof(adjust)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 83a4dbf6d76e..89620adc81d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -275,14 +275,6 @@ static const struct dce_stream_encoder_mask se_mask = { SE_COMMON_MASK_SH_LIST_DCE110(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCE_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE_AUX_MASK_SH_LIST(_MASK) -}; - #define opp_regs(id)\ [id] = {\ OPP_DCE_110_REG_LIST(id),\ @@ -448,37 +440,6 @@ static const struct dc_plane_cap underlay_plane_cap = { #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 #endif -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - case TRANSMITTER_UNIPHY_G: - return 6; - break; - default: - ASSERT(0); - return 0; - } -} - static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) @@ -656,18 +617,14 @@ static struct link_encoder *dce110_link_encoder_create( { struct 
dce110_link_encoder *enc110 = kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc110) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dce110_link_encoder_construct(enc110, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; @@ -700,10 +657,7 @@ struct dce_aux *dce110_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -1339,7 +1293,6 @@ static bool construct( dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 128; dc->caps.is_apu = true; - dc->caps.extended_aux_timeout_support = false; /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 97dcc5d0862b..21a657e79306 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -172,14 +172,6 @@ static const struct dce_abm_mask abm_mask = { ABM_MASK_SH_LIST_DCE110(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCE_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE_AUX_MASK_SH_LIST(_MASK) -}; - #define ipp_regs(id)\ [id] = {\ IPP_DCE110_REG_LIST_DCE_BASE(id)\ @@ -425,37 +417,6 @@ static const struct dc_plane_cap plane_cap = { #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 #endif -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - case TRANSMITTER_UNIPHY_G: - return 6; - break; - default: - ASSERT(0); - return 0; - } -} - static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) @@ -614,18 +575,14 @@ struct link_encoder *dce112_link_encoder_create( { struct dce110_link_encoder *enc110 = kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc110) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dce110_link_encoder_construct(enc110, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; @@ -673,10 +630,7 @@ struct dce_aux *dce112_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -1209,7 +1163,7 @@ static bool construct( dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 128; dc->caps.dual_link_dvi = true; - dc->caps.extended_aux_timeout_support = false; + 
/************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 63543f6918ff..7c52f7f9196c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -293,14 +293,6 @@ static const struct dce_stream_encoder_mask se_mask = { SE_COMMON_MASK_SH_LIST_DCE120(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCE12_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE12_AUX_MASK_SH_LIST(_MASK) -}; - #define opp_regs(id)\ [id] = {\ OPP_DCE_120_REG_LIST(id),\ @@ -364,37 +356,6 @@ static const struct dce_audio_mask audio_mask = { DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) }; -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - case TRANSMITTER_UNIPHY_G: - return 6; - break; - default: - ASSERT(0); - return 0; - } -} - #define clk_src_regs(index, id)\ [index] = {\ CS_COMMON_REG_LIST_DCE_112(id),\ @@ -443,10 +404,7 @@ struct dce_aux *dce120_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -697,18 +655,14 @@ static struct link_encoder *dce120_link_encoder_create( { struct dce110_link_encoder *enc110 = kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc110) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dce110_link_encoder_construct(enc110, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source]); @@ -1052,7 +1006,7 @@ static bool construct( dc->caps.max_cursor_size = 128; dc->caps.dual_link_dvi = true; dc->caps.psp_setup_panel_mode = true; - dc->caps.extended_aux_timeout_support = false; + dc->debug = debug_defaults; /************************************************* diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 3e8d4b49f279..643ccb0ade00 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -288,14 +288,6 @@ static const struct dce_opp_mask opp_mask = { OPP_COMMON_MASK_SH_LIST_DCE_80(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCE10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE10_AUX_MASK_SH_LIST(_MASK) -}; - #define aux_engine_regs(id)\ [id] = {\ AUX_COMMON_REG_LIST(id), \ @@ -439,37 +431,6 @@ static const struct dce_abm_mask abm_mask = { #define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 #endif -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case 
TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - case TRANSMITTER_UNIPHY_G: - return 6; - break; - default: - ASSERT(0); - return 0; - } -} - static void read_dce_straps( struct dc_context *ctx, struct resource_straps *straps) @@ -530,10 +491,7 @@ struct dce_aux *dce80_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -711,18 +669,14 @@ struct link_encoder *dce80_link_encoder_create( { struct dce110_link_encoder *enc110 = kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc110) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dce110_link_encoder_construct(enc110, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; @@ -941,7 +895,6 @@ static bool dce80_construct( dc->caps.i2c_speed_in_khz = 40; dc->caps.max_cursor_size = 128; dc->caps.dual_link_dvi = true; - dc->caps.extended_aux_timeout_support = false; /************************************************* * Create resources * diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index bbd6e01b3eca..01c7e30b9ce1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -393,10 +393,6 @@ bool cm_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; - rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green; - rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; - // All 3 color channels have same x corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); @@ -468,6 +464,13 @@ bool cm_helper_translate_curve_to_hw_format( i = 1; while (i != hw_points + 1) { + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) + rgb_plus_1->red = rgb->red; + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) + rgb_plus_1->green = rgb->green; + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) + rgb_plus_1->blue = rgb->blue; + rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); @@ -559,10 +562,6 @@ bool cm_helper_translate_curve_to_degamma_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; - rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green; - rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; - corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); corner_points[0].green.x = corner_points[0].red.x; @@ -625,6 +624,13 @@ bool 
cm_helper_translate_curve_to_degamma_hw_format( i = 1; while (i != hw_points + 1) { + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) + rgb_plus_1->red = rgb->red; + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) + rgb_plus_1->green = rgb->green; + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) + rgb_plus_1->blue = rgb->blue; + rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 997e9582edc7..d8b2da18db39 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -129,7 +129,7 @@ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) #define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) -bool dpp1_get_optimal_number_of_taps( +static bool dpp_get_optimal_number_of_taps( struct dpp *dpp, struct scaler_data *scl_data, const struct scaling_taps *in_taps) @@ -521,7 +521,7 @@ static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, - .dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps, + .dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps, .dpp_set_gamut_remap = dpp1_cm_set_gamut_remap, .dpp_set_csc_adjustment = dpp1_cm_set_output_csc_adjustment, .dpp_set_csc_default = dpp1_cm_set_output_csc_default, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index 1d4a7d640334..e2c613611ac9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -1504,11 +1504,6 @@ void dpp1_set_hdr_multiplier( struct dpp *dpp_base, uint32_t multiplier); -bool dpp1_get_optimal_number_of_taps( - struct dpp *dpp, - struct scaler_data *scl_data, - const struct scaling_taps *in_taps); - void dpp1_construct(struct dcn10_dpp *dpp1, struct dc_context *ctx, uint32_t inst, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 14d1be6c66e6..001db49e4bb2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -841,14 +841,6 @@ void min_set_viewport( REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0, PRI_VIEWPORT_X_START_C, viewport_c->x, PRI_VIEWPORT_Y_START_C, viewport_c->y); - - REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0, - SEC_VIEWPORT_WIDTH_C, viewport_c->width, - SEC_VIEWPORT_HEIGHT_C, viewport_c->height); - - REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0, - SEC_VIEWPORT_X_START_C, viewport_c->x, - SEC_VIEWPORT_Y_START_C, viewport_c->y); } void hubp1_read_state_common(struct hubp *hubp) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index ae70d9c0aa1d..cb20d10288c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -47,8 +47,6 @@ SRI(DCSURF_SEC_VIEWPORT_START, HUBP, id), \ SRI(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \ SRI(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \ - SRI(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \ - SRI(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \ SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id),\ SRI(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id),\ 
SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id),\ @@ -59,12 +57,8 @@ SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS, HUBPREQ, id),\ SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\ SRI(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id),\ - SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\ - SRI(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id),\ SRI(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\ SRI(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, HUBPREQ, id),\ - SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id),\ - SRI(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, HUBPREQ, id),\ SRI(DCSURF_SURFACE_INUSE, HUBPREQ, id),\ SRI(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id),\ SRI(DCSURF_SURFACE_INUSE_C, HUBPREQ, id),\ @@ -156,8 +150,6 @@ uint32_t DCSURF_SEC_VIEWPORT_START; \ uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C; \ uint32_t DCSURF_PRI_VIEWPORT_START_C; \ - uint32_t DCSURF_SEC_VIEWPORT_DIMENSION_C; \ - uint32_t DCSURF_SEC_VIEWPORT_START_C; \ uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; \ uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; \ uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; \ @@ -168,12 +160,8 @@ uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; \ uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; \ uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; \ - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C; \ - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_C; \ uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; \ uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; \ - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C; \ - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_C; \ uint32_t DCSURF_SURFACE_INUSE; \ uint32_t DCSURF_SURFACE_INUSE_HIGH; \ uint32_t DCSURF_SURFACE_INUSE_C; \ @@ -291,10 +279,6 @@ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_HEIGHT_C, mask_sh),\ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_X_START_C, mask_sh),\ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_Y_START_C, mask_sh),\ - HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_WIDTH_C, mask_sh),\ - HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_HEIGHT_C, mask_sh),\ - HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_X_START_C, mask_sh),\ - HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_Y_START_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, PRIMARY_SURFACE_ADDRESS_HIGH, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS, PRIMARY_SURFACE_ADDRESS, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, SECONDARY_SURFACE_ADDRESS_HIGH, mask_sh),\ @@ -305,12 +289,8 @@ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS, SECONDARY_META_SURFACE_ADDRESS, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, PRIMARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_C, PRIMARY_SURFACE_ADDRESS_C, mask_sh),\ - HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, SECONDARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\ - HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_C, SECONDARY_SURFACE_ADDRESS_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, PRIMARY_META_SURFACE_ADDRESS_HIGH_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, PRIMARY_META_SURFACE_ADDRESS_C, mask_sh),\ - HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, SECONDARY_META_SURFACE_ADDRESS_HIGH_C, mask_sh),\ - HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, SECONDARY_META_SURFACE_ADDRESS_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE, 
SURFACE_INUSE_ADDRESS, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH, SURFACE_INUSE_ADDRESS_HIGH, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_C, SURFACE_INUSE_ADDRESS_C, mask_sh),\ @@ -489,10 +469,6 @@ type PRI_VIEWPORT_HEIGHT_C; \ type PRI_VIEWPORT_X_START_C; \ type PRI_VIEWPORT_Y_START_C; \ - type SEC_VIEWPORT_WIDTH_C; \ - type SEC_VIEWPORT_HEIGHT_C; \ - type SEC_VIEWPORT_X_START_C; \ - type SEC_VIEWPORT_Y_START_C; \ type PRIMARY_SURFACE_ADDRESS_HIGH;\ type PRIMARY_SURFACE_ADDRESS;\ type SECONDARY_SURFACE_ADDRESS_HIGH;\ @@ -503,12 +479,8 @@ type SECONDARY_META_SURFACE_ADDRESS;\ type PRIMARY_SURFACE_ADDRESS_HIGH_C;\ type PRIMARY_SURFACE_ADDRESS_C;\ - type SECONDARY_SURFACE_ADDRESS_HIGH_C;\ - type SECONDARY_SURFACE_ADDRESS_C;\ type PRIMARY_META_SURFACE_ADDRESS_HIGH_C;\ type PRIMARY_META_SURFACE_ADDRESS_C;\ - type SECONDARY_META_SURFACE_ADDRESS_HIGH_C;\ - type SECONDARY_META_SURFACE_ADDRESS_C;\ type SURFACE_INUSE_ADDRESS;\ type SURFACE_INUSE_ADDRESS_HIGH;\ type SURFACE_INUSE_ADDRESS_C;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index eb91432621ab..60123db7ba02 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -670,10 +670,6 @@ static void dcn10_bios_golden_init(struct dc *dc) int i; bool allow_self_fresh_force_enable = true; -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) - if (dc->hwss.s0i3_golden_init_wa && dc->hwss.s0i3_golden_init_wa(dc)) - return; -#endif if (dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled) allow_self_fresh_force_enable = dc->res_pool->hubbub->funcs->is_allow_self_refresh_enabled(dc->res_pool->hubbub); @@ -1304,10 +1300,6 @@ static void dcn10_init_hw(struct dc *dc) } dc->hwss.enable_power_gating_plane(dc->hwseq, true); - - if (dc->clk_mgr->funcs->notify_wm_ranges) - dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - } static void dcn10_reset_hw_ctx_wrap( @@ -1460,15 +1452,15 @@ static void log_tf(struct dc_context *ctx, DC_LOG_ALL_TF_CHANNELS("Logging all channels..."); for (i = 0; i < hw_points_num; i++) { - DC_LOG_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value); - DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value); - DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value); + DC_LOG_GAMMA("R\t%d\t%llu\n", i, tf->tf_pts.red[i].value); + DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu\n", i, tf->tf_pts.green[i].value); + DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu\n", i, tf->tf_pts.blue[i].value); } for (i = hw_points_num; i < MAX_NUM_HW_POINTS; i++) { - DC_LOG_ALL_GAMMA("R\t%d\t%llu", i, tf->tf_pts.red[i].value); - DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu", i, tf->tf_pts.green[i].value); - DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu", i, tf->tf_pts.blue[i].value); + DC_LOG_ALL_GAMMA("R\t%d\t%llu\n", i, tf->tf_pts.red[i].value); + DC_LOG_ALL_TF_CHANNELS("G\t%d\t%llu\n", i, tf->tf_pts.green[i].value); + DC_LOG_ALL_TF_CHANNELS("B\t%d\t%llu\n", i, tf->tf_pts.blue[i].value); } } @@ -2312,7 +2304,8 @@ void update_dchubp_dpp( dc->res_pool->dccg->funcs->update_dpp_dto( dc->res_pool->dccg, dpp->inst, - pipe_ctx->plane_res.bw.dppclk_khz); + pipe_ctx->plane_res.bw.dppclk_khz, + false); else dc->clk_mgr->clks.dppclk_khz = should_divided_by_2 ? 
dc->clk_mgr->clks.dispclk_khz / 2 : @@ -2519,10 +2512,8 @@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - if (dc->hwss.setup_vupdate_interrupt) - dc->hwss.setup_vupdate_interrupt(pipe_ctx); - dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); + } if (pipe_ctx->plane_state != NULL) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index 88fcc395adf5..8bf5f0f2301d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -113,20 +113,6 @@ struct dcn10_link_enc_registers { uint32_t DIG_LANE_ENABLE; /* UNIPHY */ uint32_t CHANNEL_XBAR_CNTL; - /* DPCS */ - uint32_t RDPCSTX_PHY_CNTL3; - uint32_t RDPCSTX_PHY_CNTL4; - uint32_t RDPCSTX_PHY_CNTL5; - uint32_t RDPCSTX_PHY_CNTL6; - uint32_t RDPCSTX_PHY_CNTL7; - uint32_t RDPCSTX_PHY_CNTL8; - uint32_t RDPCSTX_PHY_CNTL9; - uint32_t RDPCSTX_PHY_CNTL10; - uint32_t RDPCSTX_PHY_CNTL11; - uint32_t RDPCSTX_PHY_CNTL12; - uint32_t RDPCSTX_PHY_CNTL13; - uint32_t RDPCSTX_PHY_CNTL14; - uint32_t RDPCSTX_PHY_CNTL15; /* indirect registers */ uint32_t RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2; uint32_t RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3; @@ -264,10 +250,6 @@ struct dcn10_link_enc_registers { type RDPCS_EXT_REFCLK_EN;\ type RDPCS_TX_FIFO_EN;\ type UNIPHY_LINK_ENABLE;\ - type UNIPHY_CHANNEL0_XBAR_SOURCE;\ - type UNIPHY_CHANNEL1_XBAR_SOURCE;\ - type UNIPHY_CHANNEL2_XBAR_SOURCE;\ - type UNIPHY_CHANNEL3_XBAR_SOURCE;\ type UNIPHY_CHANNEL0_INVERT;\ type UNIPHY_CHANNEL1_INVERT;\ type UNIPHY_CHANNEL2_INVERT;\ @@ -355,46 +337,16 @@ struct dcn10_link_enc_registers { type RDPCS_TX_FIFO_ERROR_MASK;\ type RDPCS_DPALT_DISABLE_TOGGLE_MASK;\ type RDPCS_DPALT_4LANE_TOGGLE_MASK;\ - type RDPCS_PHY_DPALT_DP4;\ type RDPCS_PHY_DPALT_DISABLE;\ type RDPCS_PHY_DPALT_DISABLE_ACK;\ type RDPCS_PHY_DP_MPLLB_V2I;\ type RDPCS_PHY_DP_MPLLB_FREQ_VCO;\ - type RDPCS_PHY_DP_MPLLB_CP_INT_GS;\ - type RDPCS_PHY_RX_VREF_CTRL;\ type RDPCS_PHY_DP_MPLLB_CP_INT;\ type RDPCS_PHY_DP_MPLLB_CP_PROP;\ type RDPCS_PHY_RX_REF_LD_VAL;\ type RDPCS_PHY_RX_VCO_LD_VAL;\ type DPCSTX_DEBUG_CONFIG; \ - type RDPCSTX_DEBUG_CONFIG; \ - type RDPCS_PHY_DP_TX0_EQ_MAIN;\ - type RDPCS_PHY_DP_TX0_EQ_PRE;\ - type RDPCS_PHY_DP_TX0_EQ_POST;\ - type RDPCS_PHY_DP_TX1_EQ_MAIN;\ - type RDPCS_PHY_DP_TX1_EQ_PRE;\ - type RDPCS_PHY_DP_TX1_EQ_POST;\ - type RDPCS_PHY_DP_TX2_EQ_MAIN;\ - type RDPCS_PHY_DP_MPLLB_CP_PROP_GS;\ - type RDPCS_PHY_DP_TX2_EQ_PRE;\ - type RDPCS_PHY_DP_TX2_EQ_POST;\ - type RDPCS_PHY_DP_TX3_EQ_MAIN;\ - type RDPCS_PHY_DCO_RANGE;\ - type RDPCS_PHY_DCO_FINETUNE;\ - type RDPCS_PHY_DP_TX3_EQ_PRE;\ - type RDPCS_PHY_DP_TX3_EQ_POST;\ - type RDPCS_PHY_SUP_PRE_HP;\ - type RDPCS_PHY_DP_TX0_VREGDRV_BYP;\ - type RDPCS_PHY_DP_TX1_VREGDRV_BYP;\ - type RDPCS_PHY_DP_TX2_VREGDRV_BYP;\ - type RDPCS_PHY_DP_TX3_VREGDRV_BYP;\ - type RDPCS_DMCU_DPALT_DIS_BLOCK_REG;\ - type UNIPHYA_SOFT_RESET;\ - type UNIPHYB_SOFT_RESET;\ - type UNIPHYC_SOFT_RESET;\ - type UNIPHYD_SOFT_RESET;\ - type UNIPHYE_SOFT_RESET;\ - type UNIPHYF_SOFT_RESET + type RDPCSTX_DEBUG_CONFIG #define DCN20_LINK_ENCODER_REG_FIELD_LIST(type) \ type DIG_LANE0EN;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c index 0a9ad692f541..e9ebbbe256b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c @@ -168,10 +168,7 @@ static void 
opp1_set_pixel_encoding( REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 0); break; case PIXEL_ENCODING_YCBCR422: - REG_UPDATE_3(FMT_CONTROL, - FMT_PIXEL_ENCODING, 1, - FMT_SUBSAMPLING_MODE, 2, - FMT_CBCR_BIT_REDUCTION_BYPASS, 0); + REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 1); break; case PIXEL_ENCODING_YCBCR420: REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 2); @@ -240,9 +237,6 @@ void opp1_set_dyn_expansion( FMT_DYNAMIC_EXP_EN, 0, FMT_DYNAMIC_EXP_MODE, 0); - if (opp->dyn_expansion == DYN_EXPANSION_DISABLE) - return; - /*00 - 10-bit -> 12-bit dynamic expansion*/ /*01 - 8-bit -> 12-bit dynamic expansion*/ if (signal == SIGNAL_TYPE_HDMI_TYPE_A || diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h index 2c0ecfa5a643..0f10adea000c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h @@ -116,8 +116,6 @@ type FMT_RAND_G_SEED; \ type FMT_RAND_B_SEED; \ type FMT_PIXEL_ENCODING; \ - type FMT_SUBSAMPLING_MODE; \ - type FMT_CBCR_BIT_REDUCTION_BYPASS; \ type FMT_CLAMP_DATA_EN; \ type FMT_CLAMP_COLOR_FORMAT; \ type FMT_DYNAMIC_EXP_EN; \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index dabccbd49ad4..e74a07d03fde 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1230,25 +1230,59 @@ bool optc1_is_stereo_left_eye(struct timing_generator *optc) return ret; } -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing) +bool optc1_is_matching_timing(struct timing_generator *tg, + const struct dc_crtc_timing *otg_timing) { + struct dc_crtc_timing hw_crtc_timing = {0}; struct dcn_otg_state s = {0}; - if (tg == NULL || hw_crtc_timing == NULL) + if (tg == NULL || otg_timing == NULL) return false; optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); - hw_crtc_timing->h_total = s.h_total + 1; - hw_crtc_timing->h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end); - hw_crtc_timing->h_front_porch = s.h_total + 1 - s.h_blank_start; - hw_crtc_timing->h_sync_width = s.h_sync_a_end - s.h_sync_a_start; + hw_crtc_timing.h_total = s.h_total + 1; + hw_crtc_timing.h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end); + hw_crtc_timing.h_front_porch = s.h_total + 1 - s.h_blank_start; + hw_crtc_timing.h_sync_width = s.h_sync_a_end - s.h_sync_a_start; - hw_crtc_timing->v_total = s.v_total + 1; - hw_crtc_timing->v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end); - hw_crtc_timing->v_front_porch = s.v_total + 1 - s.v_blank_start; - hw_crtc_timing->v_sync_width = s.v_sync_a_end - s.v_sync_a_start; + hw_crtc_timing.v_total = s.v_total + 1; + hw_crtc_timing.v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end); + hw_crtc_timing.v_front_porch = s.v_total + 1 - s.v_blank_start; + hw_crtc_timing.v_sync_width = s.v_sync_a_end - s.v_sync_a_start; + + if (otg_timing->h_total != hw_crtc_timing.h_total) + return false; + + if (otg_timing->h_border_left != hw_crtc_timing.h_border_left) + return false; + + if (otg_timing->h_addressable != hw_crtc_timing.h_addressable) + return false; + + if (otg_timing->h_border_right != hw_crtc_timing.h_border_right) + return false; + + if (otg_timing->h_front_porch != hw_crtc_timing.h_front_porch) + return false; + + if (otg_timing->h_sync_width != hw_crtc_timing.h_sync_width) + return false; + + if (otg_timing->v_total != 
hw_crtc_timing.v_total) + return false; + + if (otg_timing->v_border_top != hw_crtc_timing.v_border_top) + return false; + + if (otg_timing->v_addressable != hw_crtc_timing.v_addressable) + return false; + + if (otg_timing->v_border_bottom != hw_crtc_timing.v_border_bottom) + return false; + + if (otg_timing->v_sync_width != hw_crtc_timing.v_sync_width) + return false; return true; } @@ -1452,6 +1486,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .get_frame_count = optc1_get_vblank_counter, .get_scanoutpos = optc1_get_crtc_scanoutpos, .get_otg_active_size = optc1_get_otg_active_size, + .is_matching_timing = optc1_is_matching_timing, .set_early_control = optc1_set_early_control, /* used by enable_timing_synchronization. Not need for FPGA */ .wait_for_state = optc1_wait_for_state, @@ -1479,8 +1514,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .configure_crc = optc1_configure_crc, .set_vtg_params = optc1_set_vtg_params, .program_manual_trigger = optc1_program_manual_trigger, - .setup_manual_trigger = optc1_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, + .setup_manual_trigger = optc1_setup_manual_trigger }; void dcn10_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index c8d795b335ba..83575599672e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -547,8 +547,9 @@ struct dcn_otg_state { void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s); -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing); +bool optc1_is_matching_timing( + struct timing_generator *tg, + const struct dc_crtc_timing *otg_timing); bool optc1_validate_timing( struct timing_generator *optc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 15640aedd664..1599bb971111 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -319,14 +319,6 @@ static const struct dcn10_link_enc_mask le_mask = { LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCN10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN10_AUX_MASK_SH_LIST(_MASK) -}; - #define ipp_regs(id)\ [id] = {\ IPP_REG_LIST_DCN10(id),\ @@ -479,28 +471,6 @@ static const struct dcn_hubbub_mask hubbub_mask = { HUBBUB_MASK_SH_LIST_DCN10(_MASK) }; -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - default: - ASSERT(0); - return 0; - } -} - #define clk_src_regs(index, pllid)\ [index] = {\ CS_COMMON_REG_LIST_DCN1_0(index, pllid),\ @@ -672,10 +642,7 @@ struct dce_aux *dcn10_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -784,18 +751,14 @@ struct link_encoder *dcn10_link_encoder_create( { struct dcn10_link_encoder *enc10 = kzalloc(sizeof(struct 
dcn10_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc10) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dcn10_link_encoder_construct(enc10, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, @@ -1345,8 +1308,6 @@ static bool construct( dc->caps.max_slave_planes = 1; dc->caps.is_apu = true; dc->caps.post_blend_color_processing = false; - dc->caps.extended_aux_timeout_support = false; - /* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */ dc->caps.force_dp_tps4_for_cp2520 = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 06e5bbb4545c..9aa258f3550b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -1553,66 +1553,6 @@ unsigned int enc1_dig_source_otg( return tg_inst; } -bool enc1_stream_encoder_dp_get_pixel_format( - struct stream_encoder *enc, - enum dc_pixel_encoding *encoding, - enum dc_color_depth *depth) -{ - uint32_t hw_encoding = 0; - uint32_t hw_depth = 0; - struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - - if (enc == NULL || - encoding == NULL || - depth == NULL) - return false; - - REG_GET_2(DP_PIXEL_FORMAT, - DP_PIXEL_ENCODING, &hw_encoding, - DP_COMPONENT_DEPTH, &hw_depth); - - switch (hw_depth) { - case DP_COMPONENT_PIXEL_DEPTH_6BPC: - *depth = COLOR_DEPTH_666; - break; - case DP_COMPONENT_PIXEL_DEPTH_8BPC: - *depth = COLOR_DEPTH_888; - break; - case DP_COMPONENT_PIXEL_DEPTH_10BPC: - *depth = COLOR_DEPTH_101010; - break; - case DP_COMPONENT_PIXEL_DEPTH_12BPC: - *depth = COLOR_DEPTH_121212; - break; - case DP_COMPONENT_PIXEL_DEPTH_16BPC: - *depth = COLOR_DEPTH_161616; - break; - default: - *depth = COLOR_DEPTH_UNDEFINED; - break; - } - - switch (hw_encoding) { - case DP_PIXEL_ENCODING_TYPE_RGB444: - *encoding = PIXEL_ENCODING_RGB; - break; - case DP_PIXEL_ENCODING_TYPE_YCBCR422: - *encoding = PIXEL_ENCODING_YCBCR422; - break; - case DP_PIXEL_ENCODING_TYPE_YCBCR444: - case DP_PIXEL_ENCODING_TYPE_Y_ONLY: - *encoding = PIXEL_ENCODING_YCBCR444; - break; - case DP_PIXEL_ENCODING_TYPE_YCBCR420: - *encoding = PIXEL_ENCODING_YCBCR420; - break; - default: - *encoding = PIXEL_ENCODING_UNDEFINED; - break; - } - return true; -} - static const struct stream_encoder_funcs dcn10_str_enc_funcs = { .dp_set_stream_attribute = enc1_stream_encoder_dp_set_stream_attribute, @@ -1649,8 +1589,6 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { .dig_connect_to_otg = enc1_dig_connect_to_otg, .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute, .dig_source_otg = enc1_dig_source_otg, - - .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format, }; void dcn10_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index c9cbc21d121e..a512cbea00d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -621,9 +621,4 @@ void get_audio_clock_info( void enc1_reset_hdmi_stream_attribute( struct stream_encoder *enc); -bool enc1_stream_encoder_dp_get_pixel_format( - struct stream_encoder *enc, - enum dc_pixel_encoding *encoding, - enum dc_color_depth 
*depth); - #endif /* __DC_STREAM_ENCODER_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c index 1e1151356e60..16476ed25536 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c @@ -44,12 +44,16 @@ #define DC_LOGGER \ dccg->ctx->logger -void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) +void dccg2_update_dpp_dto(struct dccg *dccg, + int dpp_inst, + int req_dppclk, + bool reduce_divider_only) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); if (dccg->ref_dppclk && req_dppclk) { int ref_dppclk = dccg->ref_dppclk; + int current_phase, current_modulo; ASSERT(req_dppclk <= ref_dppclk); /* need to clamp to 8 bits */ @@ -61,9 +65,28 @@ void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) if (req_dppclk > ref_dppclk) req_dppclk = ref_dppclk; } - REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, - DPPCLK0_DTO_PHASE, req_dppclk, - DPPCLK0_DTO_MODULO, ref_dppclk); + + REG_GET_2(DPPCLK_DTO_PARAM[dpp_inst], + DPPCLK0_DTO_PHASE, ¤t_phase, + DPPCLK0_DTO_MODULO, ¤t_modulo); + + if (reduce_divider_only) { + // requested phase/modulo greater than current + if (req_dppclk * current_modulo >= current_phase * ref_dppclk) { + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, req_dppclk, + DPPCLK0_DTO_MODULO, ref_dppclk); + } else { + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, current_phase, + DPPCLK0_DTO_MODULO, current_modulo); + } + } else { + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, req_dppclk, + DPPCLK0_DTO_MODULO, ref_dppclk); + } + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_ENABLE[dpp_inst], 1); } else { @@ -96,6 +119,32 @@ void dccg2_get_dccg_ref_freq(struct dccg *dccg, void dccg2_init(struct dccg *dccg) { + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + // Fallthrough intentional to program all available dpp_dto's + switch (dccg_dcn->base.ctx->dc->res_pool->pipe_count) { + case 6: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[5], 1); + /* Fall through */ + case 5: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[4], 1); + /* Fall through */ + case 4: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[3], 1); + /* Fall through */ + case 3: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[2], 1); + /* Fall through */ + case 2: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[1], 1); + /* Fall through */ + case 1: + REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[0], 1); + break; + default: + ASSERT(false); + break; + } } static const struct dccg_funcs dccg2_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index 2205cb0204e7..74a074a873cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -97,7 +97,7 @@ struct dcn_dccg { const struct dccg_mask *dccg_mask; }; -void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk); +void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk, bool raise_divider_only); void dccg2_get_dccg_ref_freq(struct dccg *dccg, unsigned int xtalin_freq_inKhz, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index 4d7e45892f08..2f5aade1e882 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -376,6 +376,13 @@ bool dpp2_get_optimal_number_of_taps( struct 
scaler_data *scl_data, const struct scaling_taps *in_taps) { + uint32_t pixel_width; + + if (scl_data->viewport.width > scl_data->recout.width) + pixel_width = scl_data->recout.width; + else + pixel_width = scl_data->viewport.width; + /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ if (scl_data->viewport.width != scl_data->h_active && scl_data->viewport.height != scl_data->v_active && @@ -457,7 +464,7 @@ static struct dpp_funcs dcn20_dpp_funcs = { .dpp_read_state = dpp20_read_state, .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, - .dpp_get_optimal_number_of_taps = dpp1_get_optimal_number_of_taps, + .dpp_get_optimal_number_of_taps = dpp2_get_optimal_number_of_taps, .dpp_set_gamut_remap = dpp1_cm_set_gamut_remap, .dpp_set_csc_adjustment = NULL, .dpp_set_csc_default = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h index 5b03b737b1d6..290b2854bd2c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h @@ -30,20 +30,16 @@ #define TO_DCN20_DPP(dpp)\ container_of(dpp, struct dcn20_dpp, base) -#define TF_REG_LIST_DCN20_COMMON_UPDATED(id) \ +#define TF_REG_LIST_DCN20(id) \ + TF_REG_LIST_DCN(id), \ SRI(CM_BLNDGAM_LUT_WRITE_EN_MASK, CM, id), \ - SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM, id), \ - SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM, id), \ - SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM, id), \ - SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM, id), \ - SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM, id), \ - SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM, id) - -#define TF_REG_LIST_DCN20_COMMON(id) \ SRI(CM_BLNDGAM_CONTROL, CM, id), \ SRI(CM_BLNDGAM_RAMB_START_CNTL_B, CM, id), \ SRI(CM_BLNDGAM_RAMB_START_CNTL_G, CM, id), \ SRI(CM_BLNDGAM_RAMB_START_CNTL_R, CM, id), \ + SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM, id), \ + SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM, id), \ + SRI(CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM, id), \ SRI(CM_BLNDGAM_RAMB_END_CNTL1_B, CM, id), \ SRI(CM_BLNDGAM_RAMB_END_CNTL2_B, CM, id), \ SRI(CM_BLNDGAM_RAMB_END_CNTL1_G, CM, id), \ @@ -70,6 +66,9 @@ SRI(CM_BLNDGAM_RAMA_START_CNTL_B, CM, id), \ SRI(CM_BLNDGAM_RAMA_START_CNTL_G, CM, id), \ SRI(CM_BLNDGAM_RAMA_START_CNTL_R, CM, id), \ + SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM, id), \ + SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM, id), \ + SRI(CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM, id), \ SRI(CM_BLNDGAM_RAMA_END_CNTL1_B, CM, id), \ SRI(CM_BLNDGAM_RAMA_END_CNTL2_B, CM, id), \ SRI(CM_BLNDGAM_RAMA_END_CNTL1_G, CM, id), \ @@ -148,12 +147,7 @@ SRI(CM_SHAPER_RAMA_REGION_28_29, CM, id), \ SRI(CM_SHAPER_RAMA_REGION_30_31, CM, id), \ SRI(CM_SHAPER_RAMA_REGION_32_33, CM, id), \ - SRI(CM_SHAPER_LUT_INDEX, CM, id) - -#define TF_REG_LIST_DCN20(id) \ - TF_REG_LIST_DCN(id), \ - TF_REG_LIST_DCN20_COMMON(id), \ - TF_REG_LIST_DCN20_COMMON_UPDATED(id), \ + SRI(CM_SHAPER_LUT_INDEX, CM, id), \ SRI(CURSOR_CONTROL, CURSOR0_, id), \ SRI(ALPHA_2BIT_LUT, CNVC_CFG, id), \ SRI(FCNV_FP_BIAS_R, CNVC_CFG, id), \ @@ -172,41 +166,27 @@ SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\ SRI(DSCL_MEM_PWR_CTRL, DSCL, id) - -#define TF_REG_LIST_SH_MASK_DCN20_UPDATED(mask_sh)\ - TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_B, CM_BLNDGAM_RAMB_EXP_REGION_END_B, mask_sh), \ - 
TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_G, CM_BLNDGAM_RAMB_EXP_REGION_END_G, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_R, CM_BLNDGAM_RAMB_EXP_REGION_END_R, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_B, CM_BLNDGAM_RAMA_EXP_REGION_END_B, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_G, CM_BLNDGAM_RAMA_EXP_REGION_END_G, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_R, CM_BLNDGAM_RAMA_EXP_REGION_END_R, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_B, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_G, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_R, mask_sh), \ +#define TF_REG_LIST_SH_MASK_DCN20(mask_sh)\ + TF_REG_LIST_SH_MASK_DCN(mask_sh), \ TF_SF(CM0_CM_BLNDGAM_CONTROL, CM_BLNDGAM_LUT_MODE, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_EN_MASK, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_SEL, mask_sh), \ - TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_CONFIG_STATUS, mask_sh), \ - TF_SF(CM0_CM_SHAPER_CONTROL, CM_SHAPER_LUT_MODE, mask_sh) - - -#define TF_REG_LIST_SH_MASK_DCN20_COMMON(mask_sh)\ - TF_SF(CM0_CM_3DLUT_MODE, CM_3DLUT_MODE, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_START_B, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_B, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_START_G, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_G, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_START_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_START_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_START_SEGMENT_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_B, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_G, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_SLOPE_CNTL_R, CM_BLNDGAM_RAMB_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_B, CM_BLNDGAM_RAMB_EXP_REGION_END_B, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_B, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_B, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_G, CM_BLNDGAM_RAMB_EXP_REGION_END_G, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_G, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_G, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL1_R, CM_BLNDGAM_RAMB_EXP_REGION_END_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_R, CM_BLNDGAM_RAMB_EXP_REGION_END_SLOPE_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMB_END_CNTL2_R, CM_BLNDGAM_RAMB_EXP_REGION_END_BASE_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION0_LUT_OFFSET, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION0_NUM_SEGMENTS, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMB_REGION_0_1, CM_BLNDGAM_RAMB_EXP_REGION1_LUT_OFFSET, mask_sh), \ @@ -281,9 +261,18 @@ TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_G, 
CM_BLNDGAM_RAMA_EXP_REGION_START_SEGMENT_G, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_START_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_START_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_START_SEGMENT_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_B, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_G, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_SLOPE_CNTL_R, CM_BLNDGAM_RAMA_EXP_REGION_LINEAR_SLOPE_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_B, CM_BLNDGAM_RAMA_EXP_REGION_END_B, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_B, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_B, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_G, CM_BLNDGAM_RAMA_EXP_REGION_END_G, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_G, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_G, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL1_R, CM_BLNDGAM_RAMA_EXP_REGION_END_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_SLOPE_R, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_RAMA_END_CNTL2_R, CM_BLNDGAM_RAMA_EXP_REGION_END_BASE_R, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION0_LUT_OFFSET, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION0_NUM_SEGMENTS, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_0_1, CM_BLNDGAM_RAMA_EXP_REGION1_LUT_OFFSET, mask_sh), \ @@ -352,6 +341,9 @@ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION32_NUM_SEGMENTS, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION33_LUT_OFFSET, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_RAMA_REGION_32_33, CM_BLNDGAM_RAMA_EXP_REGION33_NUM_SEGMENTS, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_EN_MASK, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_LUT_WRITE_SEL, mask_sh), \ + TF_SF(CM0_CM_BLNDGAM_LUT_WRITE_EN_MASK, CM_BLNDGAM_CONFIG_STATUS, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_LUT_INDEX, CM_BLNDGAM_LUT_INDEX, mask_sh), \ TF_SF(CM0_CM_BLNDGAM_LUT_DATA, CM_BLNDGAM_LUT_DATA, mask_sh), \ TF_SF(CM0_CM_MEM_PWR_CTRL, BLNDGAM_MEM_PWR_FORCE, mask_sh), \ @@ -364,6 +356,7 @@ TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_WRITE_EN_MASK, mask_sh), \ TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_RAM_SEL, mask_sh), \ TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_30BIT_EN, mask_sh), \ + TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_CONFIG_STATUS, mask_sh), \ TF_SF(CM0_CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_READ_SEL, mask_sh), \ TF_SF(CM0_CM_SHAPER_CONTROL, CM_SHAPER_LUT_MODE, mask_sh), \ TF_SF(CM0_CM_SHAPER_RAMB_START_CNTL_B, CM_SHAPER_RAMB_EXP_REGION_START_B, mask_sh), \ @@ -528,14 +521,9 @@ TF_SF(CM0_CM_SHAPER_RAMA_REGION_32_33, CM_SHAPER_RAMA_EXP_REGION33_NUM_SEGMENTS, mask_sh), \ TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_LUT_WRITE_EN_MASK, mask_sh), \ TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_LUT_WRITE_SEL, mask_sh), \ + TF_SF(CM0_CM_SHAPER_LUT_WRITE_EN_MASK, CM_SHAPER_CONFIG_STATUS, mask_sh), \ TF_SF(CM0_CM_SHAPER_LUT_INDEX, CM_SHAPER_LUT_INDEX, mask_sh), \ - TF_SF(CM0_CM_SHAPER_LUT_DATA, CM_SHAPER_LUT_DATA, mask_sh) - - -#define TF_REG_LIST_SH_MASK_DCN20(mask_sh)\ - TF_REG_LIST_SH_MASK_DCN(mask_sh), \ - TF_REG_LIST_SH_MASK_DCN20_COMMON(mask_sh), \ - TF_REG_LIST_SH_MASK_DCN20_UPDATED(mask_sh), \ + 
TF_SF(CM0_CM_SHAPER_LUT_DATA, CM_SHAPER_LUT_DATA, mask_sh), \ TF_SF(CM0_CM_DGAM_LUT_WRITE_EN_MASK, CM_DGAM_CONFIG_STATUS, mask_sh), \ TF_SF(CM0_CM_CONTROL, CM_BYPASS, mask_sh), \ TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \ @@ -572,7 +560,6 @@ TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\ TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh) - #define TF_REG_FIELD_LIST_DCN2_0(type) \ TF_REG_FIELD_LIST(type) \ type CM_BLNDGAM_LUT_DATA; \ @@ -606,7 +593,6 @@ type OBUF_MEM_PWR_FORCE;\ type LUT_MEM_PWR_FORCE - struct dcn2_dpp_shift { TF_REG_FIELD_LIST_DCN2_0(uint8_t); }; @@ -705,6 +691,11 @@ void dpp2_set_hdr_multiplier( struct dpp *dpp_base, uint32_t multiplier); +bool dpp2_get_optimal_number_of_taps( + struct dpp *dpp, + struct scaler_data *scl_data, + const struct scaling_taps *in_taps); + bool dpp2_construct(struct dcn20_dpp *dpp2, struct dc_context *ctx, uint32_t inst, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 63eb377ed9c0..1b419407af94 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -118,7 +118,7 @@ static void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock dsc_enc_caps->color_formats.bits.RGB = 1; dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; + dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 0; dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c index 880954ac0b02..cd8bc92ce3ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c @@ -722,6 +722,7 @@ bool dwb_program_horz_scalar(struct dcn20_dwbc *dwbc20, struct scaling_taps num_taps) { uint32_t h_ratio_luma = 1; + uint32_t h_ratio_chroma = 1; uint32_t h_taps_luma = num_taps.h_taps; uint32_t h_taps_chroma = num_taps.h_taps_c; int32_t h_init_phase_luma = 0; @@ -746,6 +747,7 @@ bool dwb_program_horz_scalar(struct dcn20_dwbc *dwbc20, h_ratio_luma = -1; else h_ratio_luma = dc_fixpt_u3d19(tmp_h_ratio_luma) << 5; + h_ratio_chroma = h_ratio_luma * 2; /*Program ratio*/ REG_UPDATE(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, h_ratio_luma); @@ -801,6 +803,7 @@ bool dwb_program_vert_scalar(struct dcn20_dwbc *dwbc20, enum dwb_subsample_position subsample_position) { uint32_t v_ratio_luma = 1; + uint32_t v_ratio_chroma = 1; uint32_t v_taps_luma = num_taps.v_taps; uint32_t v_taps_chroma = num_taps.v_taps_c; int32_t v_init_phase_luma = 0; @@ -824,6 +827,7 @@ bool dwb_program_vert_scalar(struct dcn20_dwbc *dwbc20, v_ratio_luma = -1; else v_ratio_luma = dc_fixpt_u3d19(tmp_v_ratio_luma) << 5; + v_ratio_chroma = v_ratio_luma * 2; /*Program ratio*/ REG_UPDATE(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, v_ratio_luma); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 8b8438566101..b83c022e2c6f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -186,13 +186,14 @@ static void hubbub2_get_blk256_size(unsigned int *blk256_width, unsigned int *bl } static void hubbub2_det_request_size( - unsigned int detile_buf_size, unsigned int height, unsigned int width, unsigned int bpe, bool 
*req128_horz_wc, bool *req128_vert_wc) { + unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */ + unsigned int blk256_height = 0; unsigned int blk256_width = 0; unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; @@ -235,8 +236,7 @@ bool hubbub2_get_dcc_compression_cap(struct hubbub *hubbub, &segment_order_horz, &segment_order_vert)) return false; - hubbub2_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, - input->surface_size.height, input->surface_size.width, + hubbub2_det_request_size(input->surface_size.height, input->surface_size.width, bpe, &req128_horz_wc, &req128_vert_wc); if (!req128_horz_wc && !req128_vert_wc) { @@ -588,7 +588,7 @@ static void hubbub2_program_watermarks( DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 180); - hubbub->funcs->allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); } static const struct hubbub_funcs hubbub2_funcs = { @@ -600,8 +600,7 @@ static const struct hubbub_funcs hubbub2_funcs = { .get_dcc_compression_cap = hubbub2_get_dcc_compression_cap, .wm_read_state = hubbub2_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, - .program_watermarks = hubbub2_program_watermarks, - .allow_self_refresh_control = hubbub1_allow_self_refresh_control + .program_watermarks = hubbub2_program_watermarks }; void hubbub2_construct(struct dcn20_hubbub *hubbub, @@ -619,5 +618,4 @@ void hubbub2_construct(struct dcn20_hubbub *hubbub, hubbub->masks = hubbub_mask; hubbub->debug_test_index_pstate = 0xB; - hubbub->detile_buf_size = 164 * 1024; /* 164KB for DCN2.0 */ } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h index 501532dd523a..626117d3b4e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h @@ -81,7 +81,6 @@ struct dcn20_hubbub { unsigned int debug_test_index_pstate; struct dcn_watermark_set watermarks; struct dcn20_vmid vmid[16]; - unsigned int detile_buf_size; }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 921a36668ced..1212da12c414 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -688,7 +688,7 @@ bool dcn20_set_output_transfer_func(struct pipe_ctx *pipe_ctx, return true; } -bool dcn20_set_blend_lut( +static bool dcn20_set_blend_lut( struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state) { struct dpp *dpp_base = pipe_ctx->plane_res.dpp; @@ -710,7 +710,7 @@ bool dcn20_set_blend_lut( return result; } -bool dcn20_set_shaper_3dlut( +static bool dcn20_set_shaper_3dlut( struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state) { struct dpp *dpp_base = pipe_ctx->plane_res.dpp; @@ -999,6 +999,72 @@ void dcn20_enable_plane( } +static void dcn20_program_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + pipe_ctx->plane_state->update_flags.bits.full_update = + context->commit_hints.full_update_needed ? 
1 : pipe_ctx->plane_state->update_flags.bits.full_update; + + if (pipe_ctx->plane_state->update_flags.bits.full_update) + dcn20_enable_plane(dc, pipe_ctx, context); + + update_dchubp_dpp(dc, pipe_ctx, context); + + set_hdr_multiplier(pipe_ctx); + + if (pipe_ctx->plane_state->update_flags.bits.full_update || + pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + pipe_ctx->plane_state->update_flags.bits.gamma_change) + dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state); + + /* dcn10_translate_regamma_to_hw_format takes 750us to finish + * only do gamma programming for full update. + * TODO: This can be further optimized/cleaned up + * Always call this for now since it does memcmp inside before + * doing heavy calculation and programming + */ + if (pipe_ctx->plane_state->update_flags.bits.full_update) + dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream); +} + +static void dcn20_program_all_pipe_in_tree( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + if (pipe_ctx->top_pipe == NULL && !pipe_ctx->prev_odm_pipe) { + bool blank = !is_pipe_tree_visible(pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + + dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); + + if (dc->hwss.update_odm) + dc->hwss.update_odm(dc, context, pipe_ctx); + } + + if (pipe_ctx->plane_state != NULL) + dcn20_program_pipe(dc, pipe_ctx, context); + + if (pipe_ctx->bottom_pipe != NULL) { + ASSERT(pipe_ctx->bottom_pipe != pipe_ctx); + dcn20_program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context); + } else if (pipe_ctx->next_odm_pipe != NULL) { + ASSERT(pipe_ctx->next_odm_pipe != pipe_ctx); + dcn20_program_all_pipe_in_tree(dc, pipe_ctx->next_odm_pipe, context); + } +} + void dcn20_pipe_control_lock_global( struct dc *dc, struct pipe_ctx *pipe, @@ -1058,456 +1124,114 @@ void dcn20_pipe_control_lock( } } -static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) -{ - new_pipe->update_flags.raw = 0; - - /* Exit on unchanged, unused pipe */ - if (!old_pipe->plane_state && !new_pipe->plane_state) - return; - /* Detect pipe enable/disable */ - if (!old_pipe->plane_state && new_pipe->plane_state) { - new_pipe->update_flags.bits.enable = 1; - new_pipe->update_flags.bits.mpcc = 1; - new_pipe->update_flags.bits.dppclk = 1; - new_pipe->update_flags.bits.hubp_interdependent = 1; - new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; - new_pipe->update_flags.bits.gamut_remap = 1; - new_pipe->update_flags.bits.scaler = 1; - new_pipe->update_flags.bits.viewport = 1; - if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { - new_pipe->update_flags.bits.odm = 1; - new_pipe->update_flags.bits.global_sync = 1; - } - return; - } - if (old_pipe->plane_state && !new_pipe->plane_state) { - new_pipe->update_flags.bits.disable = 1; - return; - } - - /* Detect top pipe only changes */ - if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { - /* Detect odm changes */ - if ((old_pipe->next_odm_pipe && new_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->pipe_idx != new_pipe->next_odm_pipe->pipe_idx) - || (!old_pipe->next_odm_pipe && new_pipe->next_odm_pipe) - || (old_pipe->next_odm_pipe && !new_pipe->next_odm_pipe) - || 
old_pipe->stream_res.opp != new_pipe->stream_res.opp) - new_pipe->update_flags.bits.odm = 1; - - /* Detect global sync changes */ - if (old_pipe->pipe_dlg_param.vready_offset != new_pipe->pipe_dlg_param.vready_offset - || old_pipe->pipe_dlg_param.vstartup_start != new_pipe->pipe_dlg_param.vstartup_start - || old_pipe->pipe_dlg_param.vupdate_offset != new_pipe->pipe_dlg_param.vupdate_offset - || old_pipe->pipe_dlg_param.vupdate_width != new_pipe->pipe_dlg_param.vupdate_width) - new_pipe->update_flags.bits.global_sync = 1; - } - - /* - * Detect opp / tg change, only set on change, not on enable - * Assume mpcc inst = pipe index, if not this code needs to be updated - * since mpcc is what is affected by these. In fact all of our sequence - * makes this assumption at the moment with how hubp reset is matched to - * same index mpcc reset. - */ - if (old_pipe->stream_res.opp != new_pipe->stream_res.opp) - new_pipe->update_flags.bits.opp_changed = 1; - if (old_pipe->stream_res.tg != new_pipe->stream_res.tg) - new_pipe->update_flags.bits.tg_changed = 1; - - /* Detect mpcc blending changes, only dpp inst and bot matter here */ - if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp - || old_pipe->stream_res.opp != new_pipe->stream_res.opp - || (!old_pipe->bottom_pipe && new_pipe->bottom_pipe) - || (old_pipe->bottom_pipe && !new_pipe->bottom_pipe) - || (old_pipe->bottom_pipe && new_pipe->bottom_pipe - && old_pipe->bottom_pipe->plane_res.mpcc_inst - != new_pipe->bottom_pipe->plane_res.mpcc_inst)) - new_pipe->update_flags.bits.mpcc = 1; - - /* Detect dppclk change */ - if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz) - new_pipe->update_flags.bits.dppclk = 1; - - /* Check for scl update */ - if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data))) - new_pipe->update_flags.bits.scaler = 1; - /* Check for vp update */ - if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect)) - || memcmp(&old_pipe->plane_res.scl_data.viewport_c, - &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect))) - new_pipe->update_flags.bits.viewport = 1; - - /* Detect dlg/ttu/rq updates */ - { - struct _vcs_dpi_display_dlg_regs_st old_dlg_attr = old_pipe->dlg_regs; - struct _vcs_dpi_display_ttu_regs_st old_ttu_attr = old_pipe->ttu_regs; - struct _vcs_dpi_display_dlg_regs_st *new_dlg_attr = &new_pipe->dlg_regs; - struct _vcs_dpi_display_ttu_regs_st *new_ttu_attr = &new_pipe->ttu_regs; - - /* Detect pipe interdependent updates */ - if (old_dlg_attr.dst_y_prefetch != new_dlg_attr->dst_y_prefetch || - old_dlg_attr.vratio_prefetch != new_dlg_attr->vratio_prefetch || - old_dlg_attr.vratio_prefetch_c != new_dlg_attr->vratio_prefetch_c || - old_dlg_attr.dst_y_per_vm_vblank != new_dlg_attr->dst_y_per_vm_vblank || - old_dlg_attr.dst_y_per_row_vblank != new_dlg_attr->dst_y_per_row_vblank || - old_dlg_attr.dst_y_per_vm_flip != new_dlg_attr->dst_y_per_vm_flip || - old_dlg_attr.dst_y_per_row_flip != new_dlg_attr->dst_y_per_row_flip || - old_dlg_attr.refcyc_per_meta_chunk_vblank_l != new_dlg_attr->refcyc_per_meta_chunk_vblank_l || - old_dlg_attr.refcyc_per_meta_chunk_vblank_c != new_dlg_attr->refcyc_per_meta_chunk_vblank_c || - old_dlg_attr.refcyc_per_meta_chunk_flip_l != new_dlg_attr->refcyc_per_meta_chunk_flip_l || - old_dlg_attr.refcyc_per_line_delivery_pre_l != new_dlg_attr->refcyc_per_line_delivery_pre_l || - old_dlg_attr.refcyc_per_line_delivery_pre_c != new_dlg_attr->refcyc_per_line_delivery_pre_c || - 
old_ttu_attr.refcyc_per_req_delivery_pre_l != new_ttu_attr->refcyc_per_req_delivery_pre_l || - old_ttu_attr.refcyc_per_req_delivery_pre_c != new_ttu_attr->refcyc_per_req_delivery_pre_c || - old_ttu_attr.refcyc_per_req_delivery_pre_cur0 != new_ttu_attr->refcyc_per_req_delivery_pre_cur0 || - old_ttu_attr.refcyc_per_req_delivery_pre_cur1 != new_ttu_attr->refcyc_per_req_delivery_pre_cur1 || - old_ttu_attr.min_ttu_vblank != new_ttu_attr->min_ttu_vblank || - old_ttu_attr.qos_level_flip != new_ttu_attr->qos_level_flip) { - old_dlg_attr.dst_y_prefetch = new_dlg_attr->dst_y_prefetch; - old_dlg_attr.vratio_prefetch = new_dlg_attr->vratio_prefetch; - old_dlg_attr.vratio_prefetch_c = new_dlg_attr->vratio_prefetch_c; - old_dlg_attr.dst_y_per_vm_vblank = new_dlg_attr->dst_y_per_vm_vblank; - old_dlg_attr.dst_y_per_row_vblank = new_dlg_attr->dst_y_per_row_vblank; - old_dlg_attr.dst_y_per_vm_flip = new_dlg_attr->dst_y_per_vm_flip; - old_dlg_attr.dst_y_per_row_flip = new_dlg_attr->dst_y_per_row_flip; - old_dlg_attr.refcyc_per_meta_chunk_vblank_l = new_dlg_attr->refcyc_per_meta_chunk_vblank_l; - old_dlg_attr.refcyc_per_meta_chunk_vblank_c = new_dlg_attr->refcyc_per_meta_chunk_vblank_c; - old_dlg_attr.refcyc_per_meta_chunk_flip_l = new_dlg_attr->refcyc_per_meta_chunk_flip_l; - old_dlg_attr.refcyc_per_line_delivery_pre_l = new_dlg_attr->refcyc_per_line_delivery_pre_l; - old_dlg_attr.refcyc_per_line_delivery_pre_c = new_dlg_attr->refcyc_per_line_delivery_pre_c; - old_ttu_attr.refcyc_per_req_delivery_pre_l = new_ttu_attr->refcyc_per_req_delivery_pre_l; - old_ttu_attr.refcyc_per_req_delivery_pre_c = new_ttu_attr->refcyc_per_req_delivery_pre_c; - old_ttu_attr.refcyc_per_req_delivery_pre_cur0 = new_ttu_attr->refcyc_per_req_delivery_pre_cur0; - old_ttu_attr.refcyc_per_req_delivery_pre_cur1 = new_ttu_attr->refcyc_per_req_delivery_pre_cur1; - old_ttu_attr.min_ttu_vblank = new_ttu_attr->min_ttu_vblank; - old_ttu_attr.qos_level_flip = new_ttu_attr->qos_level_flip; - new_pipe->update_flags.bits.hubp_interdependent = 1; - } - /* Detect any other updates to ttu/rq/dlg */ - if (memcmp(&old_dlg_attr, &new_pipe->dlg_regs, sizeof(old_dlg_attr)) || - memcmp(&old_ttu_attr, &new_pipe->ttu_regs, sizeof(old_ttu_attr)) || - memcmp(&old_pipe->rq_regs, &new_pipe->rq_regs, sizeof(old_pipe->rq_regs))) - new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; - } -} - -static void dcn20_update_dchubp_dpp( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct dc_state *context) -{ - struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_plane_state *plane_state = pipe_ctx->plane_state; - - if (pipe_ctx->update_flags.bits.dppclk) - dpp->funcs->dpp_dppclk_control(dpp, false, true); - - /* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG - * VTG is within DCHUBBUB which is commond block share by each pipe HUBP. - * VTG is 1:1 mapping with OTG. 
Each pipe HUBP will select which VTG - */ - if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) { - hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst); - - hubp->funcs->hubp_setup( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs, - &pipe_ctx->rq_regs, - &pipe_ctx->pipe_dlg_param); - } - if (pipe_ctx->update_flags.bits.hubp_interdependent) - hubp->funcs->hubp_setup_interdependent( - hubp, - &pipe_ctx->dlg_regs, - &pipe_ctx->ttu_regs); - - if (pipe_ctx->update_flags.bits.enable || - plane_state->update_flags.bits.bpp_change || - plane_state->update_flags.bits.input_csc_change || - plane_state->update_flags.bits.color_space_change || - plane_state->update_flags.bits.coeff_reduction_change) { - struct dc_bias_and_scale bns_params = {0}; - - // program the input csc - dpp->funcs->dpp_setup(dpp, - plane_state->format, - EXPANSION_MODE_ZERO, - plane_state->input_csc_color_matrix, - plane_state->color_space, - NULL); - - if (dpp->funcs->dpp_program_bias_and_scale) { - //TODO :for CNVC set scale and bias registers if necessary - dcn10_build_prescale_params(&bns_params, plane_state); - dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); - } - } - - if (pipe_ctx->update_flags.bits.mpcc - || plane_state->update_flags.bits.global_alpha_change - || plane_state->update_flags.bits.per_pixel_alpha_change) { - /* Need mpcc to be idle if changing opp */ - if (pipe_ctx->update_flags.bits.opp_changed) { - struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx]; - int mpcc_inst; - - for (mpcc_inst = 0; mpcc_inst < MAX_PIPES; mpcc_inst++) { - if (!old_pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) - continue; - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - old_pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; - } - } - dc->hwss.update_mpcc(dc, pipe_ctx); - } - - if (pipe_ctx->update_flags.bits.scaler || - plane_state->update_flags.bits.scaling_change || - plane_state->update_flags.bits.position_change || - plane_state->update_flags.bits.per_pixel_alpha_change || - pipe_ctx->stream->update_flags.bits.scaling) { - pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->plane_state->per_pixel_alpha; - ASSERT(pipe_ctx->plane_res.scl_data.lb_params.depth == LB_PIXEL_DEPTH_30BPP); - /* scaler configuration */ - pipe_ctx->plane_res.dpp->funcs->dpp_set_scaler( - pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data); - } - - if (pipe_ctx->update_flags.bits.viewport || - (context == dc->current_state && plane_state->update_flags.bits.scaling_change) || - (context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) - hubp->funcs->mem_program_viewport( - hubp, - &pipe_ctx->plane_res.scl_data.viewport, - &pipe_ctx->plane_res.scl_data.viewport_c); - - /* Any updates are handled in dc interface, just need to apply existing for plane enable */ - if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed) - && pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { - dc->hwss.set_cursor_position(pipe_ctx); - dc->hwss.set_cursor_attribute(pipe_ctx); - - if (dc->hwss.set_cursor_sdr_white_level) - dc->hwss.set_cursor_sdr_white_level(pipe_ctx); - } - - /* Any updates are handled in dc interface, just need - * to apply existing for plane enable / opp change */ - if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed - || pipe_ctx->stream->update_flags.bits.gamut_remap - || pipe_ctx->stream->update_flags.bits.out_csc) { - /* dpp/cm gamut 
remap*/ - dc->hwss.program_gamut_remap(pipe_ctx); - - /*call the dcn2 method which uses mpc csc*/ - dc->hwss.program_output_csc(dc, - pipe_ctx, - pipe_ctx->stream->output_color_space, - pipe_ctx->stream->csc_color_matrix.matrix, - hubp->opp_id); - } - - if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.opp_changed || - plane_state->update_flags.bits.pixel_format_change || - plane_state->update_flags.bits.horizontal_mirror_change || - plane_state->update_flags.bits.rotation_change || - plane_state->update_flags.bits.swizzle_change || - plane_state->update_flags.bits.dcc_change || - plane_state->update_flags.bits.bpp_change || - plane_state->update_flags.bits.scaling_change || - plane_state->update_flags.bits.plane_size_change) { - struct plane_size size = plane_state->plane_size; - - size.surface_size = pipe_ctx->plane_res.scl_data.viewport; - hubp->funcs->hubp_program_surface_config( - hubp, - plane_state->format, - &plane_state->tiling_info, - &size, - plane_state->rotation, - &plane_state->dcc, - plane_state->horizontal_mirror, - 0); - hubp->power_gated = false; - } - - if (pipe_ctx->update_flags.bits.enable || plane_state->update_flags.bits.addr_update) - dc->hwss.update_plane_addr(dc, pipe_ctx); - - if (pipe_ctx->update_flags.bits.enable) - hubp->funcs->set_blank(hubp, false); -} - - -static void dcn20_program_pipe( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct dc_state *context) -{ - /* Only need to unblank on top pipe */ - if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level) - && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe) - dc->hwss.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible); - - if (pipe_ctx->update_flags.bits.global_sync) { - pipe_ctx->stream_res.tg->funcs->program_global_sync( - pipe_ctx->stream_res.tg, - pipe_ctx->pipe_dlg_param.vready_offset, - pipe_ctx->pipe_dlg_param.vstartup_start, - pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); - - pipe_ctx->stream_res.tg->funcs->set_vtg_params( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - - if (dc->hwss.setup_vupdate_interrupt) - dc->hwss.setup_vupdate_interrupt(pipe_ctx); - } - - if (pipe_ctx->update_flags.bits.odm) - dc->hwss.update_odm(dc, context, pipe_ctx); - - if (pipe_ctx->update_flags.bits.enable) - dcn20_enable_plane(dc, pipe_ctx, context); - - if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw) - dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->plane_state->update_flags.bits.sdr_white_level) - set_hdr_multiplier(pipe_ctx); - - if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || - pipe_ctx->plane_state->update_flags.bits.gamma_change) - dc->hwss.set_input_transfer_func(pipe_ctx, pipe_ctx->plane_state); - - /* dcn10_translate_regamma_to_hw_format takes 750us to finish - * only do gamma programming for powering on, internal memcmp to avoid - * updating on slave planes - */ - if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf) - dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream); - - /* If the pipe has been enabled or has a different opp, we - * should reprogram the fmt. This deals with cases where - * interation between mpc and odm combine on different streams - * causes a different pipe to be chosen to odm combine with. 
- */ - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->update_flags.bits.opp_changed) { - - pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( - pipe_ctx->stream_res.opp, - COLOR_SPACE_YCBCR601, - pipe_ctx->stream->timing.display_color_depth, - pipe_ctx->stream->signal); - - pipe_ctx->stream_res.opp->funcs->opp_program_fmt( - pipe_ctx->stream_res.opp, - &pipe_ctx->stream->bit_depth_params, - &pipe_ctx->stream->clamping); - } -} - -static bool does_pipe_need_lock(struct pipe_ctx *pipe) -{ - if ((pipe->plane_state && pipe->plane_state->update_flags.raw) - || pipe->update_flags.raw) - return true; - if (pipe->bottom_pipe) - return does_pipe_need_lock(pipe->bottom_pipe); - - return false; -} - -static void dcn20_program_front_end_for_ctx( +static void dcn20_apply_ctx_for_surface( struct dc *dc, + const struct dc_stream_state *stream, + int num_planes, struct dc_state *context) { const unsigned int TIMEOUT_FOR_PIPE_ENABLE_MS = 100; int i; - bool pipe_locked[MAX_PIPES] = {false}; + struct timing_generator *tg; + bool removed_pipe[6] = { false }; + bool interdependent_update = false; + struct pipe_ctx *top_pipe_to_program = + find_top_pipe_for_stream(dc, context, stream); + struct pipe_ctx *prev_top_pipe_to_program = + find_top_pipe_for_stream(dc, dc->current_state, stream); DC_LOGGER_INIT(dc->ctx->logger); + if (!top_pipe_to_program) + return; + /* Carry over GSL groups in case the context is changing. */ - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (context->res_ctx.pipe_ctx[i].stream == dc->current_state->res_ctx.pipe_ctx[i].stream) - context->res_ctx.pipe_ctx[i].stream_res.gsl_group = - dc->current_state->res_ctx.pipe_ctx[i].stream_res.gsl_group; - - /* Set pipe update flags and lock pipes */ - for (i = 0; i < dc->res_pool->pipe_count; i++) - dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i], - &context->res_ctx.pipe_ctx[i]); - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (!context->res_ctx.pipe_ctx[i].top_pipe && - does_pipe_need_lock(&context->res_ctx.pipe_ctx[i])) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->update_flags.bits.tg_changed || pipe_ctx->update_flags.bits.enable) - dc->hwss.pipe_control_lock(dc, pipe_ctx, true); - if (!pipe_ctx->update_flags.bits.enable) - dc->hwss.pipe_control_lock(dc, &dc->current_state->res_ctx.pipe_ctx[i], true); - pipe_locked[i] = true; - } - - /* OTG blank before disabling all front ends */ - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - && !context->res_ctx.pipe_ctx[i].top_pipe - && !context->res_ctx.pipe_ctx[i].prev_odm_pipe - && context->res_ctx.pipe_ctx[i].stream) - dc->hwss.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); - - /* Disconnect mpcc */ - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable - || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) { - dc->hwss.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]); - DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); - } - - /* - * Program all updated pipes, order matters for mpcc setup. 
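In the pipe-programming loop of this hunk, each blending tree is handled from its top pipe downward by following bottom_pipe links, because MPC configuration depends on that order. A simplified sketch of that traversal follows, using an invented pipe struct rather than the driver's pipe_ctx.

#include <stdio.h>

/* Hypothetical simplified pipe: each top pipe heads a chain linked by bottom_pipe. */
struct pipe {
	int idx;
	int has_plane;
	struct pipe *top_pipe;		/* NULL for the head of a blending tree */
	struct pipe *bottom_pipe;	/* next pipe blended underneath */
};

static void program_pipe(struct pipe *p)
{
	printf("program pipe %d\n", p->idx);
}

/* Walk every tree from its head downward; order matters for MPC setup. */
static void program_front_end(struct pipe *pipes, int count)
{
	for (int i = 0; i < count; i++) {
		struct pipe *p = &pipes[i];

		if (!p->has_plane || p->top_pipe)
			continue;	/* only start from top pipes that have a plane */
		while (p) {
			program_pipe(p);
			p = p->bottom_pipe;
		}
	}
}

int main(void)
{
	struct pipe pipes[3] = {
		{ .idx = 0, .has_plane = 1 },
		{ .idx = 1, .has_plane = 1 },
		{ .idx = 2, .has_plane = 1 },
	};

	pipes[0].bottom_pipe = &pipes[1];	/* pipe 1 blends under pipe 0 */
	pipes[1].top_pipe = &pipes[0];
	program_front_end(pipes, 3);		/* programs 0, 1, then 2 */
	return 0;
}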
Start with - * top pipe and program all pipes that follow in order - */ for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->plane_state && !pipe->top_pipe) { - while (pipe) { - dcn20_program_pipe(dc, pipe, context); - pipe = pipe->bottom_pipe; - } - /* Program secondary blending tree and writeback pipes */ - pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->prev_odm_pipe && pipe->stream->num_wb_info > 0 - && (pipe->update_flags.raw || pipe->plane_state->update_flags.raw || pipe->stream->update_flags.raw) - && dc->hwss.program_all_writeback_pipes_in_tree) - dc->hwss.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); + if (pipe_ctx->stream == stream && + pipe_ctx->stream == old_pipe_ctx->stream) + pipe_ctx->stream_res.gsl_group = + old_pipe_ctx->stream_res.gsl_group; + } + + tg = top_pipe_to_program->stream_res.tg; + + interdependent_update = top_pipe_to_program->plane_state && + top_pipe_to_program->plane_state->update_flags.bits.full_update; + + if (interdependent_update) + lock_all_pipes(dc, context, true); + else + dcn20_pipe_control_lock(dc, top_pipe_to_program, true); + + if (num_planes == 0) { + /* OTG blank before remove all front end */ + dc->hwss.blank_pixel_data(dc, top_pipe_to_program, true); + } + + /* Disconnect unused mpcc */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + /* + * Powergate reused pipes that are not powergated + * fairly hacky right now, using opp_id as indicator + * TODO: After move dc_post to dc_update, this will + * be removed. 
+ */ + if (pipe_ctx->plane_state && !old_pipe_ctx->plane_state) { + if (old_pipe_ctx->stream_res.tg == tg && + old_pipe_ctx->plane_res.hubp && + old_pipe_ctx->plane_res.hubp->opp_id != OPP_ID_INVALID) + dc->hwss.disable_plane(dc, old_pipe_ctx); + } + + if ((!pipe_ctx->plane_state || + pipe_ctx->stream_res.tg != old_pipe_ctx->stream_res.tg) && + old_pipe_ctx->plane_state && + old_pipe_ctx->stream_res.tg == tg) { + + dc->hwss.plane_atomic_disconnect(dc, old_pipe_ctx); + removed_pipe[i] = true; + + DC_LOG_DC("Reset mpcc for pipe %d\n", + old_pipe_ctx->pipe_idx); } } - /* Unlock all locked pipes */ - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (pipe_locked[i]) { + if (num_planes > 0) + dcn20_program_all_pipe_in_tree(dc, top_pipe_to_program, context); + + /* Program secondary blending tree and writeback pipes */ + if ((stream->num_wb_info > 0) && (dc->hwss.program_all_writeback_pipes_in_tree)) + dc->hwss.program_all_writeback_pipes_in_tree(dc, stream, context); + + if (interdependent_update) + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->update_flags.bits.tg_changed || pipe_ctx->update_flags.bits.enable) - dc->hwss.pipe_control_lock(dc, pipe_ctx, false); - if (!pipe_ctx->update_flags.bits.enable) - dc->hwss.pipe_control_lock(dc, &dc->current_state->res_ctx.pipe_ctx[i], false); + /* Skip inactive pipes and ones already updated */ + if (!pipe_ctx->stream || pipe_ctx->stream == stream || + !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg)) + continue; + + pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent( + pipe_ctx->plane_res.hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs); } + if (interdependent_update) + lock_all_pipes(dc, context, false); + else + dcn20_pipe_control_lock(dc, top_pipe_to_program, false); + for (i = 0; i < dc->res_pool->pipe_count; i++) - if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) - dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + if (removed_pipe[i]) + dcn20_disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); /* * If we are enabling a pipe, we need to wait for pending clear as this is a critical @@ -1515,22 +1239,15 @@ static void dcn20_program_front_end_for_ctx( * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which * is unsupported on DCN. 
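The wait described in this comment is implemented, in both the removed and the restored code, as a bounded poll of hubp_is_flip_pending() with a 1 ms sleep between tries. The sketch below shows that bounded-poll shape on its own; usleep() stands in for the kernel's msleep() and the pending-check callback is a placeholder, not the driver's hubp interface.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>	/* usleep(), standing in for the kernel's msleep() */

/* Poll a condition once per millisecond, giving up after timeout_ms tries. */
static bool wait_for_clear(bool (*still_pending)(void *), void *arg, int timeout_ms)
{
	for (int i = 0; i < timeout_ms; i++) {
		if (!still_pending(arg))
			return true;	/* condition cleared in time */
		usleep(1000);
	}
	return false;			/* timed out; caller decides how to handle it */
}

static int fake_pending_count = 3;

static bool flip_pending(void *arg)
{
	(void)arg;
	return --fake_pending_count > 0;	/* clears after a few polls */
}

int main(void)
{
	if (wait_for_clear(flip_pending, NULL, 100))
		printf("flip cleared, safe to enable\n");
	else
		printf("timed out waiting for pending flip\n");
	return 0;
}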
*/ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable) { - struct hubp *hubp = pipe->plane_res.hubp; - int j = 0; - - for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS - && hubp->funcs->hubp_is_flip_pending(hubp); j++) - msleep(1); + i = 0; + if (num_planes > 0 && top_pipe_to_program && + (prev_top_pipe_to_program == NULL || prev_top_pipe_to_program->plane_state == NULL)) { + while (i < TIMEOUT_FOR_PIPE_ENABLE_MS && + top_pipe_to_program->plane_res.hubp->funcs->hubp_is_flip_pending(top_pipe_to_program->plane_res.hubp)) { + i += 1; + msleep(1); } } - - /* WA to apply WM setting*/ - if (dc->hwseq->wa.DEGVIDCN21) - dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub); } @@ -1602,12 +1319,8 @@ bool dcn20_update_bandwidth( pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - if (pipe_ctx->prev_odm_pipe == NULL) dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); - - if (dc->hwss.setup_vupdate_interrupt) - dc->hwss.setup_vupdate_interrupt(pipe_ctx); } pipe_ctx->plane_res.hubp->funcs->hubp_setup( @@ -1624,8 +1337,7 @@ bool dcn20_update_bandwidth( static void dcn20_enable_writeback( struct dc *dc, const struct dc_stream_status *stream_status, - struct dc_writeback_info *wb_info, - struct dc_state *context) + struct dc_writeback_info *wb_info) { struct dwbc *dwb; struct mcif_wb *mcif_wb; @@ -1642,7 +1354,7 @@ static void dcn20_enable_writeback( optc->funcs->set_dwb_source(optc, wb_info->dwb_pipe_inst); /* set MCIF_WB buffer and arbitration configuration */ mcif_wb->funcs->config_mcif_buf(mcif_wb, &wb_info->mcif_buf_params, wb_info->dwb_params.dest_height); - mcif_wb->funcs->config_mcif_arb(mcif_wb, &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]); + mcif_wb->funcs->config_mcif_arb(mcif_wb, &dc->current_state->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[wb_info->dwb_pipe_inst]); /* Enable MCIF_WB */ mcif_wb->funcs->enable_mcif(mcif_wb); /* Enable DWB */ @@ -1990,28 +1702,6 @@ static void dcn20_reset_hw_ctx_wrap( } } -void dcn20_get_mpctree_visual_confirm_color( - struct pipe_ctx *pipe_ctx, - struct tg_color *color) -{ - const struct tg_color pipe_colors[6] = { - {MAX_TG_COLOR_VALUE, 0, 0}, // red - {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, // yellow - {0, MAX_TG_COLOR_VALUE, 0}, // blue - {MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, // purple - {0, 0, MAX_TG_COLOR_VALUE}, // green - {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE * 2 / 3, 0}, // orange - }; - - struct pipe_ctx *top_pipe = pipe_ctx; - - while (top_pipe->top_pipe) { - top_pipe = top_pipe->top_pipe; - } - - *color = pipe_colors[top_pipe->pipe_idx]; -} - static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; @@ -2029,9 +1719,6 @@ static void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) } else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE) { dcn10_get_surface_visual_confirm_color( pipe_ctx, &blnd_cfg.black_color); - } else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE) { - dcn20_get_mpctree_visual_confirm_color( - pipe_ctx, &blnd_cfg.black_color); } if (per_pixel_alpha) @@ -2232,10 +1919,8 @@ static void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc, pipe_ctx->stream_res.stream_enc->id, true); - if (pipe_ctx->plane_state && pipe_ctx->plane_state->flip_immediate != 1) { 
- if (link->dc->hwss.program_dmdata_engine) - link->dc->hwss.program_dmdata_engine(pipe_ctx); - } + if (link->dc->hwss.program_dmdata_engine) + link->dc->hwss.program_dmdata_engine(pipe_ctx); link->dc->hwss.update_info_frame(pipe_ctx); @@ -2410,8 +2095,7 @@ void dcn20_hw_sequencer_construct(struct dc *dc) dc->hwss.program_triplebuffer = dcn20_program_tripleBuffer; dc->hwss.set_input_transfer_func = dcn20_set_input_transfer_func; dc->hwss.set_output_transfer_func = dcn20_set_output_transfer_func; - dc->hwss.apply_ctx_for_surface = NULL; - dc->hwss.program_front_end_for_ctx = dcn20_program_front_end_for_ctx; + dc->hwss.apply_ctx_for_surface = dcn20_apply_ctx_for_surface; dc->hwss.pipe_control_lock = dcn20_pipe_control_lock; dc->hwss.pipe_control_lock_global = dcn20_pipe_control_lock_global; dc->hwss.optimize_bandwidth = dcn20_optimize_bandwidth; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h index 3098f1049ed7..92ab3dd91814 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h @@ -96,20 +96,4 @@ void dcn20_init_blank( struct dc *dc, struct timing_generator *tg); void dcn20_display_init(struct dc *dc); -void dcn20_pipe_control_lock( - struct dc *dc, - struct pipe_ctx *pipe, - bool lock); -void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); -void dcn20_enable_plane( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct dc_state *context); -bool dcn20_set_blend_lut( - struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state); -bool dcn20_set_shaper_3dlut( - struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state); -void dcn20_get_mpctree_visual_confirm_color( - struct pipe_ctx *pipe_ctx, - struct tg_color *color); #endif /* __DC_HWSS_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h index 0c98a0bbbd14..3736b5548a25 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h @@ -91,13 +91,6 @@ struct mpll_cfg { uint32_t ref_range; uint32_t ref_clk; bool hdmimode_enable; - bool sup_pre_hp; - bool dp_tx0_vergdrv_byp; - bool dp_tx1_vergdrv_byp; - bool dp_tx2_vergdrv_byp; - bool dp_tx3_vergdrv_byp; - - }; struct dpcssys_phy_seq_cfg { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 3b613fb93ef8..2137e2be2140 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -287,10 +287,6 @@ void optc2_get_optc_source(struct timing_generator *optc, *num_of_src_opp = 2; else *num_of_src_opp = 1; - - /* Work around VBIOS not updating OPTC_NUM_OF_INPUT_SEGMENT */ - if (*src_opp_id_1 == 0xf) - *num_of_src_opp = 1; } void optc2_set_dwb_source(struct timing_generator *optc, @@ -460,7 +456,7 @@ static struct timing_generator_funcs dcn20_tg_funcs = { .set_vtg_params = optc1_set_vtg_params, .program_manual_trigger = optc2_program_manual_trigger, .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, + .is_matching_timing = optc1_is_matching_timing }; void dcn20_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 924c2e303588..5a2763daff4d 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -581,13 +581,11 @@ static const struct dcn2_dpp_registers tf_regs[] = { }; static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 + TF_REG_LIST_SH_MASK_DCN20(__SHIFT) }; static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 + TF_REG_LIST_SH_MASK_DCN20(_MASK) }; #define dwbc_regs_dcn2(id)\ @@ -734,42 +732,6 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - default: - ASSERT(0); - return 0; - } -} - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT #define dsc_regsDCN20(id)\ [id] = {\ @@ -852,7 +814,7 @@ static const struct resource_caps res_cap_nv14 = { .num_audio = 6, .num_stream_encoder = 5, .num_pll = 5, - .num_dwb = 1, + .num_dwb = 0, .num_ddc = 5, }; @@ -863,7 +825,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, + .force_single_disp_pipe_split = true, .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, @@ -960,10 +922,7 @@ struct dce_aux *dcn20_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -1083,18 +1042,14 @@ struct link_encoder *dcn20_link_encoder_create( { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - int link_regs_id; if (!enc20) return NULL; - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - dcn20_link_encoder_construct(enc20, enc_init_data, &link_enc_feature, - &link_enc_regs[link_regs_id], + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, @@ -1199,8 +1154,6 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn20_hwseq_create, }; -static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu); - void dcn20_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); @@ -1643,7 +1596,7 @@ static void swizzle_to_dml_params( } } -bool dcn20_split_stream_for_odm( +static bool dcn20_split_stream_for_odm( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *prev_odm_pipe, @@ -1664,6 +1617,7 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->stream_res.dsc = NULL; #endif if (prev_odm_pipe->next_odm_pipe && prev_odm_pipe->next_odm_pipe != next_odm_pipe) { + ASSERT(!next_odm_pipe->next_odm_pipe); next_odm_pipe->next_odm_pipe = 
prev_odm_pipe->next_odm_pipe; next_odm_pipe->next_odm_pipe->prev_odm_pipe = next_odm_pipe; } @@ -1720,7 +1674,7 @@ bool dcn20_split_stream_for_odm( return true; } -void dcn20_split_stream_for_mpc( +static void dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, @@ -1806,7 +1760,7 @@ int dcn20_populate_dml_pipes_from_context( pipe_cnt = i; continue; } - if (dc->debug.disable_timing_sync || !resource_are_streams_timing_synchronizable( + if (!resource_are_streams_timing_synchronizable( res_ctx->pipe_ctx[pipe_cnt].stream, res_ctx->pipe_ctx[i].stream)) { synchronized_vblank = false; @@ -1938,7 +1892,7 @@ int dcn20_populate_dml_pipes_from_context( break; case PIXEL_ENCODING_YCBCR420: pipes[pipe_cnt].dout.output_format = dm_420; - pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3.0) / 2; + pipes[pipe_cnt].dout.output_bpp = (output_bpc * 3) / 2; break; case PIXEL_ENCODING_YCBCR422: if (true) /* todo */ @@ -1952,11 +1906,6 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.output_bpp = output_bpc * 3; } -#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT - if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) - pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; -#endif - /* todo: default max for now, until there is logic reflecting this in dc*/ pipes[pipe_cnt].dout.output_bpc = 12; /* @@ -2178,7 +2127,7 @@ void dcn20_set_mcif_arb_params( } #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT -bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) +static bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) { int i; @@ -2213,7 +2162,7 @@ bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) } #endif -struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, +static struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, struct resource_context *res_ctx, const struct resource_pool *pool, const struct pipe_ctx *primary_pipe) @@ -2253,8 +2202,7 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, */ if (secondary_pipe == NULL) { for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) { - if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[j].prev_odm_pipe == NULL) { + if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL) { preferred_pipe_idx = j; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { @@ -2290,11 +2238,29 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, return secondary_pipe; } -void dcn20_merge_pipes_for_validate( +bool dcn20_fast_validate_bw( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *pipe_split_from, + int *vlevel_out) { - int i; + bool out = false; + + int pipe_cnt, i, pipe_idx, vlevel, vlevel_unsplit; + bool odm_capable = context->bw_ctx.dml.ip.odm_capable; + bool force_split = false; +#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT + bool failed_non_odm_dsc = false; +#endif + int split_threshold = dc->res_pool->pipe_count / 2; + bool avoid_split = dc->debug.pipe_split_policy != MPC_SPLIT_DYNAMIC; + + + ASSERT(pipes); + if (!pipes) + return false; /* merge previously split odm pipes since mode support needs to make the decision */ for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2349,19 +2315,51 @@ void dcn20_merge_pipes_for_validate( if (pipe->plane_state) resource_build_scaling_params(pipe); } -} -int dcn20_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state 
*context, - int vlevel, - bool *split) -{ - int i, pipe_idx, vlevel_split; - bool force_split = false; - bool avoid_split = dc->debug.pipe_split_policy != MPC_SPLIT_DYNAMIC; + if (dc->res_pool->funcs->populate_dml_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, + &context->res_ctx, pipes); + else + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, + &context->res_ctx, pipes); - /* Single display loop, exits if there is more than one display */ + *pipe_cnt_out = pipe_cnt; + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + context->bw_ctx.dml.ip.odm_capable = 0; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + context->bw_ctx.dml.ip.odm_capable = odm_capable; + +#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT + /* 1 dsc per stream dsc validation */ + if (vlevel <= context->bw_ctx.dml.soc.num_states) + if (!dcn20_validate_dsc(dc, context)) { + failed_non_odm_dsc = true; + vlevel = context->bw_ctx.dml.soc.num_states + 1; + } +#endif + + if (vlevel > context->bw_ctx.dml.soc.num_states && odm_capable) + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (vlevel > context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + if ((context->stream_count > split_threshold && dc->current_state->stream_count <= split_threshold) + || (context->stream_count <= split_threshold && dc->current_state->stream_count > split_threshold)) + context->commit_hints.full_update_needed = true; + + /*initialize pipe_just_split_from to invalid idx*/ + for (i = 0; i < MAX_PIPES; i++) + pipe_split_from[i] = -1; + + /* Single display only conditionals get set here */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; bool exit_loop = false; @@ -2388,107 +2386,38 @@ int dcn20_validate_apply_pipe_split_flags( if (exit_loop) break; } - /* TODO: fix dc bugs and remove this split threshold thing */ - if (context->stream_count > dc->res_pool->pipe_count / 2) + + if (context->stream_count > split_threshold) avoid_split = true; - /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ - if (avoid_split) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1) - break; - /* Impossible to not split this pipe */ - if (vlevel > context->bw_ctx.dml.soc.num_states) - vlevel = vlevel_split; - pipe_idx++; - } - context->bw_ctx.dml.vba.maxMpcComb = 0; - } - - /* Split loop sets which pipe should be split based on dml outputs and dc flags */ + vlevel_unsplit = vlevel; for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!context->res_ctx.pipe_ctx[i].stream) continue; - - if (force_split || context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] > 1) - split[i] = true; - if ((pipe->stream->view_format == - VIEW_3D_FORMAT_SIDE_BY_SIDE || - pipe->stream->view_format == - VIEW_3D_FORMAT_TOP_AND_BOTTOM) && - (pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_TOP_AND_BOTTOM || - pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = true; - if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = true; - context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true; - } - 
context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = - context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx]; - /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1) - context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2; + for (; vlevel_unsplit <= context->bw_ctx.dml.soc.num_states; vlevel_unsplit++) + if (context->bw_ctx.dml.vba.NoOfDPP[vlevel_unsplit][0][pipe_idx] == 1) + break; pipe_idx++; } - return vlevel; -} - -bool dcn20_fast_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *pipe_split_from, - int *vlevel_out) -{ - bool out = false; - bool split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - - ASSERT(pipes); - if (!pipes) - return false; - - dcn20_merge_pipes_for_validate(dc, context); - - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, &context->res_ctx, pipes); - - *pipe_cnt_out = pipe_cnt; - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (vlevel > context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split); - - /*initialize pipe_just_split_from to invalid idx*/ - for (i = 0; i < MAX_PIPES; i++) - pipe_split_from[i] = -1; - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; + bool need_split = true; + bool need_split3d; if (!pipe->stream || pipe_split_from[i] >= 0) continue; pipe_idx++; + if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { + force_split = true; + context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx] = true; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx] = true; + } + if (force_split && context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 1) + context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] /= 2; if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); ASSERT(hsplit_pipe); @@ -2506,26 +2435,40 @@ bool dcn20_fast_validate_bw( if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state) continue; + need_split3d = ((pipe->stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + pipe->stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)); + + if (avoid_split && vlevel_unsplit <= context->bw_ctx.dml.soc.num_states && !force_split && !need_split3d) { + need_split = false; + vlevel = vlevel_unsplit; + context->bw_ctx.dml.vba.maxMpcComb = 0; + } else + need_split = context->bw_ctx.dml.vba.NoOfDPP[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] == 2; + /* We do not support mpo + odm at the moment */ if (hsplit_pipe && hsplit_pipe->plane_state != pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) goto validate_fail; - if (split[i]) { + if (need_split3d || need_split || force_split) { if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { /* pipe not split 
previously needs split */ hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) { - context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx] *= 2; + ASSERT(hsplit_pipe || force_split); + if (!hsplit_pipe) continue; - } + if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { if (!dcn20_split_stream_for_odm( &context->res_ctx, dc->res_pool, pipe, hsplit_pipe)) goto validate_fail; - dcn20_build_mapped_resource(dc, context, pipe->stream); } else dcn20_split_stream_for_mpc( &context->res_ctx, dc->res_pool, @@ -2539,7 +2482,7 @@ bool dcn20_fast_validate_bw( } #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { + if (failed_non_odm_dsc && !dcn20_validate_dsc(dc, context)) { context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; goto validate_fail; @@ -2558,7 +2501,7 @@ validate_out: return out; } -static void dcn20_calculate_wm( +void dcn20_calculate_wm( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *out_pipe_cnt, @@ -2579,7 +2522,7 @@ static void dcn20_calculate_wm( context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_idx]; if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_idx] == pipe_idx) pipes[pipe_cnt].pipe.dest.odm_combine = - context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_idx]; else pipes[pipe_cnt].pipe.dest.odm_combine = 0; pipe_idx++; @@ -2588,7 +2531,7 @@ static void dcn20_calculate_wm( context->bw_ctx.dml.vba.RequiredDPPCLK[vlevel][context->bw_ctx.dml.vba.maxMpcComb][pipe_split_from[i]]; if (context->bw_ctx.dml.vba.BlendingAndTiming[pipe_split_from[i]] == pipe_split_from[i]) pipes[pipe_cnt].pipe.dest.odm_combine = - context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_split_from[i]]; + context->bw_ctx.dml.vba.ODMCombineEnablePerState[vlevel][pipe_split_from[i]]; else pipes[pipe_cnt].pipe.dest.odm_combine = 0; } @@ -2631,11 +2574,6 @@ static void dcn20_calculate_wm( context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif if (vlevel < 2) { pipes[0].clks_cfg.voltage = 2; @@ -2647,10 +2585,6 @@ static void dcn20_calculate_wm( context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#if 
defined(CONFIG_DRM_AMD_DC_DCN2_1) - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif if (vlevel < 3) { pipes[0].clks_cfg.voltage = 3; @@ -2662,10 +2596,6 @@ static void dcn20_calculate_wm( context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif pipes[0].clks_cfg.voltage = vlevel; pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].dcfclk_mhz; @@ -2675,10 +2605,6 @@ static void dcn20_calculate_wm( context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif } void dcn20_calculate_dlg_params( @@ -2698,7 +2624,7 @@ void dcn20_calculate_dlg_params( context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; context->bw_ctx.bw.dcn.clk.p_state_change_support = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2714,8 +2640,8 @@ void dcn20_calculate_dlg_params( continue; if (!visited[pipe_idx]) { - display_pipe_source_params_st *src = &pipes[pipe_idx].pipe.src; - display_pipe_dest_params_st *dst = &pipes[pipe_idx].pipe.dest; + display_pipe_source_params_st *src = &pipes[pipe_idx_unsplit].pipe.src; + display_pipe_dest_params_st *dst = &pipes[pipe_idx_unsplit].pipe.dest; dst->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit]; dst->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit]; @@ -2875,6 +2801,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, ASSERT(false); restore_dml_state: + memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; return voltage_supported; @@ 
-2960,7 +2887,6 @@ static struct resource_funcs dcn20_res_pool_funcs = { .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, .get_default_swizzle_mode = dcn20_get_default_swizzle_mode, .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; @@ -2969,6 +2895,8 @@ bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) int i; uint32_t pipe_count = pool->res_cap->num_dwb; + ASSERT(pipe_count > 0); + for (i = 0; i < pipe_count; i++) { struct dcn20_dwbc *dwbc20 = kzalloc(sizeof(struct dcn20_dwbc), GFP_KERNEL); @@ -3014,7 +2942,7 @@ bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) return true; } -static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) +struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) { struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); @@ -3029,7 +2957,7 @@ static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) return pp_smu; } -static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu) +void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu) { if (pp_smu && *pp_smu) { kfree(*pp_smu); @@ -3037,7 +2965,7 @@ static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu) } } -void dcn20_cap_soc_clocks( +static void cap_soc_clocks( struct _vcs_dpi_soc_bounding_box_st *bb, struct pp_smu_nv_clock_table max_clocks) { @@ -3104,10 +3032,10 @@ void dcn20_cap_soc_clocks( } } -void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb, +static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb, struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states) { - struct _vcs_dpi_voltage_scaling_st calculated_states[MAX_CLOCK_LIMIT_STATES]; + struct _vcs_dpi_voltage_scaling_st calculated_states[MAX_CLOCK_LIMIT_STATES] = {0}; int i; int num_calculated_states = 0; int min_dcfclk = 0; @@ -3115,18 +3043,12 @@ void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s if (num_states == 0) return; - memset(calculated_states, 0, sizeof(calculated_states)); - if (dc->bb_overrides.min_dcfclk_mhz > 0) min_dcfclk = dc->bb_overrides.min_dcfclk_mhz; - else { - if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) - min_dcfclk = 310; - else - // Accounting for SOC/DCF relationship, we can go as high as - // 506Mhz in Vmin. - min_dcfclk = 506; - } + else + // Accounting for SOC/DCF relationship, we can go as high as + // 506Mhz in Vmin. We need to code 507 since SMU will round down to 506. 
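The bounding-box update in this hunk floors DCFCLK at either the debug override or a fixed platform minimum before rebuilding the clock-state table. A small sketch of that pick-override-then-floor step, with invented clock values rather than real silicon numbers:

#include <stdio.h>

struct clock_state { int uclk_mhz; int dcfclk_mhz; };

/* Pick the DCFCLK floor: a debug override wins, otherwise a platform default. */
static int pick_min_dcfclk(int override_mhz, int default_mhz)
{
	return override_mhz > 0 ? override_mhz : default_mhz;
}

int main(void)
{
	/* Invented states standing in for the SMU-reported table. */
	struct clock_state states[] = {
		{ .uclk_mhz = 400,  .dcfclk_mhz = 300 },
		{ .uclk_mhz = 800,  .dcfclk_mhz = 450 },
		{ .uclk_mhz = 1200, .dcfclk_mhz = 600 },
	};
	int min_dcfclk = pick_min_dcfclk(0 /* no override */, 507);

	for (int i = 0; i < 3; i++) {
		if (states[i].dcfclk_mhz < min_dcfclk)
			states[i].dcfclk_mhz = min_dcfclk;	/* floor the low states */
		printf("state %d: uclk %d MHz, dcfclk %d MHz\n",
		       i, states[i].uclk_mhz, states[i].dcfclk_mhz);
	}
	return 0;
}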
+ min_dcfclk = 507; for (i = 0; i < num_states; i++) { int min_fclk_required_by_uclk; @@ -3166,7 +3088,7 @@ void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s bb->clock_limits[num_calculated_states].state = bb->num_states; } -void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) +static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) { kernel_fpu_begin(); if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns @@ -3365,14 +3287,14 @@ static bool init_soc_bounding_box(struct dc *dc, } if (clock_limits_available && uclk_states_available && num_states) - dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states); + update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states); else if (clock_limits_available) - dcn20_cap_soc_clocks(loaded_bb, max_clocks); + cap_soc_clocks(loaded_bb, max_clocks); } loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; loaded_ip->max_num_dpp = pool->base.pipe_count; - dcn20_patch_bounding_box(dc, loaded_bb); + patch_bounding_box(dc, loaded_bb); return true; } @@ -3418,7 +3340,6 @@ static bool construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.hw_3d_lut = true; - dc->caps.extended_aux_timeout_support = true; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) { dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index fef473d68a4a..44f95aa0d61e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -95,12 +95,9 @@ struct display_stream_compressor *dcn20_dsc_create( struct dc_context *ctx, uint32_t inst); void dcn20_dsc_destroy(struct display_stream_compressor **dsc); -void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb); -void dcn20_cap_soc_clocks( - struct _vcs_dpi_soc_bounding_box_st *bb, - struct pp_smu_nv_clock_table max_clocks); -void dcn20_update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb, - struct pp_smu_nv_clock_table *max_clocks, unsigned int *uclk_states, unsigned int num_states); +struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx); +void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu); + struct hubp *dcn20_hubp_create( struct dc_context *ctx, uint32_t inst); @@ -119,31 +116,6 @@ void dcn20_set_mcif_arb_params( display_e2e_pipe_params_st *pipes, int pipe_cnt); bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); -void dcn20_merge_pipes_for_validate( - struct dc *dc, - struct dc_state *context); -int dcn20_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - bool *split); -#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT -bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); -#endif -void dcn20_split_stream_for_mpc( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct pipe_ctx *primary_pipe, - struct pipe_ctx *secondary_pipe); -bool dcn20_split_stream_for_odm( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct pipe_ctx *prev_odm_pipe, - struct pipe_ctx *next_odm_pipe); -struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, - struct resource_context *res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *primary_pipe); bool 
dcn20_fast_validate_bw( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index 4b3401616434..5ab9d6240498 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -578,10 +578,6 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = { .set_avmute = enc1_stream_encoder_set_avmute, .dig_connect_to_otg = enc1_dig_connect_to_otg, .dig_source_otg = enc1_dig_source_otg, - - .dp_get_pixel_format = - enc1_stream_encoder_dp_get_pixel_format, - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT .enc_read_state = enc2_read_state, .dp_set_dsc_config = enc2_dp_set_dsc_config, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 19fabac13c65..ef673bffc241 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -1,7 +1,7 @@ # # Makefile for DCN21. -DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o dcn21_hwseq.o dcn21_link_encoder.o +DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) cc_stack_align := -mpreferred-stack-boundary=4 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index f546260c15b7..d1266741763b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -22,7 +22,6 @@ * Authors: AMD * */ -#include #include "dm_services.h" #include "dcn20/dcn20_hubbub.h" #include "dcn21_hubbub.h" @@ -52,7 +51,7 @@ #ifdef NUM_VMID #undef NUM_VMID #endif -#define NUM_VMID 16 +#define NUM_VMID 1 static uint32_t convert_and_clamp( uint32_t wm_ns, @@ -72,76 +71,56 @@ static uint32_t convert_and_clamp( void dcn21_dchvm_init(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t riommu_active; - int i; //Init DCHVM block REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); //Poll until RIOMMU_ACTIVE = 1 - for (i = 0; i < 100; i++) { - REG_GET(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, &riommu_active); + //TODO: Figure out interval us and retry count + REG_WAIT(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, 1, 5, 100); - if (riommu_active) - break; - else - udelay(5); - } + //Reflect the power status of DCHUBBUB + REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, 1); - if (riommu_active) { - //Reflect the power status of DCHUBBUB - REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, 1); + //Start rIOMMU prefetching + REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, 1); - //Start rIOMMU prefetching - REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, 1); + // Enable dynamic clock gating + REG_UPDATE_4(DCHVM_CLK_CTRL, + HVM_DISPCLK_R_GATE_DIS, 0, + HVM_DISPCLK_G_GATE_DIS, 0, + HVM_DCFCLK_R_GATE_DIS, 0, + HVM_DCFCLK_G_GATE_DIS, 0); - // Enable dynamic clock gating - REG_UPDATE_4(DCHVM_CLK_CTRL, - HVM_DISPCLK_R_GATE_DIS, 0, - HVM_DISPCLK_G_GATE_DIS, 0, - HVM_DCFCLK_R_GATE_DIS, 0, - HVM_DCFCLK_G_GATE_DIS, 0); - - //Poll until HOSTVM_PREFETCH_DONE = 1 - REG_WAIT(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, 1, 5, 100); - } + //Poll until HOSTVM_PREFETCH_DONE = 1 + //TODO: Figure out interval us and retry count + REG_WAIT(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, 1, 5, 100); } -int hubbub21_init_dchub(struct hubbub *hubbub, +static int hubbub21_init_dchub(struct hubbub *hubbub, struct 
dcn_hubbub_phys_addr_config *pa_config) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - struct dcn_vmid_page_table_config phys_config; REG_SET(DCN_VM_FB_LOCATION_BASE, 0, - FB_BASE, pa_config->system_aperture.fb_base >> 24); + FB_BASE, pa_config->system_aperture.fb_base); REG_SET(DCN_VM_FB_LOCATION_TOP, 0, - FB_TOP, pa_config->system_aperture.fb_top >> 24); + FB_TOP, pa_config->system_aperture.fb_top); REG_SET(DCN_VM_FB_OFFSET, 0, - FB_OFFSET, pa_config->system_aperture.fb_offset >> 24); + FB_OFFSET, pa_config->system_aperture.fb_offset); REG_SET(DCN_VM_AGP_BOT, 0, - AGP_BOT, pa_config->system_aperture.agp_bot >> 24); + AGP_BOT, pa_config->system_aperture.agp_bot); REG_SET(DCN_VM_AGP_TOP, 0, - AGP_TOP, pa_config->system_aperture.agp_top >> 24); + AGP_TOP, pa_config->system_aperture.agp_top); REG_SET(DCN_VM_AGP_BASE, 0, - AGP_BASE, pa_config->system_aperture.agp_base >> 24); - - if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) { - phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12; - phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12; - phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr | 1; //Note: hack - phys_config.depth = 0; - phys_config.block_size = 0; - // Init VMID 0 based on PA config - dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config); - } + AGP_BASE, pa_config->system_aperture.agp_base); dcn21_dchvm_init(hubbub); return NUM_VMID; } -void hubbub21_program_urgent_watermarks( +static void hubbub21_program_urgent_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, @@ -181,13 +160,6 @@ void hubbub21_program_urgent_watermarks( REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->a.frac_urg_bw_nom); } - if (safe_to_lower || watermarks->a.urgent_latency_ns > hubbub1->watermarks.a.urgent_latency_ns) { - hubbub1->watermarks.a.urgent_latency_ns = watermarks->a.urgent_latency_ns; - prog_wm_value = convert_and_clamp(watermarks->a.urgent_latency_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, prog_wm_value); - } /* clock state B */ if (safe_to_lower || watermarks->b.urgent_ns > hubbub1->watermarks.b.urgent_ns) { @@ -220,14 +192,6 @@ void hubbub21_program_urgent_watermarks( DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->a.frac_urg_bw_nom); } - if (safe_to_lower || watermarks->b.urgent_latency_ns > hubbub1->watermarks.b.urgent_latency_ns) { - hubbub1->watermarks.b.urgent_latency_ns = watermarks->b.urgent_latency_ns; - prog_wm_value = convert_and_clamp(watermarks->b.urgent_latency_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, prog_wm_value); - } - /* clock state C */ if (safe_to_lower || watermarks->c.urgent_ns > hubbub1->watermarks.c.urgent_ns) { hubbub1->watermarks.c.urgent_ns = watermarks->c.urgent_ns; @@ -259,14 +223,6 @@ void hubbub21_program_urgent_watermarks( DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, watermarks->a.frac_urg_bw_nom); } - if (safe_to_lower || watermarks->c.urgent_latency_ns > hubbub1->watermarks.c.urgent_latency_ns) { - hubbub1->watermarks.c.urgent_latency_ns = watermarks->c.urgent_latency_ns; - prog_wm_value = convert_and_clamp(watermarks->c.urgent_latency_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, prog_wm_value); 
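Every watermark write in this hunk follows the same guard: program only when safe_to_lower is set or the new value is more conservative than the cached one, after converting nanoseconds to refclk cycles and clamping to the field width. The standalone sketch below mirrors that guard with the register write replaced by a printf; the 0x1fffff clamp matches the value used above, everything else is illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Convert a nanosecond watermark to refclk cycles, clamped to the field width. */
static uint32_t convert_and_clamp(uint32_t wm_ns, uint32_t refclk_mhz, uint32_t max)
{
	uint64_t cycles = (uint64_t)wm_ns * refclk_mhz / 1000;

	return cycles > max ? max : (uint32_t)cycles;
}

/* Program only when lowering is allowed, or when the new value is more conservative. */
static void maybe_program(uint32_t *cached_ns, uint32_t new_ns,
			  uint32_t refclk_mhz, bool safe_to_lower)
{
	if (safe_to_lower || new_ns > *cached_ns) {
		*cached_ns = new_ns;
		printf("write watermark: %u cycles\n",
		       convert_and_clamp(new_ns, refclk_mhz, 0x1fffff));
	}
}

int main(void)
{
	uint32_t cached = 4000;

	maybe_program(&cached, 3000, 400, false);	/* skipped: lowering not allowed */
	maybe_program(&cached, 5000, 400, false);	/* written: raising is always safe */
	maybe_program(&cached, 3000, 400, true);	/* written: lowering explicitly allowed */
	return 0;
}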
- } - /* clock state D */ if (safe_to_lower || watermarks->d.urgent_ns > hubbub1->watermarks.d.urgent_ns) { hubbub1->watermarks.d.urgent_ns = watermarks->d.urgent_ns; @@ -297,17 +253,9 @@ void hubbub21_program_urgent_watermarks( REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, 0, DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, watermarks->a.frac_urg_bw_nom); } - - if (safe_to_lower || watermarks->d.urgent_latency_ns > hubbub1->watermarks.d.urgent_latency_ns) { - hubbub1->watermarks.d.urgent_latency_ns = watermarks->d.urgent_latency_ns; - prog_wm_value = convert_and_clamp(watermarks->d.urgent_latency_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, prog_wm_value); - } } -void hubbub21_program_stutter_watermarks( +static void hubbub21_program_stutter_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, @@ -441,7 +389,7 @@ void hubbub21_program_stutter_watermarks( } } -void hubbub21_program_pstate_watermarks( +static void hubbub21_program_pstate_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, @@ -616,26 +564,17 @@ void hubbub21_wm_read_state(struct hubbub *hubbub, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage); } -void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub) -{ - struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t prog_wm_value; - - prog_wm_value = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A); - REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value); -} static const struct hubbub_funcs hubbub21_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub21_init_dchub, - .init_vm_ctx = hubbub2_init_vm_ctx, + .init_vm_ctx = NULL, .dcc_support_swizzle = hubbub2_dcc_support_swizzle, .dcc_support_pixel_format = hubbub2_dcc_support_pixel_format, .get_dcc_compression_cap = hubbub2_get_dcc_compression_cap, .wm_read_state = hubbub21_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, .program_watermarks = hubbub21_program_watermarks, - .apply_DEDCN21_147_wa = hubbub21_apply_DEDCN21_147_wa, }; void hubbub21_construct(struct dcn20_hubbub *hubbub, @@ -653,5 +592,4 @@ void hubbub21_construct(struct dcn20_hubbub *hubbub, hubbub->masks = hubbub_mask; hubbub->debug_test_index_pstate = 0xB; - hubbub->detile_buf_size = 164 * 1024; /* 164KB for DCN2.0 */ } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h index c4840dfb1fa5..6ff3cdb89178 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h @@ -36,10 +36,6 @@ SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B),\ SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C),\ SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D),\ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A),\ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B),\ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C),\ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D),\ SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ SR(DCHVM_CTRL0), \ SR(DCHVM_MEM_CTRL), \ @@ -48,9 +44,16 @@ SR(DCHVM_RIOMMU_STAT0) #define HUBBUB_REG_LIST_DCN21()\ - HUBBUB_REG_LIST_DCN20_COMMON(), \ + HUBBUB_REG_LIST_DCN_COMMON(), \ HUBBUB_SR_WATERMARK_REG_LIST(), \ - HUBBUB_HVM_REG_LIST() + HUBBUB_HVM_REG_LIST(), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DCN_VM_FB_LOCATION_BASE),\ + SR(DCN_VM_FB_LOCATION_TOP),\ + SR(DCN_VM_FB_OFFSET),\ + SR(DCN_VM_AGP_BOT),\ + SR(DCN_VM_AGP_TOP),\ + SR(DCN_VM_AGP_BASE) #define HUBBUB_MASK_SH_LIST_HVM(mask_sh) \ 
HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, mask_sh), \ @@ -99,7 +102,7 @@ HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, mask_sh) #define HUBBUB_MASK_SH_LIST_DCN21(mask_sh)\ - HUBBUB_MASK_SH_LIST_HVM(mask_sh), \ + HUBBUB_MASK_SH_LIST_HVM(mask_sh),\ HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh), \ HUBBUB_MASK_SH_LIST_STUTTER(mask_sh), \ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ @@ -111,28 +114,11 @@ HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh) void dcn21_dchvm_init(struct hubbub *hubbub); -int hubbub21_init_dchub(struct hubbub *hubbub, - struct dcn_hubbub_phys_addr_config *pa_config); void hubbub21_program_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, bool safe_to_lower); -void hubbub21_program_urgent_watermarks( - struct hubbub *hubbub, - struct dcn_watermark_set *watermarks, - unsigned int refclk_mhz, - bool safe_to_lower); -void hubbub21_program_stutter_watermarks( - struct hubbub *hubbub, - struct dcn_watermark_set *watermarks, - unsigned int refclk_mhz, - bool safe_to_lower); -void hubbub21_program_pstate_watermarks( - struct hubbub *hubbub, - struct dcn_watermark_set *watermarks, - unsigned int refclk_mhz, - bool safe_to_lower); void hubbub21_wm_read_state(struct hubbub *hubbub, struct dcn_hubbub_wm *wm); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 2f5a5867e674..a00af513aa2b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -22,8 +22,6 @@ * Authors: AMD * */ - -#include "dcn10/dcn10_hubp.h" #include "dcn21_hubp.h" #include "dm_services.h" @@ -204,7 +202,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp2_program_surface_flip_and_addr, - .hubp_program_surface_config = hubp1_program_surface_config, + .hubp_program_surface_config = hubp2_program_surface_config, .hubp_is_flip_pending = hubp1_is_flip_pending, .hubp_setup = hubp21_setup, .hubp_setup_interdependent = hubp2_setup_interdependent, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 459bd9a5caed..de182185fe1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -23,6 +23,8 @@ * */ +#include + #include "dm_services.h" #include "dc.h" @@ -40,11 +42,11 @@ #include "irq/dcn21/irq_service_dcn21.h" #include "dcn20/dcn20_dpp.h" #include "dcn20/dcn20_optc.h" -#include "dcn21/dcn21_hwseq.h" +#include "dcn20/dcn20_hwseq.h" #include "dce110/dce110_hw_sequencer.h" #include "dcn20/dcn20_opp.h" #include "dcn20/dcn20_dsc.h" -#include "dcn21/dcn21_link_encoder.h" +#include "dcn20/dcn20_link_encoder.h" #include "dcn20/dcn20_stream_encoder.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" @@ -82,9 +84,8 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = { - .odm_capable = 1, - .gpuvm_enable = 1, - .hostvm_enable = 1, + .gpuvm_enable = 0, + .hostvm_enable = 0, .gpuvm_max_page_table_levels = 1, .hostvm_max_page_table_levels = 4, .hostvm_cached_page_table_levels = 2, @@ -204,11 +205,11 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .state = 4, .dcfclk_mhz = 810.0, .fabricclk_mhz = 1600.0, - .dispclk_mhz = 
1395.0, - .dppclk_mhz = 1285.0, - .phyclk_mhz = 1325.0, + .dispclk_mhz = 1015.0, + .dppclk_mhz = 1015.0, + .phyclk_mhz = 810.0, .socclk_mhz = 953.0, - .dscclk_mhz = 489.0, + .dscclk_mhz = 318.334, .dram_speed_mts = 4266.0, }, /*Extra state, no dispclk ramping*/ @@ -216,18 +217,18 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .state = 5, .dcfclk_mhz = 810.0, .fabricclk_mhz = 1600.0, - .dispclk_mhz = 1395.0, - .dppclk_mhz = 1285.0, - .phyclk_mhz = 1325.0, + .dispclk_mhz = 1015.0, + .dppclk_mhz = 1015.0, + .phyclk_mhz = 810.0, .socclk_mhz = 953.0, - .dscclk_mhz = 489.0, + .dscclk_mhz = 318.334, .dram_speed_mts = 4266.0, }, }, - .sr_exit_time_us = 12.5, - .sr_enter_plus_exit_time_us = 17.0, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, @@ -349,30 +350,6 @@ static const struct bios_registers bios_regs = { NBIO_SR(BIOS_SCRATCH_6) }; -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCN10_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCN20_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN20(_MASK) -}; - #ifdef CONFIG_DRM_AMD_DC_DMUB static const struct dcn21_dmcub_registers dmcub_regs = { DMCUB_REG_LIST_DCN() @@ -651,14 +628,6 @@ static const struct dcn10_stream_enc_registers stream_enc_regs[] = { stream_enc_regs(4), }; -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - static const struct dcn10_stream_encoder_shift se_shift = { SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) }; @@ -667,11 +636,6 @@ static const struct dcn10_stream_encoder_mask se_mask = { SE_COMMON_MASK_SH_LIST_DCN20(_MASK) }; -static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu); - -static int dcn21_populate_dml_pipes_from_context( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); - static struct input_pixel_processor *dcn21_ipp_create( struct dc_context *ctx, uint32_t inst) { @@ -719,10 +683,7 @@ static struct dce_aux *dcn21_aux_engine_create( dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); + &aux_engine_regs[inst]); return &aux_engine->base; } @@ -765,12 +726,11 @@ static const struct resource_caps res_cap_rn = { .num_timing_generator = 4, .num_opp = 4, .num_video_plane = 4, - .num_audio = 4, // 4 audio endpoints. 4 audio streams + .num_audio = 6, // 6 audio endpoints. 
4 audio streams .num_stream_encoder = 5, .num_pll = 5, // maybe 3 because the last two used for USB-c .num_dwb = 1, .num_ddc = 5, - .num_vmid = 1, #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT .num_dsc = 3, #endif @@ -836,15 +796,15 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, - .force_single_disp_pipe_split = false, + .force_single_disp_pipe_split = true, .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 3840, + .max_downscale_src_width = 5120,/*upto 5K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = true, - .disable_48mhz_pwrdwn = false, + .disable_48mhz_pwrdwn = true, }; static const struct dc_debug_options debug_defaults_diags = { @@ -979,7 +939,7 @@ static void destruct(struct dcn21_resource_pool *pool) dcn_dccg_destroy(&pool->base.dccg); if (pool->base.pp_smu != NULL) - dcn21_pp_smu_destroy(&pool->base.pp_smu); + dcn20_pp_smu_destroy(&pool->base.pp_smu); } @@ -1009,35 +969,11 @@ static void calculate_wm_set_for_vlevel( #if defined(CONFIG_DRM_AMD_DC_DCN2_1) wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000; wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000; - wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000; #endif dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; } -static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) -{ - kernel_fpu_begin(); - if (dc->bb_overrides.sr_exit_time_ns) { - bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if (dc->bb_overrides.sr_enter_plus_exit_time_ns) { - bb->sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if (dc->bb_overrides.urgent_latency_ns) { - bb->urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if (dc->bb_overrides.dram_clock_change_latency_ns) { - bb->dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - kernel_fpu_end(); -} - void dcn21_calculate_wm( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1052,8 +988,6 @@ void dcn21_calculate_wm( ASSERT(bw_params); - patch_bounding_box(dc, &context->bw_ctx.dml.soc); - for (i = 0, pipe_idx = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; @@ -1087,7 +1021,7 @@ void dcn21_calculate_wm( pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, &context->res_ctx, pipes); else - pipe_cnt = dcn21_populate_dml_pipes_from_context(dc, + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, &context->res_ctx, pipes); } @@ -1337,12 +1271,6 @@ struct display_stream_compressor *dcn21_dsc_create( static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - /* - TODO: Fix this function to calcualte correct values. - There are known issues with this function currently - that will need to be investigated. Use hardcoded known good values for now. 
- - struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; int i; @@ -1350,6 +1278,7 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator; dcn2_1_ip.max_num_dpp = pool->base.pipe_count; dcn2_1_soc.num_chans = bw_params->num_channels; + dcn2_1_soc.num_states = 0; for (i = 0; i < clk_table->num_entries; i++) { @@ -1357,11 +1286,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn2_1_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; dcn2_1_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; dcn2_1_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + /* This is probably wrong, TODO: find correct calculation */ dcn2_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 16 / 1000; + dcn2_1_soc.num_states++; } - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - i]; - dcn2_1_soc.num_states = i; - */ } /* Temporary Place holder until we can get them from fuse */ @@ -1389,42 +1317,32 @@ static struct dpm_clocks dummy_clocks = { }; -static enum pp_smu_status dummy_set_wm_ranges(struct pp_smu *pp, +enum pp_smu_status dummy_set_wm_ranges(struct pp_smu *pp, struct pp_smu_wm_range_sets *ranges) { return PP_SMU_RESULT_OK; } -static enum pp_smu_status dummy_get_dpm_clock_table(struct pp_smu *pp, +enum pp_smu_status dummy_get_dpm_clock_table(struct pp_smu *pp, struct dpm_clocks *clock_table) { *clock_table = dummy_clocks; return PP_SMU_RESULT_OK; } -static struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx) +struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx) { struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); - if (!pp_smu) - return pp_smu; + pp_smu->ctx.ver = PP_SMU_VER_RN; - if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment) || IS_DIAG_DC(ctx->dce_environment)) { - pp_smu->ctx.ver = PP_SMU_VER_RN; - pp_smu->rn_funcs.get_dpm_clock_table = dummy_get_dpm_clock_table; - pp_smu->rn_funcs.set_wm_ranges = dummy_set_wm_ranges; - } else { - - dm_pp_get_funcs(ctx, pp_smu); - - if (pp_smu->ctx.ver != PP_SMU_VER_RN) - pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs)); - } + pp_smu->rn_funcs.get_dpm_clock_table = dummy_get_dpm_clock_table; + pp_smu->rn_funcs.set_wm_ranges = dummy_set_wm_ranges; return pp_smu; } -static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu) +void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu) { if (pp_smu && *pp_smu) { kfree(*pp_smu); @@ -1482,7 +1400,6 @@ static struct dce_hwseq *dcn21_hwseq_create( hws->regs = &hwseq_reg; hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; - hws->wa.DEGVIDCN21 = true; } return hws; } @@ -1501,152 +1418,10 @@ static const struct resource_create_funcs res_create_maximus_funcs = { .create_hwseq = dcn21_hwseq_create, }; -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN10_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, 
B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E), -}; - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - default: - ASSERT(0); - return 0; - } -} - -static struct link_encoder *dcn21_link_encoder_create( - const struct encoder_init_data *enc_init_data) -{ - struct dcn21_link_encoder *enc21 = - kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc21) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dcn21_link_encoder_construct(enc21, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc21->enc10.base; -} -#define CTX ctx - -#define REG(reg_name) \ - (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* RV1 support max 4 pipes */ - value = value & 0xf; - return value; -} - -static int dcn21_populate_dml_pipes_from_context( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) -{ - uint32_t pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, res_ctx, pipes); - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - - if (!res_ctx->pipe_ctx[i].stream) - continue; - - pipes[i].pipe.src.hostvm = 1; - pipes[i].pipe.src.gpuvm = 1; - } - - return pipe_cnt; -} - static struct resource_funcs dcn21_res_pool_funcs = { .destroy = dcn21_destroy_resource_pool, - .link_enc_create = dcn21_link_encoder_create, + .link_enc_create = dcn20_link_encoder_create, .validate_bandwidth = dcn21_validate_bandwidth, - .populate_dml_pipes = dcn21_populate_dml_pipes_from_context, .add_stream_to_ctx = dcn20_add_stream_to_ctx, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, @@ -1662,11 +1437,9 @@ static bool construct( struct dc *dc, struct dcn21_resource_pool *pool) { - int i, j; + int i; struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - uint32_t pipe_fuses = read_pipe_fuses(ctx); - uint32_t num_pipes; ctx->dc_bios->regs = &bios_regs; @@ -1684,9 +1457,7 @@ static bool construct( *************************************************/ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - /* max pipe num for ASIC before check pipe fuses */ - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - + pool->base.pipe_count = 4; dc->caps.max_downscale_ratio = 200; 
dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 256; @@ -1696,7 +1467,6 @@ static bool construct( dc->caps.max_slave_planes = 1; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; @@ -1746,26 +1516,6 @@ static bool construct( goto create_fail; } - pool->base.dmcu = dcn20_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - #ifdef CONFIG_DRM_AMD_DC_DMUB pool->base.dmcub = dcn21_dmcub_create(ctx, &dmcub_regs, @@ -1780,14 +1530,6 @@ static bool construct( pool->base.pp_smu = dcn21_pp_smu_create(ctx); - num_pipes = dcn2_1_ip.max_num_dpp; - - for (i = 0; i < dcn2_1_ip.max_num_dpp; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - dcn2_1_ip.max_num_dpp = num_pipes; - dcn2_1_ip.max_num_otg = num_pipes; - dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); init_data.ctx = dc->ctx; @@ -1795,15 +1537,8 @@ static bool construct( if (!pool->base.irqs) goto create_fail; - j = 0; /* mem input -> ipp -> dpp -> opp -> TG */ for (i = 0; i < pool->base.pipe_count; i++) { - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if ((pipe_fuses & (1 << i)) != 0) - continue; - pool->base.hubps[i] = dcn21_hubp_create(ctx, i); if (pool->base.hubps[i] == NULL) { BREAK_TO_DEBUGGER(); @@ -1827,23 +1562,6 @@ static bool construct( "DC: failed to create dpps!\n"); goto create_fail; } - - pool->base.opps[i] = dcn21_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - - pool->base.timing_generators[i] = dcn21_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - j++; } for (i = 0; i < pool->base.res_cap->num_ddc; i++) { @@ -1864,9 +1582,27 @@ static bool construct( pool->base.sw_i2cs[i] = NULL; } - pool->base.timing_generator_count = j; - pool->base.pipe_count = j; - pool->base.mpcc_count = j; + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn21_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn21_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + + pool->base.timing_generator_count = i; pool->base.mpc = dcn21_mpc_create(ctx); if (pool->base.mpc == NULL) { @@ -1909,7 +1645,7 @@ static bool construct( &res_create_funcs : &res_create_maximus_funcs))) goto create_fail; - dcn21_hw_sequencer_construct(dc); + dcn20_hw_sequencer_construct(dc); dc->caps.max_planes = pool->base.pipe_count; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 94b75e942607..b6b4333737f2 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -74,7 +74,7 @@ void dm_helpers_dp_mst_clear_payload_allocation_table( /* * Polls for ACT (allocation change trigger) handled and */ -enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( +bool dm_helpers_dp_mst_poll_for_allocation_change_trigger( struct dc_context *ctx, const struct dc_stream_state *stream); /* diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index ef7df9ef6d7e..c03a441ee638 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -249,8 +249,10 @@ struct pp_smu_funcs_nv { }; #endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + #define PP_SMU_NUM_SOCCLK_DPM_LEVELS 8 -#define PP_SMU_NUM_DCFCLK_DPM_LEVELS 8 +#define PP_SMU_NUM_DCFCLK_DPM_LEVELS 4 #define PP_SMU_NUM_FCLK_DPM_LEVELS 4 #define PP_SMU_NUM_MEMCLK_DPM_LEVELS 4 @@ -286,6 +288,7 @@ struct pp_smu_funcs_rn { enum pp_smu_status (*get_dpm_clock_table) (struct pp_smu *pp, struct dpm_clocks *clock_table); }; +#endif struct pp_smu_funcs { struct pp_smu ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 6c6c486b774a..649883777f62 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -2577,8 +2577,7 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer mode_lib->vba.MinActiveDRAMClockChangeMargin + mode_lib->vba.DRAMClockChangeLatency; - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) { - mode_lib->vba.DRAMClockChangeWatermark += 25; + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; } else { if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 3c70dd577292..0fafd693ffb4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -38,7 +38,6 @@ #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff -#define DCN20_MAX_DSC_IMAGE_WIDTH 5184 static double adjust_ReturnBW( struct display_mode_lib *mode_lib, @@ -2611,8 +2610,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP mode_lib->vba.MinActiveDRAMClockChangeMargin + mode_lib->vba.DRAMClockChangeLatency; - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) { - mode_lib->vba.DRAMClockChangeWatermark += 25; + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; } else { if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { @@ -3903,10 +3901,6 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode mode_lib->vba.MaximumSwathWidthInLineBuffer); } for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown( - mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states], - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - for (j = 0; j < 2; j++) { mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( mode_lib->vba.MaxDispclk[i], @@ -3931,9 +3925,7 @@ 
void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode && i == mode_lib->vba.soc.num_states) mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - if (mode_lib->vba.ODMCapability == false || - (locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown - && (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN20_MAX_DSC_IMAGE_WIDTH))) { + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { locals->ODMCombineEnablePerState[i][k] = false; mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 2c7455e22a65..878bf4782ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -207,7 +207,7 @@ static void extract_rq_regs(struct display_mode_lib *mode_lib, rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); - // TODO: take the max between luma, chroma chunk size? + // FIXME: take the max between luma, chroma chunk size? // okay for now, as we are setting chunk_bytes to 8kb anyways if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb rq_regs->drq_expansion_mode = 0; @@ -677,7 +677,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib, unsigned int meta_pitch = 0; unsigned int ppe = mode_422 ? 2 : 1; - // TODO check if ppe apply for both luma and chroma in 422 case + // FIXME check if ppe apply for both luma and chroma in 422 case if (is_chroma) { vp_width = pipe_src_param.viewport_width_c / ppe; vp_height = pipe_src_param.viewport_height_c; @@ -959,7 +959,7 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, // Source // dcc_en = src.dcc; dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); - mode_422 = 0; // TODO + mode_422 = 0; // FIXME access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); @@ -1655,7 +1655,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width; cur_req_per_width = cur_width_ub / (double) cur_req_width; - hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor if (vratio_pre_l <= 1.0) { *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 1e6aeb1bd2bf..ed8bf5f723c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -207,7 +207,7 @@ static void extract_rq_regs(struct display_mode_lib *mode_lib, 
rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); - // TODO: take the max between luma, chroma chunk size? + // FIXME: take the max between luma, chroma chunk size? // okay for now, as we are setting chunk_bytes to 8kb anyways if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb rq_regs->drq_expansion_mode = 0; @@ -677,7 +677,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib, unsigned int meta_pitch = 0; unsigned int ppe = mode_422 ? 2 : 1; - // TODO check if ppe apply for both luma and chroma in 422 case + // FIXME check if ppe apply for both luma and chroma in 422 case if (is_chroma) { vp_width = pipe_src_param.viewport_width_c / ppe; vp_height = pipe_src_param.viewport_height_c; @@ -959,7 +959,7 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, // Source // dcc_en = src.dcc; dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); - mode_422 = 0; // TODO + mode_422 = 0; // FIXME access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); @@ -1655,7 +1655,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) * (double) cur_req_width; cur_req_per_width = cur_width_ub / (double) cur_req_width; - hactive_cur = (double) cur_src_width / hscl_ratio; // TODO: oswin to think about what to do for cursor + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor if (vratio_pre_l <= 1.0) { *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index ba77957aefe3..3b6ed60dcd35 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -65,7 +65,6 @@ typedef struct { #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff -#define DCN21_MAX_DSC_IMAGE_WIDTH 5184 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( @@ -3380,8 +3379,6 @@ static unsigned int TruncToValidBPP( return 30; else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24)) return 24; - else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18)) - return 18; else return BPP_INVALID; } @@ -3939,10 +3936,6 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaximumSwathWidthInLineBuffer); } for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { - double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown( - mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states], - mode_lib->vba.DISPCLKDPPCLKVCOSpeed); - for (j = 0; j < 2; j++) { mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( mode_lib->vba.MaxDispclk[i], @@ -3972,9 +3965,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && i == mode_lib->vba.soc.num_states) mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 * (1 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); - if (mode_lib->vba.ODMCapability == false || - (locals->PlaneRequiredDISPCLKWithoutODMCombine <= MaxMaxDispclkRoundedDown - && (!locals->DSCEnabled[k] || locals->HActive[k] <= DCN21_MAX_DSC_IMAGE_WIDTH))) { + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { locals->ODMCombineEnablePerState[i][k] = false; mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index cfacd6027467..f4c1ef9046bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -269,7 +269,7 @@ struct writeback_st { struct _vcs_dpi_display_output_params_st { int dp_lanes; - double output_bpp; + int output_bpp; int dsc_enable; int wb_enable; int num_active_wb; @@ -318,7 +318,6 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_width; unsigned int vready_offset; unsigned char interlaced; - unsigned char embedded; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; unsigned char otg_inst; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 7f9a5621922f..65cf4edddaff 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -375,7 +375,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.pipe_plane[j] = mode_lib->vba.NumberOfActivePlanes; - mode_lib->vba.EmbeddedPanel[mode_lib->vba.NumberOfActivePlanes] = dst->embedded; mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes] = 1; mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] = (enum scan_direction_class) (src->source_scan); @@ -433,12 +432,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) dst->recout_width; // TODO: or should this be full_recout_width???...maybe only when in hsplit mode? 
mode_lib->vba.ODMCombineEnabled[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine; - mode_lib->vba.ODMCombineTypeEnabled[mode_lib->vba.NumberOfActivePlanes] = - dst->odm_combine; mode_lib->vba.OutputFormat[mode_lib->vba.NumberOfActivePlanes] = (enum output_format_class) (dout->output_format); - mode_lib->vba.OutputBpp[mode_lib->vba.NumberOfActivePlanes] = - dout->output_bpp; mode_lib->vba.Output[mode_lib->vba.NumberOfActivePlanes] = (enum output_encoder_class) (dout->output_type); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 1540ffbe3979..91decac50557 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -387,7 +387,6 @@ struct vba_vars_st { /* vba mode support */ /*inputs*/ - bool EmbeddedPanel[DC__NUM_DPP__MAX]; bool SupportGFX7CompatibleTilingIn32bppAnd64bpp; double MaxHSCLRatio; double MaxVSCLRatio; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 4c3e9cc30167..ad8571f5a142 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -243,7 +243,7 @@ void dml1_extract_rq_regs( rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); - /* TODO: take the max between luma, chroma chunk size? + /* FIXME: take the max between luma, chroma chunk size? * okay for now, as we are setting chunk_bytes to 8kb anyways */ if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { /*32kb */ @@ -602,7 +602,7 @@ static void get_surf_rq_param( unsigned int log2_dpte_group_length; unsigned int func_meta_row_height, func_dpte_row_height; - /* TODO check if ppe apply for both luma and chroma in 422 case */ + /* FIXME check if ppe apply for both luma and chroma in 422 case */ if (is_chroma) { vp_width = pipe_src_param.viewport_width_c / ppe; vp_height = pipe_src_param.viewport_height_c; @@ -1141,7 +1141,7 @@ void dml1_rq_dlg_get_dlg_params( ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); disp_dlg_regs->dlg_vblank_end = interlaced ? 
(vblank_end / 2) : vblank_end; /* 15 bits */ - prefetch_xy_calc_in_dcfclk = 24.0; /* TODO: ip_param */ + prefetch_xy_calc_in_dcfclk = 24.0; /* FIXME: ip_param */ min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; t_calc_us = prefetch_xy_calc_in_dcfclk / min_dcfclk_mhz; min_ttu_vblank = dlg_sys_param.t_urg_wm_us; @@ -1182,7 +1182,7 @@ void dml1_rq_dlg_get_dlg_params( dcc_en = e2e_pipe_param.pipe.src.dcc; dual_plane = is_dual_plane( (enum source_format_class) e2e_pipe_param.pipe.src.source_format); - mode_422 = 0; /* TODO */ + mode_422 = 0; /* FIXME */ access_dir = (e2e_pipe_param.pipe.src.source_scan == dm_vert); /* vp access direction: horizontal or vertical accessed */ bytes_per_element_l = get_bytes_per_element( (enum source_format_class) e2e_pipe_param.pipe.src.source_format, @@ -1837,7 +1837,7 @@ void dml1_rq_dlg_get_dlg_params( cur0_width_ub = dml_ceil((double) cur0_src_width / (double) cur0_req_width, 1) * (double) cur0_req_width; cur0_req_per_width = cur0_width_ub / (double) cur0_req_width; - hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* TODO: oswin to think about what to do for cursor */ + hactive_cur0 = (double) cur0_src_width / hratios_cur0; /* FIXME: oswin to think about what to do for cursor */ if (vratio_pre_l <= 1.0) { refcyc_per_req_delivery_pre_cur0 = hactive_cur0 * ref_freq_to_pix_freq diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index e60f760585e4..5995bcdfed54 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -23,7 +23,8 @@ */ #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT -#include "dc_hw_types.h" +#include "dc.h" +#include "core_types.h" #include "dsc.h" #include @@ -46,59 +47,6 @@ const struct dc_dsc_policy dsc_policy = { /* This module's internal functions */ -static uint32_t dc_dsc_bandwidth_in_kbps_from_timing( - const struct dc_crtc_timing *timing) -{ - uint32_t bits_per_channel = 0; - uint32_t kbps; - - if (timing->flags.DSC) { - kbps = (timing->pix_clk_100hz * timing->dsc_cfg.bits_per_pixel); - kbps = kbps / 160 + ((kbps % 160) ? 
1 : 0); - return kbps; - } - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - bits_per_channel = 6; - break; - case COLOR_DEPTH_888: - bits_per_channel = 8; - break; - case COLOR_DEPTH_101010: - bits_per_channel = 10; - break; - case COLOR_DEPTH_121212: - bits_per_channel = 12; - break; - case COLOR_DEPTH_141414: - bits_per_channel = 14; - break; - case COLOR_DEPTH_161616: - bits_per_channel = 16; - break; - default: - break; - } - - ASSERT(bits_per_channel != 0); - - kbps = timing->pix_clk_100hz / 10; - kbps *= bits_per_channel; - - if (timing->flags.Y_ONLY != 1) { - /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ - kbps *= 3; - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - kbps /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - kbps = kbps * 2 / 3; - } - - return kbps; - -} - static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size) { @@ -230,11 +178,12 @@ static bool dsc_bpp_increment_div_from_dpcd(int bpp_increment_dpcd, uint32_t *bp } static void get_dsc_enc_caps( - const struct display_stream_compressor *dsc, + const struct dc *dc, struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) { // This is a static HW query, so we can use any DSC + struct display_stream_compressor *dsc = dc->res_pool->dscs[0]; memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); if (dsc) @@ -341,7 +290,7 @@ static void get_dsc_bandwidth_range( struct dc_dsc_bw_range *range) { /* native stream bandwidth */ - range->stream_kbps = dc_dsc_bandwidth_in_kbps_from_timing(timing); + range->stream_kbps = dc_bandwidth_in_kbps_from_timing(timing); /* max dsc target bpp */ range->max_kbps = dsc_div_by_10_round_up(max_bpp * timing->pix_clk_100hz); @@ -563,7 +512,6 @@ static bool setup_dsc_config( const struct dsc_enc_caps *dsc_enc_caps, int target_bandwidth_kbps, const struct dc_crtc_timing *timing, - int min_slice_height_override, struct dc_dsc_config *dsc_cfg) { struct dsc_enc_caps dsc_common_caps; @@ -732,10 +680,7 @@ static bool setup_dsc_config( // Slice height (i.e. number of slices per column): start with policy and pick the first one that height is divisible by. // For 4:2:0 make sure the slice height is divisible by 2 as well. - if (min_slice_height_override == 0) - slice_height = min(dsc_policy.min_sice_height, pic_height); - else - slice_height = min(min_slice_height_override, pic_height); + slice_height = min(dsc_policy.min_sice_height, pic_height); while (slice_height < pic_height && (pic_height % slice_height != 0 || (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 && slice_height % 2 != 0))) @@ -857,8 +802,7 @@ bool dc_dsc_parse_dsc_dpcd(const uint8_t *dpcd_dsc_basic_data, const uint8_t *dp * If DSC is not possible, leave '*range' untouched. 
*/ bool dc_dsc_compute_bandwidth_range( - const struct display_stream_compressor *dsc, - const uint32_t dsc_min_slice_height_override, + const struct dc *dc, const uint32_t min_bpp, const uint32_t max_bpp, const struct dsc_dec_dpcd_caps *dsc_sink_caps, @@ -870,14 +814,16 @@ bool dc_dsc_compute_bandwidth_range( struct dsc_enc_caps dsc_common_caps; struct dc_dsc_config config; - get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + get_dsc_enc_caps(dc, &dsc_enc_caps, timing->pix_clk_100hz); is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, timing->pixel_encoding, &dsc_common_caps); if (is_dsc_possible) - is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - dsc_min_slice_height_override, &config); + is_dsc_possible = setup_dsc_config(dsc_sink_caps, + &dsc_enc_caps, + 0, + timing, &config); if (is_dsc_possible) get_dsc_bandwidth_range(min_bpp, max_bpp, &dsc_common_caps, timing, range); @@ -886,9 +832,8 @@ bool dc_dsc_compute_bandwidth_range( } bool dc_dsc_compute_config( - const struct display_stream_compressor *dsc, + const struct dc *dc, const struct dsc_dec_dpcd_caps *dsc_sink_caps, - const uint32_t dsc_min_slice_height_override, uint32_t target_bandwidth_kbps, const struct dc_crtc_timing *timing, struct dc_dsc_config *dsc_cfg) @@ -896,11 +841,11 @@ bool dc_dsc_compute_config( bool is_dsc_possible = false; struct dsc_enc_caps dsc_enc_caps; - get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + get_dsc_enc_caps(dc, &dsc_enc_caps, timing->pix_clk_100hz); is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, target_bandwidth_kbps, - timing, dsc_min_slice_height_override, dsc_cfg); + timing, dsc_cfg); return is_dsc_possible; } #endif /* CONFIG_DRM_AMD_DC_DSC_SUPPORT */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index 76c4b12d6824..ca51e83f8764 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -177,6 +177,7 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com { float bpp_group; float initial_xmit_delay_factor; + int source_bpp; int padding_pixels; int i; @@ -216,6 +217,8 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com rc->initial_xmit_delay++; } + source_bpp = MODE_SELECT(bpc * 3, bpc * 2, bpc * 1.5); + rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 
1 : 0); rc->flatness_det_thresh = 2 << (bpc - 8); diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c index f67c18375bfd..f8f85490e77e 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c @@ -321,6 +321,8 @@ void dal_gpio_destroy( return; } + dal_gpio_close(*gpio); + switch ((*gpio)->id) { case GPIO_ID_DDC_DATA: kfree((*gpio)->hw_container.ddc); diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index 92280cc05e2d..d03165e71dc6 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -169,6 +169,7 @@ void dal_gpio_destroy_generic_mux( return; } + dal_gpio_close(*mux); dal_gpio_destroy(mux); kfree(*mux); @@ -459,6 +460,7 @@ void dal_gpio_destroy_irq( return; } + dal_gpio_close(*irq); dal_gpio_destroy(irq); kfree(*irq); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index a831079607cd..f189307750ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -52,9 +52,7 @@ void enable_surface_flip_reporting(struct dc_plane_state *plane_state, #include "clock_source.h" #include "audio.h" #include "dm_pp_smu.h" -#ifdef CONFIG_DRM_AMD_DC_HDCP -#include "dm_cp_psp.h" -#endif + /************ link *****************/ struct link_init_data { @@ -233,7 +231,6 @@ struct resource_pool { struct dcn_fe_bandwidth { int dppclk_khz; - }; struct stream_resource { @@ -398,6 +395,10 @@ struct dc_state { struct clk_mgr *clk_mgr; + struct { + bool full_update_needed : 1; + } commit_hints; + struct kref refcount; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index 14716ba35662..b1fab251c09b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -95,9 +95,6 @@ bool dal_ddc_service_query_ddc_data( uint8_t *read_buf, uint32_t read_size); -bool dal_ddc_submit_aux_command(struct ddc_service *ddc, - struct aux_payload *payload); - int dc_link_aux_transfer_raw(struct ddc_service *ddc, struct aux_payload *payload, enum aux_channel_operation_result *operation_result); @@ -105,9 +102,6 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc, bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *payload); -enum dc_status dc_link_aux_configure_timeout(struct ddc_service *ddc, - uint32_t timeout); - void dal_ddc_service_write_scdc_data( struct ddc_service *ddc_service, uint32_t pix_clk, diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index 045138dbdccb..08a4df2c61a8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -28,8 +28,6 @@ #define LINK_TRAINING_ATTEMPTS 4 #define LINK_TRAINING_RETRY_DELAY 50 /* ms */ -#define LINK_AUX_DEFAULT_EXTENDED_TIMEOUT_PERIOD 32000 /*us*/ -#define LINK_AUX_DEFAULT_TIMEOUT_PERIOD 400 /*us*/ struct dc_link; struct dc_stream_state; @@ -45,9 +43,6 @@ bool dp_verify_link_cap_with_retries( struct dc_link_settings *known_limit_link_setting, int attempts); -bool dp_verify_mst_link_cap( - struct dc_link *link); - bool dp_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing); diff --git 
a/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h b/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h index e77b3a76766d..e79cd4e92919 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h @@ -140,9 +140,6 @@ struct write_command_context { struct aux_engine_funcs { - bool (*configure_timeout)( - struct ddc_service *ddc, - uint32_t timeout); void (*destroy)( struct aux_engine **ptr); bool (*acquire_engine)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 4e18e77dcf42..76f9ad1b23df 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -47,7 +47,7 @@ #ifdef CONFIG_DRM_AMD_DC_DCN2_1 /* Will these bw structures be ASIC specific? */ -#define MAX_NUM_DPM_LVL 8 +#define MAX_NUM_DPM_LVL 4 #define WM_SET_COUNT 4 @@ -180,19 +180,13 @@ struct clk_mgr_funcs { struct dc_state *context, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); - - bool (*are_clock_states_equal) (struct dc_clocks *a, - struct dc_clocks *b); - void (*notify_wm_ranges)(struct clk_mgr *clk_mgr); }; struct clk_mgr { struct dc_context *ctx; struct clk_mgr_funcs *funcs; struct dc_clocks clks; - bool psr_allow_active_cache; int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes - int dentist_vco_freq_khz; #ifdef CONFIG_DRM_AMD_DC_DCN2_1 struct clk_bw_params *bw_params; #endif @@ -205,8 +199,4 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr); -void clk_mgr_exit_optimized_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr); - -void clk_mgr_optimize_pwr_state(const struct dc *dc, struct clk_mgr *clk_mgr); - #endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index a17a77192690..7dd46eb96d67 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -184,21 +184,6 @@ struct clk_mgr_registers { uint32_t MP1_SMN_C2PMSG_91; }; -enum clock_type { - clock_type_dispclk = 1, - clock_type_dcfclk, - clock_type_socclk, - clock_type_pixelclk, - clock_type_phyclk, - clock_type_dppclk, - clock_type_fclk, - clock_type_dcfdsclk, - clock_type_dscclk, - clock_type_uclk, - clock_type_dramclk, -}; - - struct state_dependent_clocks { int display_clk_khz; int pixel_clk_khz; @@ -225,6 +210,8 @@ struct clk_mgr_internal { struct state_dependent_clocks max_clks_by_state[DM_PP_CLOCKS_MAX_STATES]; /*TODO: figure out which of the below fields should be here vs in asic specific portion */ + int dentist_vco_freq_khz; + /* Cache the status of DFS-bypass feature*/ bool dfs_bypass_enabled; /* True if the DFS-bypass feature is enabled and active. 
*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 05ee5295d2c1..d8e744f366e5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -38,7 +38,8 @@ struct dccg { struct dccg_funcs { void (*update_dpp_dto)(struct dccg *dccg, int dpp_inst, - int req_dppclk); + int req_dppclk, + bool reduce_divider_only); void (*get_dccg_ref_freq)(struct dccg *dccg, unsigned int xtalin_freq_inKhz, unsigned int *dccg_ref_freq_inKhz); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index c81a17aeaa25..a6297219d7fc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -147,7 +147,6 @@ struct hubbub_funcs { bool (*is_allow_self_refresh_enabled)(struct hubbub *hubbub); void (*allow_self_refresh_control)(struct hubbub *hubbub, bool allow); - void (*apply_DEDCN21_147_wa)(struct hubbub *hubbub); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index c6ff3d78b435..1ddb1c6fa149 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -28,11 +28,7 @@ #include "dc_dsc.h" #include "dc_hw_types.h" -#include "dc_types.h" -/* do not include any other headers - * or else it might break Edid Utility functionality. - */ - +#include "dc_dp_types.h" /* Input parameters for configuring DSC from the outside of DSC */ struct dsc_config { @@ -85,6 +81,12 @@ struct dsc_enc_caps { uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */ }; +struct display_stream_compressor { + const struct dsc_funcs *funcs; + struct dc_context *ctx; + int inst; +}; + struct dsc_funcs { void (*dsc_get_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); void (*dsc_read_state)(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index b21909216fb6..abb4e4237fb6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -184,10 +184,6 @@ struct link_encoder_funcs { bool (*fec_is_active)(struct link_encoder *enc); #endif bool (*is_in_alt_mode) (struct link_encoder *enc); - - void (*get_max_link_cap)(struct link_encoder *enc, - struct dc_link_settings *link_settings); - enum signal_type (*get_dig_mode)( struct link_encoder *enc); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 67b610d6d91f..e8668388581b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -43,7 +43,6 @@ struct dcn_watermarks { #if defined(CONFIG_DRM_AMD_DC_DCN2_1) uint32_t frac_urg_bw_nom; uint32_t frac_urg_bw_flip; - int32_t urgent_latency_ns; #endif struct cstate_pstate_watermarks_st cstate_pstate; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h index 18def2b6fafe..957e9047381a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h @@ -208,7 +208,6 @@ struct output_pixel_processor { struct mpc_tree mpc_tree_params; bool mpcc_disconnect_pending[MAX_PIPES]; const struct opp_funcs *funcs; - uint32_t dyn_expansion; }; enum 
fmt_stereo_action { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 6305e388612a..fe9b7a10a1c3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -214,11 +214,6 @@ struct stream_encoder_funcs { unsigned int (*dig_source_otg)( struct stream_encoder *enc); - bool (*dp_get_pixel_format)( - struct stream_encoder *enc, - enum dc_pixel_encoding *encoding, - enum dc_color_depth *depth); - #if defined(CONFIG_DRM_AMD_DC_DCN2_0) #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT void (*enc_read_state)(struct stream_encoder *enc, struct enc_state *s); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 27c73caf74ee..6196cc32356e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -261,8 +261,6 @@ struct timing_generator_funcs { void (*program_manual_trigger)(struct timing_generator *optc); void (*setup_manual_trigger)(struct timing_generator *optc); - bool (*get_hw_timing)(struct timing_generator *optc, - struct dc_crtc_timing *hw_crtc_timing); void (*set_vtg_params)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index d39c1e11def5..3a938cd414ea 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -114,9 +114,6 @@ struct hw_sequencer_funcs { int opp_id); #if defined(CONFIG_DRM_AMD_DC_DCN2_0) - void (*program_front_end_for_ctx)( - struct dc *dc, - struct dc_state *context); void (*program_triplebuffer)( const struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -232,13 +229,6 @@ struct hw_sequencer_funcs { struct dc *dc, struct dc_state *context); - void (*exit_optimized_pwr_state)( - const struct dc *dc, - struct dc_state *context); - void (*optimize_pwr_state)( - const struct dc *dc, - struct dc_state *context); - #if defined(CONFIG_DRM_AMD_DC_DCN2_0) bool (*update_bandwidth)( struct dc *dc, @@ -331,12 +321,10 @@ struct hw_sequencer_funcs { struct dc_state *context); void (*update_writeback)(struct dc *dc, const struct dc_stream_status *stream_status, - struct dc_writeback_info *wb_info, - struct dc_state *context); + struct dc_writeback_info *wb_info); void (*enable_writeback)(struct dc *dc, const struct dc_stream_status *stream_status, - struct dc_writeback_info *wb_info, - struct dc_state *context); + struct dc_writeback_info *wb_info); void (*disable_writeback)(struct dc *dc, unsigned int dwb_pipe_inst); #endif @@ -349,9 +337,6 @@ struct hw_sequencer_funcs { enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg); -#if defined(CONFIG_DRM_AMD_DC_DCN2_1) - bool (*s0i3_golden_init_wa)(struct dc *dc); -#endif }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 9ad49da50a17..18961707db23 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -31,8 +31,6 @@ #define DP_BRANCH_DEVICE_ID_0022B9 0x0022B9 #define DP_BRANCH_DEVICE_ID_00001A 0x00001A #define DP_BRANCH_DEVICE_ID_0080E1 0x0080e1 -#define DP_BRANCH_DEVICE_ID_90CC24 0x90CC24 -#define DP_BRANCH_DEVICE_ID_00E04C 0x00E04C enum 
ddc_result { DDC_RESULT_UNKNOWN = 0, diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 1de4805cb8c7..2d8f14b69117 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -373,42 +373,7 @@ static struct fixed31_32 translate_from_linear_space( return dc_fixpt_mul(args->arg, args->a1); } - -static struct fixed31_32 translate_from_linear_space_long( - struct translate_from_linear_space_args *args) -{ - const struct fixed31_32 one = dc_fixpt_from_int(1); - - if (dc_fixpt_lt(one, args->arg)) - return one; - - if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) - return dc_fixpt_sub( - args->a2, - dc_fixpt_mul( - dc_fixpt_add( - one, - args->a3), - dc_fixpt_pow( - dc_fixpt_neg(args->arg), - dc_fixpt_recip(args->gamma)))); - else if (dc_fixpt_le(args->a0, args->arg)) - return dc_fixpt_sub( - dc_fixpt_mul( - dc_fixpt_add( - one, - args->a3), - dc_fixpt_pow( - args->arg, - dc_fixpt_recip(args->gamma))), - args->a2); - else - return dc_fixpt_mul( - args->arg, - args->a1); -} - -static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf) +static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg) { struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10); @@ -419,13 +384,9 @@ static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf) scratch_gamma_args.a3 = dc_fixpt_zero; scratch_gamma_args.gamma = gamma; - if (use_eetf) - return translate_from_linear_space_long(&scratch_gamma_args); - return translate_from_linear_space(&scratch_gamma_args); } - static struct fixed31_32 translate_to_linear_space( struct fixed31_32 arg, struct fixed31_32 a0, @@ -959,7 +920,11 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, if (fs_params->max_display < 100) // cap at 100 at the top max_display = dc_fixpt_from_int(100); - // only max used, we don't adjust min luminance + if (fs_params->min_content < fs_params->min_display) + use_eetf = true; + else + min_content = min_display; + if (fs_params->max_content > fs_params->max_display) use_eetf = true; else @@ -985,7 +950,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, if (dc_fixpt_lt(scaledX, dc_fixpt_zero)) output = dc_fixpt_zero; else - output = calculate_gamma22(scaledX, use_eetf); + output = calculate_gamma22(scaledX); rgb->r = output; rgb->g = output; @@ -2208,3 +2173,5 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, rgb_degamma_alloc_fail: return ret; } + + diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 16e69bbc69aa..ec70c9b12e1a 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -37,8 +37,8 @@ #define STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME ((1000 / 60) * 65) /* Number of elements in the render times cache array */ #define RENDER_TIMES_MAX_COUNT 10 -/* Threshold to exit/exit BTR (to avoid frequent enter-exits at the lower limit) */ -#define BTR_MAX_MARGIN 2500 +/* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */ +#define BTR_EXIT_MARGIN 2000 /* Threshold to change BTR multiplier (to avoid frequent changes) */ #define BTR_DRIFT_MARGIN 2000 /*Threshold to exit fixed refresh rate*/ @@ -234,10 +234,6 @@ static void update_v_total_for_static_ramp( current_duration_in_us) * 
(stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000); - /* v_total cannot be less than nominal */ - if (v_total < stream->timing.v_total) - v_total = stream->timing.v_total; - in_out_vrr->adjust.v_total_min = v_total; in_out_vrr->adjust.v_total_max = v_total; } @@ -254,22 +250,24 @@ static void apply_below_the_range(struct core_freesync *core_freesync, unsigned int delta_from_mid_point_in_us_1 = 0xFFFFFFFF; unsigned int delta_from_mid_point_in_us_2 = 0xFFFFFFFF; unsigned int frames_to_insert = 0; + unsigned int min_frame_duration_in_ns = 0; + unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us; unsigned int delta_from_mid_point_delta_in_us; - unsigned int max_render_time_in_us = - in_out_vrr->max_duration_in_us - in_out_vrr->btr.margin_in_us; + + min_frame_duration_in_ns = ((unsigned int) (div64_u64( + (1000000000ULL * 1000000), + in_out_vrr->max_refresh_in_uhz))); /* Program BTR */ - if ((last_render_time_in_us + in_out_vrr->btr.margin_in_us / 2) < max_render_time_in_us) { + if (last_render_time_in_us + BTR_EXIT_MARGIN < max_render_time_in_us) { /* Exit Below the Range */ if (in_out_vrr->btr.btr_active) { in_out_vrr->btr.frame_counter = 0; in_out_vrr->btr.btr_active = false; } - } else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) { + } else if (last_render_time_in_us > max_render_time_in_us) { /* Enter Below the Range */ - if (!in_out_vrr->btr.btr_active) { - in_out_vrr->btr.btr_active = true; - } + in_out_vrr->btr.btr_active = true; } /* BTR set to "not active" so disengage */ @@ -325,9 +323,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync, /* Choose number of frames to insert based on how close it * can get to the mid point of the variable range. 
*/ - if ((frame_time_in_us / mid_point_frames_ceil) > in_out_vrr->min_duration_in_us && - (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2 || - mid_point_frames_floor < 2)) { + if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) { frames_to_insert = mid_point_frames_ceil; delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 - delta_from_mid_point_in_us_1; @@ -343,7 +339,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync, if (in_out_vrr->btr.frames_to_insert != 0 && delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) { if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) < - max_render_time_in_us) && + in_out_vrr->max_duration_in_us) && ((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) > in_out_vrr->min_duration_in_us)) frames_to_insert = in_out_vrr->btr.frames_to_insert; @@ -747,10 +743,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, nominal_field_rate_in_uhz = mod_freesync_calc_nominal_field_rate(stream); - /* Rounded to the nearest Hz */ - nominal_field_rate_in_uhz = 1000000ULL * - div_u64(nominal_field_rate_in_uhz + 500000, 1000000); - min_refresh_in_uhz = in_config->min_refresh_in_uhz; max_refresh_in_uhz = in_config->max_refresh_in_uhz; @@ -796,11 +788,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, refresh_range = in_out_vrr->max_refresh_in_uhz - in_out_vrr->min_refresh_in_uhz; - in_out_vrr->btr.margin_in_us = in_out_vrr->max_duration_in_us - - 2 * in_out_vrr->min_duration_in_us; - if (in_out_vrr->btr.margin_in_us > BTR_MAX_MARGIN) - in_out_vrr->btr.margin_in_us = BTR_MAX_MARGIN; - in_out_vrr->supported = true; } @@ -816,7 +803,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; - in_out_vrr->btr.mid_point_in_us = (in_out_vrr->min_duration_in_us + in_out_vrr->max_duration_in_us) / 2; @@ -989,9 +975,13 @@ void mod_freesync_get_settings(struct mod_freesync *mod_freesync, unsigned int *inserted_frames, unsigned int *inserted_duration_in_us) { + struct core_freesync *core_freesync = NULL; + if (mod_freesync == NULL) return; + core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync); + if (vrr->supported) { *v_total_min = vrr->adjust.v_total_min; *v_total_max = vrr->adjust.v_total_max; @@ -1006,13 +996,14 @@ unsigned long long mod_freesync_calc_nominal_field_rate( const struct dc_stream_state *stream) { unsigned long long nominal_field_rate_in_uhz = 0; - unsigned int total = stream->timing.h_total * stream->timing.v_total; - /* Calculate nominal field rate for stream, rounded up to nearest integer */ + /* Calculate nominal field rate for stream */ nominal_field_rate_in_uhz = stream->timing.pix_clk_100hz / 10; nominal_field_rate_in_uhz *= 1000ULL * 1000ULL * 1000ULL; - - nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz, total); + nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz, + stream->timing.h_total); + nominal_field_rate_in_uhz = div_u64(nominal_field_rate_in_uhz, + stream->timing.v_total); return nominal_field_rate_in_uhz; } diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index dbe7835aabcf..dc187844d10b 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -92,7 +92,6 @@ struct mod_vrr_params_btr { uint32_t inserted_duration_in_us; 
uint32_t frames_to_insert; uint32_t frame_counter; - uint32_t margin_in_us; }; struct mod_vrr_params_fixed_refresh { diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index ca8ce3c55337..d930bdecb117 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,4 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); -void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, - struct dc_info_packet *info_packet, int ALLMEnabled, int ALLMValue); - #endif diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index db6b08f6d093..d885d642ed7f 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -31,7 +31,6 @@ #include "dc.h" #define HDMI_INFOFRAME_TYPE_VENDOR 0x81 -#define HF_VSIF_VERSION 1 // VTEM Byte Offset #define VTEM_PB0 0 @@ -396,100 +395,3 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } -/** - ***************************************************************************** - * Function: mod_build_hf_vsif_infopacket - * - * @brief - * Prepare HDMI Vendor Specific info frame. - * Follows HDMI Spec to build up Vendor Specific info frame - * - * @param [in] stream: contains data we may need to construct VSIF (i.e. timing_3d_format, etc.) - * @param [out] info_packet: output structure where to store VSIF - ***************************************************************************** - */ -void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, - struct dc_info_packet *info_packet, int ALLMEnabled, int ALLMValue) -{ - unsigned int length = 5; - bool hdmi_vic_mode = false; - uint8_t checksum = 0; - uint32_t i = 0; - enum dc_timing_3d_format format; - bool bALLM = (bool)ALLMEnabled; - bool bALLMVal = (bool)ALLMValue; - - info_packet->valid = false; - format = stream->timing.timing_3d_format; - if (stream->view_format == VIEW_3D_FORMAT_NONE) - format = TIMING_3D_FORMAT_NONE; - - if (stream->timing.hdmi_vic != 0 - && stream->timing.h_total >= 3840 - && stream->timing.v_total >= 2160 - && format == TIMING_3D_FORMAT_NONE) - hdmi_vic_mode = true; - - if ((format == TIMING_3D_FORMAT_NONE) && !hdmi_vic_mode && !bALLM) - return; - - info_packet->sb[1] = 0x03; - info_packet->sb[2] = 0x0C; - info_packet->sb[3] = 0x00; - - if (bALLM) { - info_packet->sb[1] = 0xD8; - info_packet->sb[2] = 0x5D; - info_packet->sb[3] = 0xC4; - info_packet->sb[4] = HF_VSIF_VERSION; - } - - if (format != TIMING_3D_FORMAT_NONE) - info_packet->sb[4] = (2 << 5); - - else if (hdmi_vic_mode) - info_packet->sb[4] = (1 << 5); - - switch (format) { - case TIMING_3D_FORMAT_HW_FRAME_PACKING: - case TIMING_3D_FORMAT_SW_FRAME_PACKING: - info_packet->sb[5] = (0x0 << 4); - break; - - case TIMING_3D_FORMAT_SIDE_BY_SIDE: - case TIMING_3D_FORMAT_SBS_SW_PACKED: - info_packet->sb[5] = (0x8 << 4); - length = 6; - break; - - case TIMING_3D_FORMAT_TOP_AND_BOTTOM: - case TIMING_3D_FORMAT_TB_SW_PACKED: - info_packet->sb[5] = (0x6 << 4); - break; - - default: - break; - } - - if (hdmi_vic_mode) - info_packet->sb[5] = stream->timing.hdmi_vic; - - info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR; - info_packet->hb1 = 0x01; - info_packet->hb2 = (uint8_t) (length); - - if (bALLM) - 
info_packet->sb[5] = (info_packet->sb[5] & ~0x02) | (bALLMVal << 1); - - checksum += info_packet->hb0; - checksum += info_packet->hb1; - checksum += info_packet->hb2; - - for (i = 1; i <= length; i++) - checksum += info_packet->sb[i]; - - info_packet->sb[0] = (uint8_t) (0x100 - checksum); - - info_packet->valid = true; -} - diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 4e2f615c3566..05e2be856037 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -80,18 +80,18 @@ struct abm_parameters { static const struct abm_parameters abm_settings_config0[abm_defines_max_level] = { // min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0}, - {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf}, - {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0}, - {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, + {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xE0}, + {0xff, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xE0}, + {0xff, 0x40, 0x20, 0x00, 0xff, 0x90, 0x68, 0x40, 0xE0}, + {0x82, 0x4d, 0x20, 0x00, 0x00, 0x90, 0xb3, 0x70, 0x70}, }; static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = { // min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee - {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, - {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70}, + {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xa8, 0xb3, 0x70, 0x70}, + {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xa8, 0xb3, 0x70, 0x70}, + {0x99, 0x65, 0x20, 0x00, 0x00, 0xa8, 0xb3, 0x70, 0x70}, + {0x82, 0x4d, 0x20, 0x00, 0x00, 0xa8, 0xb3, 0x70, 0x70}, }; static const struct abm_parameters * const abm_settings[] = { @@ -115,7 +115,7 @@ static const struct abm_parameters * const abm_settings[] = { /* NOTE: iRAM is 256B in size */ struct iram_table_v_2 { /* flags */ - uint16_t min_abm_backlight; /* 0x00 U16 */ + uint16_t flags; /* 0x00 U16 */ /* parameters for ABM2.0 algorithm */ uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x02 U0.8 */ @@ -140,10 +140,10 @@ struct iram_table_v_2 { /* For reading PSR State directly from IRAM */ uint8_t psr_state; /* 0xf0 */ - uint8_t dmcu_mcp_interface_version; /* 0xf1 */ - uint8_t dmcu_abm_feature_version; /* 0xf2 */ - uint8_t dmcu_psr_feature_version; /* 0xf3 */ - uint16_t dmcu_version; /* 0xf4 */ + uint8_t dmcu_mcp_interface_version; /* 0xf1 */ + uint8_t dmcu_abm_feature_version; /* 0xf2 */ + uint8_t dmcu_psr_feature_version; /* 0xf3 */ + uint16_t dmcu_version; /* 0xf4 */ uint8_t dmcu_state; /* 0xf6 */ uint16_t blRampReduction; /* 0xf7 */ @@ -164,43 +164,42 @@ struct iram_table_v_2_2 { uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x16 U0.8 */ uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x2a U2.6 */ uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x3e U2.6 */ - uint8_t hybrid_factor[NUM_AGGR_LEVEL]; /* 0x52 U0.8 */ - uint8_t contrast_factor[NUM_AGGR_LEVEL]; /* 0x56 U0.8 */ - uint8_t deviation_gain[NUM_AGGR_LEVEL]; /* 0x5a U0.8 */ - uint8_t iir_curve[NUM_AMBI_LEVEL]; /* 0x5e U0.8 */ - uint8_t min_knee[NUM_AGGR_LEVEL]; /* 0x63 U0.8 */ - uint8_t max_knee[NUM_AGGR_LEVEL]; /* 0x67 U0.8 */ - uint16_t min_abm_backlight; /* 0x6b U16 */ - uint8_t 
pad[19]; /* 0x6d U0.8 */ + uint8_t hybrid_factor[NUM_AGGR_LEVEL]; /* 0x52 U0.8 */ + uint8_t contrast_factor[NUM_AGGR_LEVEL]; /* 0x56 U0.8 */ + uint8_t deviation_gain[NUM_AGGR_LEVEL]; /* 0x5a U0.8 */ + uint8_t iir_curve[NUM_AMBI_LEVEL]; /* 0x5e U0.8 */ + uint8_t min_knee[NUM_AGGR_LEVEL]; /* 0x63 U0.8 */ + uint8_t max_knee[NUM_AGGR_LEVEL]; /* 0x67 U0.8 */ + uint8_t pad[21]; /* 0x6b U0.8 */ /* parameters for crgb conversion */ - uint16_t crgb_thresh[NUM_POWER_FN_SEGS]; /* 0x80 U3.13 */ - uint16_t crgb_offset[NUM_POWER_FN_SEGS]; /* 0x90 U1.15 */ - uint16_t crgb_slope[NUM_POWER_FN_SEGS]; /* 0xa0 U4.12 */ + uint16_t crgb_thresh[NUM_POWER_FN_SEGS]; /* 0x80 U3.13 */ + uint16_t crgb_offset[NUM_POWER_FN_SEGS]; /* 0x90 U1.15 */ + uint16_t crgb_slope[NUM_POWER_FN_SEGS]; /* 0xa0 U4.12 */ /* parameters for custom curve */ /* thresholds for brightness --> backlight */ - uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS]; /* 0xb0 U16.0 */ + uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS]; /* 0xb0 U16.0 */ /* offsets for brightness --> backlight */ - uint16_t backlight_offsets[NUM_BL_CURVE_SEGS]; /* 0xd0 U16.0 */ + uint16_t backlight_offsets[NUM_BL_CURVE_SEGS]; /* 0xd0 U16.0 */ /* For reading PSR State directly from IRAM */ - uint8_t psr_state; /* 0xf0 */ - uint8_t dmcu_mcp_interface_version; /* 0xf1 */ - uint8_t dmcu_abm_feature_version; /* 0xf2 */ - uint8_t dmcu_psr_feature_version; /* 0xf3 */ - uint16_t dmcu_version; /* 0xf4 */ - uint8_t dmcu_state; /* 0xf6 */ + uint8_t psr_state; /* 0xf0 */ + uint8_t dmcu_mcp_interface_version; /* 0xf1 */ + uint8_t dmcu_abm_feature_version; /* 0xf2 */ + uint8_t dmcu_psr_feature_version; /* 0xf3 */ + uint16_t dmcu_version; /* 0xf4 */ + uint8_t dmcu_state; /* 0xf6 */ - uint8_t dummy1; /* 0xf7 */ - uint8_t dummy2; /* 0xf8 */ - uint8_t dummy3; /* 0xf9 */ - uint8_t dummy4; /* 0xfa */ - uint8_t dummy5; /* 0xfb */ - uint8_t dummy6; /* 0xfc */ - uint8_t dummy7; /* 0xfd */ - uint8_t dummy8; /* 0xfe */ - uint8_t dummy9; /* 0xff */ + uint8_t dummy1; /* 0xf7 */ + uint8_t dummy2; /* 0xf8 */ + uint8_t dummy3; /* 0xf9 */ + uint8_t dummy4; /* 0xfa */ + uint8_t dummy5; /* 0xfb */ + uint8_t dummy6; /* 0xfc */ + uint8_t dummy7; /* 0xfd */ + uint8_t dummy8; /* 0xfe */ + uint8_t dummy9; /* 0xff */ }; #pragma pack(pop) @@ -272,8 +271,7 @@ void fill_iram_v_2(struct iram_table_v_2 *ram_table, struct dmcu_iram_parameters { unsigned int set = params.set; - ram_table->min_abm_backlight = - cpu_to_be16(params.min_abm_backlight); + ram_table->flags = 0x0; ram_table->deviation_gain = 0xb3; ram_table->blRampReduction = @@ -447,9 +445,6 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame ram_table->flags = 0x0; - ram_table->min_abm_backlight = - cpu_to_be16(params.min_abm_backlight); - ram_table->deviation_gain[0] = 0xb3; ram_table->deviation_gain[1] = 0xa8; ram_table->deviation_gain[2] = 0x98; @@ -593,10 +588,6 @@ void fill_iram_v_2_3(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame unsigned int set = params.set; ram_table->flags = 0x0; - - ram_table->min_abm_backlight = - cpu_to_be16(params.min_abm_backlight); - for (i = 0; i < NUM_AGGR_LEVEL; i++) { ram_table->hybrid_factor[i] = abm_settings[set][i].brightness_gain; ram_table->contrast_factor[i] = abm_settings[set][i].contrast_factor; diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index e54157026330..da5df00fedce 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -38,7 +38,6 @@ struct dmcu_iram_parameters { unsigned int backlight_lut_array_size; unsigned int backlight_ramping_reduction; unsigned int backlight_ramping_start; - unsigned int min_abm_backlight; unsigned int set; }; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index dc7eb28f0296..8889aaceec60 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -143,8 +143,6 @@ enum PP_FEATURE_MASK { enum DC_FEATURE_MASK { DC_FBC_MASK = 0x1, DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2, - DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4, - DC_PSR_MASK = 0x8, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h index fce965984e76..a761ba07f937 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h @@ -27,7 +27,6 @@ #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 -#define mmCC_BIF_BX_FUSESTRAP0 0x14D7 #define mmBUS_CNTL 0x1508 #define mmCONFIG_CNTL 0x1509 #define mmCONFIG_MEMSIZE 0x150a diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h index 39cc4880beb4..8fbfd0261d27 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h @@ -32,8 +32,6 @@ #define MM_INDEX_HI__MM_OFFSET_HI__SHIFT 0x0 #define MM_DATA__MM_DATA_MASK 0xffffffff #define MM_DATA__MM_DATA__SHIFT 0x0 -#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK 0x2 -#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE__SHIFT 0x1 #define BUS_CNTL__BIOS_ROM_WRT_EN_MASK 0x1 #define BUS_CNTL__BIOS_ROM_WRT_EN__SHIFT 0x0 #define BUS_CNTL__BIOS_ROM_DIS_MASK 0x2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h index 8d05d6ca1c8d..809759f7bb81 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h @@ -27,7 +27,6 @@ #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 -#define mmCC_BIF_BX_FUSESTRAP0 0x14D7 #define mmCC_BIF_BX_STRAP2 0x152A #define mmBIF_MM_INDACCESS_CNTL 0x1500 #define mmBIF_DOORBELL_APER_EN 0x1501 diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h index 73435687d049..adc71b01f793 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h @@ -32,8 +32,6 @@ #define MM_INDEX_HI__MM_OFFSET_HI__SHIFT 0x0 #define MM_DATA__MM_DATA_MASK 0xffffffff #define MM_DATA__MM_DATA__SHIFT 0x0 -#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK 0x2 -#define CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE__SHIFT 0x1 #define BIF_MM_INDACCESS_CNTL__MM_INDACCESS_DIS_MASK 0x2 #define BIF_MM_INDACCESS_CNTL__MM_INDACCESS_DIS__SHIFT 0x1 #define BIF_DOORBELL_APER_EN__BIF_DOORBELL_APER_EN_MASK 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h index eddf83ec1c39..be4249adb356 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h @@ -9859,8 +9859,6 @@ #define mmDP0_DP_STEER_FIFO_BASE_IDX 2 #define 
mmDP0_DP_MSA_MISC 0x210e #define mmDP0_DP_MSA_MISC_BASE_IDX 2 -#define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f -#define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 #define mmDP0_DP_VID_TIMING 0x2110 #define mmDP0_DP_VID_TIMING_BASE_IDX 2 #define mmDP0_DP_VID_N 0x2111 @@ -10189,8 +10187,6 @@ #define mmDP1_DP_STEER_FIFO_BASE_IDX 2 #define mmDP1_DP_MSA_MISC 0x220e #define mmDP1_DP_MSA_MISC_BASE_IDX 2 -#define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f -#define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 #define mmDP1_DP_VID_TIMING 0x2210 #define mmDP1_DP_VID_TIMING_BASE_IDX 2 #define mmDP1_DP_VID_N 0x2211 @@ -10519,8 +10515,6 @@ #define mmDP2_DP_STEER_FIFO_BASE_IDX 2 #define mmDP2_DP_MSA_MISC 0x230e #define mmDP2_DP_MSA_MISC_BASE_IDX 2 -#define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f -#define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 #define mmDP2_DP_VID_TIMING 0x2310 #define mmDP2_DP_VID_TIMING_BASE_IDX 2 #define mmDP2_DP_VID_N 0x2311 @@ -10849,8 +10843,6 @@ #define mmDP3_DP_STEER_FIFO_BASE_IDX 2 #define mmDP3_DP_MSA_MISC 0x240e #define mmDP3_DP_MSA_MISC_BASE_IDX 2 -#define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f -#define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 #define mmDP3_DP_VID_TIMING 0x2410 #define mmDP3_DP_VID_TIMING_BASE_IDX 2 #define mmDP3_DP_VID_N 0x2411 @@ -11179,8 +11171,6 @@ #define mmDP4_DP_STEER_FIFO_BASE_IDX 2 #define mmDP4_DP_MSA_MISC 0x250e #define mmDP4_DP_MSA_MISC_BASE_IDX 2 -#define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f -#define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 #define mmDP4_DP_VID_TIMING 0x2510 #define mmDP4_DP_VID_TIMING_BASE_IDX 2 #define mmDP4_DP_VID_N 0x2511 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index 2bfaaa8157d0..ca16d9125fbc 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -1146,14 +1146,7 @@ #define mmATC_L2_MEM_POWER_LS_BASE_IDX 0 #define mmATC_L2_CGTT_CLK_CTRL 0x080c #define mmATC_L2_CGTT_CLK_CTRL_BASE_IDX 0 -#define mmATC_L2_CACHE_4K_EDC_INDEX 0x080e -#define mmATC_L2_CACHE_4K_EDC_INDEX_BASE_IDX 0 -#define mmATC_L2_CACHE_2M_EDC_INDEX 0x080f -#define mmATC_L2_CACHE_2M_EDC_INDEX_BASE_IDX 0 -#define mmATC_L2_CACHE_4K_EDC_CNT 0x0810 -#define mmATC_L2_CACHE_4K_EDC_CNT_BASE_IDX 0 -#define mmATC_L2_CACHE_2M_EDC_CNT 0x0811 -#define mmATC_L2_CACHE_2M_EDC_CNT_BASE_IDX 0 + // addressBlock: gc_utcl2_vml2pfdec // base address: 0xa100 @@ -1213,14 +1206,7 @@ #define mmVM_L2_CACHE_PARITY_CNTL_BASE_IDX 0 #define mmVM_L2_CGTT_CLK_CTRL 0x085e #define mmVM_L2_CGTT_CLK_CTRL_BASE_IDX 0 -#define mmVM_L2_MEM_ECC_INDEX 0x0860 -#define mmVM_L2_MEM_ECC_INDEX_BASE_IDX 0 -#define mmVM_L2_WALKER_MEM_ECC_INDEX 0x0861 -#define mmVM_L2_WALKER_MEM_ECC_INDEX_BASE_IDX 0 -#define mmVM_L2_MEM_ECC_CNT 0x0862 -#define mmVM_L2_MEM_ECC_CNT_BASE_IDX 0 -#define mmVM_L2_WALKER_MEM_ECC_CNT 0x0863 -#define mmVM_L2_WALKER_MEM_ECC_CNT_BASE_IDX 0 + // addressBlock: gc_utcl2_vml2vcdec // base address: 0xa200 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index d4c613a85352..064c4bb1dc62 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -6661,6 +6661,7 @@ #define ATC_L2_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_MASK 0x00FF0000L #define ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK 0xFF000000L + // addressBlock: gc_utcl2_vml2pfdec //VM_L2_CNTL #define VM_L2_CNTL__ENABLE_L2_CACHE__SHIFT 0x0 @@ -6990,22 +6991,7 @@ #define 
VM_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK 0x00008000L #define VM_L2_CGTT_CLK_CTRL__SOFT_STALL_OVERRIDE_MASK 0x00FF0000L #define VM_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK 0xFF000000L -//VM_L2_MEM_ECC_INDEX -#define VM_L2_MEM_ECC_INDEX__INDEX__SHIFT 0x0 -#define VM_L2_MEM_ECC_INDEX__INDEX_MASK 0x000000FFL -//VM_L2_WALKER_MEM_ECC_INDEX -#define VM_L2_WALKER_MEM_ECC_INDEX__INDEX__SHIFT 0x0 -#define VM_L2_WALKER_MEM_ECC_INDEX__INDEX_MASK 0x000000FFL -//VM_L2_MEM_ECC_CNT -#define VM_L2_MEM_ECC_CNT__SEC_COUNT__SHIFT 0xc -#define VM_L2_MEM_ECC_CNT__DED_COUNT__SHIFT 0xe -#define VM_L2_MEM_ECC_CNT__SEC_COUNT_MASK 0x00003000L -#define VM_L2_MEM_ECC_CNT__DED_COUNT_MASK 0x0000C000L -//VM_L2_WALKER_MEM_ECC_CNT -#define VM_L2_WALKER_MEM_ECC_CNT__SEC_COUNT__SHIFT 0xc -#define VM_L2_WALKER_MEM_ECC_CNT__DED_COUNT__SHIFT 0xe -#define VM_L2_WALKER_MEM_ECC_CNT__SEC_COUNT_MASK 0x00003000L -#define VM_L2_WALKER_MEM_ECC_CNT__DED_COUNT_MASK 0x0000C000L + // addressBlock: gc_utcl2_vml2vcdec //VM_CONTEXT0_CNTL diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h index 991128bb9476..4bcacf529852 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h @@ -22,9 +22,6 @@ #ifndef _nbio_7_4_0_SMN_HEADER #define _nbio_7_4_0_SMN_HEADER -// addressBlock: nbio_nbif0_bif_ras_bif_ras_regblk -// base address: 0x10100000 -#define smnBIFL_RAS_CENTRAL_STATUS 0x10139040 #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c #define smnCPM_CONTROL 0x11180460 @@ -56,13 +53,4 @@ #define smnPCIE_RX_NUM_NAK 0x11180038 #define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c -// addressBlock: nbio_iohub_nb_misc_misc_cfgdec -// base address: 0x13a10000 -#define smnIOHC_INTERRUPT_EOI 0x13a10120 - -// addressBlock: nbio_iohub_nb_rascfg_ras_cfgdec -// base address: 0x13a20000 -#define smnRAS_GLOBAL_STATUS_LO 0x13a20020 -#define smnRAS_GLOBAL_STATUS_HI 0x13a20024 - #endif // _nbio_7_4_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h index ce5830ebe095..994e796a28d7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h @@ -2793,8 +2793,8 @@ #define mmBIF_DOORBELL_INT_CNTL_BASE_IDX 2 #define mmBIF_FB_EN 0x00ff #define mmBIF_FB_EN_BASE_IDX 2 -#define mmBIF_INTR_CNTL 0x0100 -#define mmBIF_INTR_CNTL_BASE_IDX 2 +#define mmBIF_BUSY_DELAY_CNTR 0x0100 +#define mmBIF_BUSY_DELAY_CNTR_BASE_IDX 2 #define mmBIF_MST_TRANS_PENDING_VF 0x0109 #define mmBIF_MST_TRANS_PENDING_VF_BASE_IDX 2 #define mmBIF_SLV_TRANS_PENDING_VF 0x010a diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h index 07f04b2b5bdd..d467b939c971 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h @@ -20420,9 +20420,9 @@ #define BIF_FB_EN__FB_WRITE_EN__SHIFT 0x1 #define BIF_FB_EN__FB_READ_EN_MASK 0x00000001L #define BIF_FB_EN__FB_WRITE_EN_MASK 0x00000002L -//BIF_INTR_CNTL -#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL__SHIFT 0x0 -#define BIF_INTR_CNTL__RAS_INTR_VEC_SEL_MASK 0x00000001L +//BIF_BUSY_DELAY_CNTR +#define BIF_BUSY_DELAY_CNTR__DELAY_CNT__SHIFT 0x0 +#define BIF_BUSY_DELAY_CNTR__DELAY_CNT_MASK 0x0000003FL //BIF_MST_TRANS_PENDING_VF #define BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING__SHIFT 0x0 #define 
BIF_MST_TRANS_PENDING_VF__BIF_MST_TRANS_PENDING_MASK 0x7FFFFFFFL @@ -48436,47 +48436,4 @@ #define RCC_DEV0_EPF0_VF15_GFXMSIX_PBA__MSIX_PENDING_BITS_1_MASK 0x00000002L #define RCC_DEV0_EPF0_VF15_GFXMSIX_PBA__MSIX_PENDING_BITS_2_MASK 0x00000004L -//IOHC_INTERRUPT_EOI -#define IOHC_INTERRUPT_EOI__SMI_EOI__SHIFT 0x0 -#define IOHC_INTERRUPT_EOI__SCI_EOI__SHIFT 0x1 -#define IOHC_INTERRUPT_EOI__NMI_EOI__SHIFT 0x2 -#define IOHC_INTERRUPT_EOI__SMI_EOI_MASK 0x00000001L -#define IOHC_INTERRUPT_EOI__SCI_EOI_MASK 0x00000002L -#define IOHC_INTERRUPT_EOI__NMI_EOI_MASK 0x00000004L - -//RAS_GLOBAL_STATUS_LO -#define RAS_GLOBAL_STATUS_LO__ParityErrCorr__SHIFT 0x0 -#define RAS_GLOBAL_STATUS_LO__ParityErrNonFatal__SHIFT 0x1 -#define RAS_GLOBAL_STATUS_LO__ParityErrFatal__SHIFT 0x2 -#define RAS_GLOBAL_STATUS_LO__ParityErrSerr__SHIFT 0x3 -#define RAS_GLOBAL_STATUS_LO__HPLGWA_NMI__SHIFT 0x6 -#define RAS_GLOBAL_STATUS_LO__HPLGWA_SCI__SHIFT 0x7 -#define RAS_GLOBAL_STATUS_LO__HPLGWA_SMI__SHIFT 0x8 -#define RAS_GLOBAL_STATUS_LO__SW_SMI__SHIFT 0x9 -#define RAS_GLOBAL_STATUS_LO__SW_SCI__SHIFT 0xa -#define RAS_GLOBAL_STATUS_LO__SW_NMI__SHIFT 0xb -#define RAS_GLOBAL_STATUS_LO__APML_NMI__SHIFT 0xc -#define RAS_GLOBAL_STATUS_LO__APML_SyncFld__SHIFT 0xd -#define RAS_GLOBAL_STATUS_LO__PIN_SyncFld_NMI__SHIFT 0xe -#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_Private__SHIFT 0xf -#define RAS_GLOBAL_STATUS_LO__ParityErrCorr_MASK 0x00000001L -#define RAS_GLOBAL_STATUS_LO__ParityErrNonFatal_MASK 0x00000002L -#define RAS_GLOBAL_STATUS_LO__ParityErrFatal_MASK 0x00000004L -#define RAS_GLOBAL_STATUS_LO__ParityErrSerr_MASK 0x00000008L -#define RAS_GLOBAL_STATUS_LO__HPLGWA_NMI_MASK 0x00000040L -#define RAS_GLOBAL_STATUS_LO__HPLGWA_SCI_MASK 0x00000080L -#define RAS_GLOBAL_STATUS_LO__HPLGWA_SMI_MASK 0x00000100L -#define RAS_GLOBAL_STATUS_LO__SW_SMI_MASK 0x00000200L -#define RAS_GLOBAL_STATUS_LO__SW_SCI_MASK 0x00000400L -#define RAS_GLOBAL_STATUS_LO__SW_NMI_MASK 0x00000800L -#define RAS_GLOBAL_STATUS_LO__APML_NMI_MASK 0x00001000L -#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_MASK 0x00002000L -#define RAS_GLOBAL_STATUS_LO__PIN_SyncFld_NMI_MASK 0x00004000L -#define RAS_GLOBAL_STATUS_LO__APML_SyncFld_Private_MASK 0x00008000L -//RAS_GLOBAL_STATUS_HI -#define RAS_GLOBAL_STATUS_HI__PCIE0PortAErr__SHIFT 0x0 -#define RAS_GLOBAL_STATUS_HI__NBIF0PortAErr__SHIFT 0x1 -#define RAS_GLOBAL_STATUS_HI__PCIE0PortAErr_MASK 0x00000001L -#define RAS_GLOBAL_STATUS_HI__NBIF0PortAErr_MASK 0x00000002L - #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h index 096d878eb1de..dc9895a684fe 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h @@ -588,15 +588,11 @@ #define IH_STORM_CLIENT_LIST_CNTL__CLIENT30_IS_STORM_CLIENT_MASK 0x40000000L #define IH_STORM_CLIENT_LIST_CNTL__CLIENT31_IS_STORM_CLIENT_MASK 0x80000000L //IH_CLK_CTRL -#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE__SHIFT 0x19 -#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE__SHIFT 0x1a #define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE__SHIFT 0x1b #define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE__SHIFT 0x1c #define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE__SHIFT 0x1d #define IH_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE__SHIFT 0x1e #define IH_CLK_CTRL__REG_CLK_SOFT_OVERRIDE__SHIFT 0x1f -#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE_MASK 0x02000000L -#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE_MASK 
0x04000000L #define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE_MASK 0x08000000L #define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE_MASK 0x10000000L #define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE_MASK 0x20000000L diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h index 71169daa701a..dbc2e723f659 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h @@ -49,7 +49,6 @@ #define ixCG_SPLL_FUNC_CNTL_5 0xc0500150 #define ixCG_SPLL_FUNC_CNTL_6 0xc0500154 #define ixCG_SPLL_FUNC_CNTL_7 0xc0500158 -#define ixCG_SPLL_STATUS 0xC050015C #define ixSPLL_CNTL_MODE 0xc0500160 #define ixCG_SPLL_SPREAD_SPECTRUM 0xc0500164 #define ixCG_SPLL_SPREAD_SPECTRUM_2 0xc0500168 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h index 61a9a84e0c3a..6af9f0217b34 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h @@ -194,8 +194,6 @@ #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h index 351446754c72..bd3685166779 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h @@ -49,7 +49,6 @@ #define ixCG_SPLL_FUNC_CNTL_5 0xc0500150 #define ixCG_SPLL_FUNC_CNTL_6 0xc0500154 #define ixCG_SPLL_FUNC_CNTL_7 0xc0500158 -#define ixCG_SPLL_STATUS 0xC050015C #define ixSPLL_CNTL_MODE 0xc0500160 #define ixCG_SPLL_SPREAD_SPECTRUM 0xc0500164 #define ixCG_SPLL_SPREAD_SPECTRUM_2 0xc0500168 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h index 4bfd5f8ba66c..627906674fe8 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h @@ -194,8 +194,6 @@ #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h index 21da61c398f5..f35aba72e640 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h @@ -52,7 +52,6 @@ #define ixCG_SPLL_FUNC_CNTL_5 0xc0500150 #define ixCG_SPLL_FUNC_CNTL_6 0xc0500154 #define ixCG_SPLL_FUNC_CNTL_7 0xc0500158 -#define ixCG_SPLL_STATUS 0xC050015C #define ixSPLL_CNTL_MODE 0xc0500160 #define ixCG_SPLL_SPREAD_SPECTRUM 0xc0500164 #define ixCG_SPLL_SPREAD_SPECTRUM_2 0xc0500168 diff --git 
a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h index f64fe0fbcb32..481ee6560aa9 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h @@ -220,8 +220,6 @@ #define CG_SPLL_FUNC_CNTL_6__SPLL_LF_CNTR__SHIFT 0x19 #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL_MASK 0xfff #define CG_SPLL_FUNC_CNTL_7__SPLL_BW_CNTRL__SHIFT 0x0 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x2 -#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x1 #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL__SHIFT 0x0 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h index 687d6843c258..d3876052562b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h @@ -121,98 +121,6 @@ #define mmCKSVII2C_IC_COMP_VERSION_BASE_IDX 0 #define mmCKSVII2C_IC_COMP_TYPE 0x006d #define mmCKSVII2C_IC_COMP_TYPE_BASE_IDX 0 -#define mmCKSVII2C1_IC_CON 0x0080 -#define mmCKSVII2C1_IC_CON_BASE_IDX 0 -#define mmCKSVII2C1_IC_TAR 0x0081 -#define mmCKSVII2C1_IC_TAR_BASE_IDX 0 -#define mmCKSVII2C1_IC_SAR 0x0082 -#define mmCKSVII2C1_IC_SAR_BASE_IDX 0 -#define mmCKSVII2C1_IC_HS_MADDR 0x0083 -#define mmCKSVII2C1_IC_HS_MADDR_BASE_IDX 0 -#define mmCKSVII2C1_IC_DATA_CMD 0x0084 -#define mmCKSVII2C1_IC_DATA_CMD_BASE_IDX 0 -#define mmCKSVII2C1_IC_SS_SCL_HCNT 0x0085 -#define mmCKSVII2C1_IC_SS_SCL_HCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_SS_SCL_LCNT 0x0086 -#define mmCKSVII2C1_IC_SS_SCL_LCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_FS_SCL_HCNT 0x0087 -#define mmCKSVII2C1_IC_FS_SCL_HCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_FS_SCL_LCNT 0x0088 -#define mmCKSVII2C1_IC_FS_SCL_LCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_HS_SCL_HCNT 0x0089 -#define mmCKSVII2C1_IC_HS_SCL_HCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_HS_SCL_LCNT 0x008a -#define mmCKSVII2C1_IC_HS_SCL_LCNT_BASE_IDX 0 -#define mmCKSVII2C1_IC_INTR_STAT 0x008b -#define mmCKSVII2C1_IC_INTR_STAT_BASE_IDX 0 -#define mmCKSVII2C1_IC_INTR_MASK 0x008c -#define mmCKSVII2C1_IC_INTR_MASK_BASE_IDX 0 -#define mmCKSVII2C1_IC_RAW_INTR_STAT 0x008d -#define mmCKSVII2C1_IC_RAW_INTR_STAT_BASE_IDX 0 -#define mmCKSVII2C1_IC_RX_TL 0x008e -#define mmCKSVII2C1_IC_RX_TL_BASE_IDX 0 -#define mmCKSVII2C1_IC_TX_TL 0x008f -#define mmCKSVII2C1_IC_TX_TL_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_INTR 0x0090 -#define mmCKSVII2C1_IC_CLR_INTR_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_RX_UNDER 0x0091 -#define mmCKSVII2C1_IC_CLR_RX_UNDER_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_RX_OVER 0x0092 -#define mmCKSVII2C1_IC_CLR_RX_OVER_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_TX_OVER 0x0093 -#define mmCKSVII2C1_IC_CLR_TX_OVER_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_RD_REQ 0x0094 -#define mmCKSVII2C1_IC_CLR_RD_REQ_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_TX_ABRT 0x0095 -#define mmCKSVII2C1_IC_CLR_TX_ABRT_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_RX_DONE 0x0096 -#define mmCKSVII2C1_IC_CLR_RX_DONE_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_ACTIVITY 0x0097 -#define mmCKSVII2C1_IC_CLR_ACTIVITY_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_STOP_DET 0x0098 -#define mmCKSVII2C1_IC_CLR_STOP_DET_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_START_DET 0x0099 -#define mmCKSVII2C1_IC_CLR_START_DET_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_GEN_CALL 0x009a -#define 
mmCKSVII2C1_IC_CLR_GEN_CALL_BASE_IDX 0 -#define mmCKSVII2C1_IC_ENABLE 0x009b -#define mmCKSVII2C1_IC_ENABLE_BASE_IDX 0 -#define mmCKSVII2C1_IC_STATUS 0x009c -#define mmCKSVII2C1_IC_STATUS_BASE_IDX 0 -#define mmCKSVII2C1_IC_TXFLR 0x009d -#define mmCKSVII2C1_IC_TXFLR_BASE_IDX 0 -#define mmCKSVII2C1_IC_RXFLR 0x009e -#define mmCKSVII2C1_IC_RXFLR_BASE_IDX 0 -#define mmCKSVII2C1_IC_SDA_HOLD 0x009f -#define mmCKSVII2C1_IC_SDA_HOLD_BASE_IDX 0 -#define mmCKSVII2C1_IC_TX_ABRT_SOURCE 0x00a0 -#define mmCKSVII2C1_IC_TX_ABRT_SOURCE_BASE_IDX 0 -#define mmCKSVII2C1_IC_SLV_DATA_NACK_ONLY 0x00a1 -#define mmCKSVII2C1_IC_SLV_DATA_NACK_ONLY_BASE_IDX 0 -#define mmCKSVII2C1_IC_DMA_CR 0x00a2 -#define mmCKSVII2C1_IC_DMA_CR_BASE_IDX 0 -#define mmCKSVII2C1_IC_DMA_TDLR 0x00a3 -#define mmCKSVII2C1_IC_DMA_TDLR_BASE_IDX 0 -#define mmCKSVII2C1_IC_DMA_RDLR 0x00a4 -#define mmCKSVII2C1_IC_DMA_RDLR_BASE_IDX 0 -#define mmCKSVII2C1_IC_SDA_SETUP 0x00a5 -#define mmCKSVII2C1_IC_SDA_SETUP_BASE_IDX 0 -#define mmCKSVII2C1_IC_ACK_GENERAL_CALL 0x00a6 -#define mmCKSVII2C1_IC_ACK_GENERAL_CALL_BASE_IDX 0 -#define mmCKSVII2C1_IC_ENABLE_STATUS 0x00a7 -#define mmCKSVII2C1_IC_ENABLE_STATUS_BASE_IDX 0 -#define mmCKSVII2C1_IC_FS_SPKLEN 0x00a8 -#define mmCKSVII2C1_IC_FS_SPKLEN_BASE_IDX 0 -#define mmCKSVII2C1_IC_HS_SPKLEN 0x00a9 -#define mmCKSVII2C1_IC_HS_SPKLEN_BASE_IDX 0 -#define mmCKSVII2C1_IC_CLR_RESTART_DET 0x00aa -#define mmCKSVII2C1_IC_CLR_RESTART_DET_BASE_IDX 0 -#define mmCKSVII2C1_IC_COMP_PARAM_1 0x00ab -#define mmCKSVII2C1_IC_COMP_PARAM_1_BASE_IDX 0 -#define mmCKSVII2C1_IC_COMP_VERSION 0x00ac -#define mmCKSVII2C1_IC_COMP_VERSION_BASE_IDX 0 -#define mmCKSVII2C1_IC_COMP_TYPE 0x00ad -#define mmCKSVII2C1_IC_COMP_TYPE_BASE_IDX 0 #define mmSMUIO_MP_RESET_INTR 0x00c1 #define mmSMUIO_MP_RESET_INTR_BASE_IDX 0 #define mmSMUIO_SOC_HALT 0x00c2 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h index 6905a9618127..f8afa3518bf2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h @@ -268,182 +268,6 @@ //CKSVII2C_IC_COMP_TYPE #define CKSVII2C_IC_COMP_TYPE__COMP_TYPE__SHIFT 0x0 #define CKSVII2C_IC_COMP_TYPE__COMP_TYPE_MASK 0xFFFFFFFFL -//CKSVII2C1_IC_CON -#define CKSVII2C1_IC_CON__IC1_MASTER_MODE__SHIFT 0x0 -#define CKSVII2C1_IC_CON__IC1_MAX_SPEED_MODE__SHIFT 0x1 -#define CKSVII2C1_IC_CON__IC1_10BITADDR_SLAVE__SHIFT 0x3 -#define CKSVII2C1_IC_CON__IC1_10BITADDR_MASTER__SHIFT 0x4 -#define CKSVII2C1_IC_CON__IC1_RESTART_EN__SHIFT 0x5 -#define CKSVII2C1_IC_CON__IC1_SLAVE_DISABLE__SHIFT 0x6 -#define CKSVII2C1_IC_CON__STOP1_DET_IFADDRESSED__SHIFT 0x7 -#define CKSVII2C1_IC_CON__TX1_EMPTY_CTRL__SHIFT 0x8 -#define CKSVII2C1_IC_CON__RX1_FIFO_FULL_HLD_CTRL__SHIFT 0x9 -#define CKSVII2C1_IC_CON__IC1_MASTER_MODE_MASK 0x00000001L -#define CKSVII2C1_IC_CON__IC1_MAX_SPEED_MODE_MASK 0x00000006L -#define CKSVII2C1_IC_CON__IC1_10BITADDR_SLAVE_MASK 0x00000008L -#define CKSVII2C1_IC_CON__IC1_10BITADDR_MASTER_MASK 0x00000010L -#define CKSVII2C1_IC_CON__IC1_RESTART_EN_MASK 0x00000020L -#define CKSVII2C1_IC_CON__IC1_SLAVE_DISABLE_MASK 0x00000040L -#define CKSVII2C1_IC_CON__STOP1_DET_IFADDRESSED_MASK 0x00000080L -#define CKSVII2C1_IC_CON__TX1_EMPTY_CTRL_MASK 0x00000100L -#define CKSVII2C1_IC_CON__RX1_FIFO_FULL_HLD_CTRL_MASK 0x00000200L -//CKSVII2C1_IC_TAR -#define CKSVII2C1_IC_TAR__IC1_TAR__SHIFT 0x0 -#define CKSVII2C1_IC_TAR__GC1_OR_START__SHIFT 0xa -#define 
CKSVII2C1_IC_TAR__SPECIAL1__SHIFT 0xb -#define CKSVII2C1_IC_TAR__IC1_10BITADDR_MASTER__SHIFT 0xc -#define CKSVII2C1_IC_TAR__IC1_TAR_MASK 0x000003FFL -#define CKSVII2C1_IC_TAR__GC1_OR_START_MASK 0x00000400L -#define CKSVII2C1_IC_TAR__SPECIAL1_MASK 0x00000800L -#define CKSVII2C1_IC_TAR__IC1_10BITADDR_MASTER_MASK 0x00001000L -//CKSVII2C1_IC_SAR -#define CKSVII2C1_IC_SAR__IC1_SAR__SHIFT 0x0 -#define CKSVII2C1_IC_SAR__IC1_SAR_MASK 0x000003FFL -//CKSVII2C1_IC_HS_MADDR -#define CKSVII2C1_IC_HS_MADDR__IC1_HS_MADDR__SHIFT 0x0 -#define CKSVII2C1_IC_HS_MADDR__IC1_HS_MADDR_MASK 0x00000007L -//CKSVII2C1_IC_DATA_CMD -#define CKSVII2C1_IC_DATA_CMD__DAT1__SHIFT 0x0 -#define CKSVII2C1_IC_DATA_CMD__CMD1__SHIFT 0x8 -#define CKSVII2C1_IC_DATA_CMD__STOP1__SHIFT 0x9 -#define CKSVII2C1_IC_DATA_CMD__RESTART1__SHIFT 0xa -#define CKSVII2C1_IC_DATA_CMD__DAT1_MASK 0x000000FFL -#define CKSVII2C1_IC_DATA_CMD__CMD1_MASK 0x00000100L -#define CKSVII2C1_IC_DATA_CMD__STOP1_MASK 0x00000200L -#define CKSVII2C1_IC_DATA_CMD__RESTART1_MASK 0x00000400L -//CKSVII2C1_IC_SS_SCL_HCNT -#define CKSVII2C1_IC_SS_SCL_HCNT__IC1_SS_SCL_HCNT__SHIFT 0x0 -#define CKSVII2C1_IC_SS_SCL_HCNT__IC1_SS_SCL_HCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_SS_SCL_LCNT -#define CKSVII2C1_IC_SS_SCL_LCNT__IC1_SS_SCL_LCNT__SHIFT 0x0 -#define CKSVII2C1_IC_SS_SCL_LCNT__IC1_SS_SCL_LCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_FS_SCL_HCNT -#define CKSVII2C1_IC_FS_SCL_HCNT__IC1_FS_SCL_HCNT__SHIFT 0x0 -#define CKSVII2C1_IC_FS_SCL_HCNT__IC1_FS_SCL_HCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_FS_SCL_LCNT -#define CKSVII2C1_IC_FS_SCL_LCNT__IC1_FS_SCL_LCNT__SHIFT 0x0 -#define CKSVII2C1_IC_FS_SCL_LCNT__IC1_FS_SCL_LCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_HS_SCL_HCNT -#define CKSVII2C1_IC_HS_SCL_HCNT__IC1_HS_SCL_HCNT__SHIFT 0x0 -#define CKSVII2C1_IC_HS_SCL_HCNT__IC1_HS_SCL_HCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_HS_SCL_LCNT -#define CKSVII2C1_IC_HS_SCL_LCNT__IC1_HS_SCL_LCNT__SHIFT 0x0 -#define CKSVII2C1_IC_HS_SCL_LCNT__IC1_HS_SCL_LCNT_MASK 0x0000FFFFL -//CKSVII2C1_IC_INTR_STAT -#define CKSVII2C1_IC_INTR_STAT__R1_RX_UNDER__SHIFT 0x0 -#define CKSVII2C1_IC_INTR_STAT__R1_RX_OVER__SHIFT 0x1 -#define CKSVII2C1_IC_INTR_STAT__R1_RX_FULL__SHIFT 0x2 -#define CKSVII2C1_IC_INTR_STAT__R1_TX_OVER__SHIFT 0x3 -#define CKSVII2C1_IC_INTR_STAT__R1_TX_EMPTY__SHIFT 0x4 -#define CKSVII2C1_IC_INTR_STAT__R1_RD_REQ__SHIFT 0x5 -#define CKSVII2C1_IC_INTR_STAT__R1_TX_ABRT__SHIFT 0x6 -#define CKSVII2C1_IC_INTR_STAT__R1_RX_DONE__SHIFT 0x7 -#define CKSVII2C1_IC_INTR_STAT__R1_ACTIVITY__SHIFT 0x8 -#define CKSVII2C1_IC_INTR_STAT__R1_STOP_DET__SHIFT 0x9 -#define CKSVII2C1_IC_INTR_STAT__R1_START_DET__SHIFT 0xa -#define CKSVII2C1_IC_INTR_STAT__R1_GEN_CALL__SHIFT 0xb -#define CKSVII2C1_IC_INTR_STAT__R1_RESTART_DET__SHIFT 0xc -#define CKSVII2C1_IC_INTR_STAT__R1_MST_ON_HOLD__SHIFT 0xd -#define CKSVII2C1_IC_INTR_STAT__R1_RX_UNDER_MASK 0x00000001L -#define CKSVII2C1_IC_INTR_STAT__R1_RX_OVER_MASK 0x00000002L -#define CKSVII2C1_IC_INTR_STAT__R1_RX_FULL_MASK 0x00000004L -#define CKSVII2C1_IC_INTR_STAT__R1_TX_OVER_MASK 0x00000008L -#define CKSVII2C1_IC_INTR_STAT__R1_TX_EMPTY_MASK 0x00000010L -#define CKSVII2C1_IC_INTR_STAT__R1_RD_REQ_MASK 0x00000020L -#define CKSVII2C1_IC_INTR_STAT__R1_TX_ABRT_MASK 0x00000040L -#define CKSVII2C1_IC_INTR_STAT__R1_RX_DONE_MASK 0x00000080L -#define CKSVII2C1_IC_INTR_STAT__R1_ACTIVITY_MASK 0x00000100L -#define CKSVII2C1_IC_INTR_STAT__R1_STOP_DET_MASK 0x00000200L -#define CKSVII2C1_IC_INTR_STAT__R1_START_DET_MASK 0x00000400L -#define CKSVII2C1_IC_INTR_STAT__R1_GEN_CALL_MASK 0x00000800L -#define 
CKSVII2C1_IC_INTR_STAT__R1_RESTART_DET_MASK 0x00001000L -#define CKSVII2C1_IC_INTR_STAT__R1_MST_ON_HOLD_MASK 0x00002000L -//CKSVII2C1_IC_INTR_MASK -#define CKSVII2C1_IC_INTR_MASK__M1_RX_UNDER__SHIFT 0x0 -#define CKSVII2C1_IC_INTR_MASK__M1_RX_OVER__SHIFT 0x1 -#define CKSVII2C1_IC_INTR_MASK__M1_RX_FULL__SHIFT 0x2 -#define CKSVII2C1_IC_INTR_MASK__M1_TX_OVER__SHIFT 0x3 -#define CKSVII2C1_IC_INTR_MASK__M1_TX_EMPTY__SHIFT 0x4 -#define CKSVII2C1_IC_INTR_MASK__M1_RD_REQ__SHIFT 0x5 -#define CKSVII2C1_IC_INTR_MASK__M1_TX_ABRT__SHIFT 0x6 -#define CKSVII2C1_IC_INTR_MASK__M1_RX_DONE__SHIFT 0x7 -#define CKSVII2C1_IC_INTR_MASK__M1_ACTIVITY__SHIFT 0x8 -#define CKSVII2C1_IC_INTR_MASK__M1_STOP_DET__SHIFT 0x9 -#define CKSVII2C1_IC_INTR_MASK__M1_START_DET__SHIFT 0xa -#define CKSVII2C1_IC_INTR_MASK__M1_GEN_CALL__SHIFT 0xb -#define CKSVII2C1_IC_INTR_MASK__M1_RESTART_DET__SHIFT 0xc -#define CKSVII2C1_IC_INTR_MASK__M1_MST_ON_HOLD__SHIFT 0xd -#define CKSVII2C1_IC_INTR_MASK__M1_RX_UNDER_MASK 0x00000001L -#define CKSVII2C1_IC_INTR_MASK__M1_RX_OVER_MASK 0x00000002L -#define CKSVII2C1_IC_INTR_MASK__M1_RX_FULL_MASK 0x00000004L -#define CKSVII2C1_IC_INTR_MASK__M1_TX_OVER_MASK 0x00000008L -#define CKSVII2C1_IC_INTR_MASK__M1_TX_EMPTY_MASK 0x00000010L -#define CKSVII2C1_IC_INTR_MASK__M1_RD_REQ_MASK 0x00000020L -#define CKSVII2C1_IC_INTR_MASK__M1_TX_ABRT_MASK 0x00000040L -#define CKSVII2C1_IC_INTR_MASK__M1_RX_DONE_MASK 0x00000080L -#define CKSVII2C1_IC_INTR_MASK__M1_ACTIVITY_MASK 0x00000100L -#define CKSVII2C1_IC_INTR_MASK__M1_STOP_DET_MASK 0x00000200L -#define CKSVII2C1_IC_INTR_MASK__M1_START_DET_MASK 0x00000400L -#define CKSVII2C1_IC_INTR_MASK__M1_GEN_CALL_MASK 0x00000800L -#define CKSVII2C1_IC_INTR_MASK__M1_RESTART_DET_MASK 0x00001000L -#define CKSVII2C1_IC_INTR_MASK__M1_MST_ON_HOLD_MASK 0x00002000L -//CKSVII2C1_IC_RAW_INTR_STAT -//CKSVII2C1_IC_RX_TL -//CKSVII2C1_IC_TX_TL -//CKSVII2C1_IC_CLR_INTR -//CKSVII2C1_IC_CLR_RX_UNDER -//CKSVII2C1_IC_CLR_RX_OVER -//CKSVII2C1_IC_CLR_TX_OVER -//CKSVII2C1_IC_CLR_RD_REQ -//CKSVII2C1_IC_CLR_TX_ABRT -//CKSVII2C1_IC_CLR_RX_DONE -//CKSVII2C1_IC_CLR_ACTIVITY -//CKSVII2C1_IC_CLR_STOP_DET -//CKSVII2C1_IC_CLR_START_DET -//CKSVII2C1_IC_CLR_GEN_CALL -//CKSVII2C1_IC_ENABLE -#define CKSVII2C1_IC_ENABLE__ENABLE1__SHIFT 0x0 -#define CKSVII2C1_IC_ENABLE__ABORT1__SHIFT 0x1 -#define CKSVII2C1_IC_ENABLE__ENABLE1_MASK 0x00000001L -#define CKSVII2C1_IC_ENABLE__ABORT1_MASK 0x00000002L -//CKSVII2C1_IC_STATUS -#define CKSVII2C1_IC_STATUS__ACTIVITY1__SHIFT 0x0 -#define CKSVII2C1_IC_STATUS__TFNF1__SHIFT 0x1 -#define CKSVII2C1_IC_STATUS__TFE1__SHIFT 0x2 -#define CKSVII2C1_IC_STATUS__RFNE1__SHIFT 0x3 -#define CKSVII2C1_IC_STATUS__RFF1__SHIFT 0x4 -#define CKSVII2C1_IC_STATUS__MST1_ACTIVITY__SHIFT 0x5 -#define CKSVII2C1_IC_STATUS__SLV1_ACTIVITY__SHIFT 0x6 -#define CKSVII2C1_IC_STATUS__ACTIVITY1_MASK 0x00000001L -#define CKSVII2C1_IC_STATUS__TFNF1_MASK 0x00000002L -#define CKSVII2C1_IC_STATUS__TFE1_MASK 0x00000004L -#define CKSVII2C1_IC_STATUS__RFNE1_MASK 0x00000008L -#define CKSVII2C1_IC_STATUS__RFF1_MASK 0x00000010L -#define CKSVII2C1_IC_STATUS__MST1_ACTIVITY_MASK 0x00000020L -#define CKSVII2C1_IC_STATUS__SLV1_ACTIVITY_MASK 0x00000040L -//CKSVII2C1_IC_TXFLR -//CKSVII2C1_IC_RXFLR -//CKSVII2C1_IC_SDA_HOLD -#define CKSVII2C1_IC_SDA_HOLD__IC1_SDA_HOLD__SHIFT 0x0 -#define CKSVII2C1_IC_SDA_HOLD__IC1_SDA_HOLD_MASK 0x00FFFFFFL -//CKSVII2C1_IC_TX_ABRT_SOURCE -//CKSVII2C1_IC_SLV_DATA_NACK_ONLY -//CKSVII2C1_IC_DMA_CR -//CKSVII2C1_IC_DMA_TDLR -//CKSVII2C1_IC_DMA_RDLR -//CKSVII2C1_IC_SDA_SETUP -#define 
CKSVII2C1_IC_SDA_SETUP__SDA1_SETUP__SHIFT 0x0 -#define CKSVII2C1_IC_SDA_SETUP__SDA1_SETUP_MASK 0x000000FFL -//CKSVII2C1_IC_ACK_GENERAL_CALL -#define CKSVII2C1_IC_ACK_GENERAL_CALL__ACK1_GENERAL_CALL__SHIFT 0x0 -#define CKSVII2C1_IC_ACK_GENERAL_CALL__ACK1_GENERAL_CALL_MASK 0x00000001L -//CKSVII2C1_IC_ENABLE_STATUS -#define CKSVII2C1_IC_ENABLE_STATUS__IC1_EN__SHIFT 0x0 -#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_RX_ABORTED__SHIFT 0x1 -#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_FIFO_FILLED_AND_FLUSHED__SHIFT 0x2 -#define CKSVII2C1_IC_ENABLE_STATUS__IC1_EN_MASK 0x00000001L -#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_RX_ABORTED_MASK 0x00000002L -#define CKSVII2C1_IC_ENABLE_STATUS__SLV1_FIFO_FILLED_AND_FLUSHED_MASK 0x00000004L //SMUIO_MP_RESET_INTR #define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT 0x0 #define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK 0x00000001L diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h index 90350f46a0c4..cf2149cc12ee 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h @@ -24,18 +24,6 @@ // addressBlock: uvd0_mmsch_dec // base address: 0x1e000 -#define mmMMSCH_VF_VMID 0x000b -#define mmMMSCH_VF_VMID_BASE_IDX 0 -#define mmMMSCH_VF_CTX_ADDR_LO 0x000c -#define mmMMSCH_VF_CTX_ADDR_LO_BASE_IDX 0 -#define mmMMSCH_VF_CTX_ADDR_HI 0x000d -#define mmMMSCH_VF_CTX_ADDR_HI_BASE_IDX 0 -#define mmMMSCH_VF_CTX_SIZE 0x000e -#define mmMMSCH_VF_CTX_SIZE_BASE_IDX 0 -#define mmMMSCH_VF_MAILBOX_HOST 0x0012 -#define mmMMSCH_VF_MAILBOX_HOST_BASE_IDX 0 -#define mmMMSCH_VF_MAILBOX_RESP 0x0013 -#define mmMMSCH_VF_MAILBOX_RESP_BASE_IDX 0 // addressBlock: uvd0_jpegnpdec diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index dd7cbc00a0aa..e88541d67aa0 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -492,13 +492,12 @@ struct atom_firmware_info_v3_1 /* Total 32bit cap indication */ enum atombios_firmware_capability { - ATOM_FIRMWARE_CAP_FIRMWARE_POSTED = 0x00000001, - ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION = 0x00000002, - ATOM_FIRMWARE_CAP_WMI_SUPPORT = 0x00000040, - ATOM_FIRMWARE_CAP_HWEMU_ENABLE = 0x00000080, - ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100, - ATOM_FIRMWARE_CAP_SRAM_ECC = 0x00000200, - ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING = 0x00000400, + ATOM_FIRMWARE_CAP_FIRMWARE_POSTED = 0x00000001, + ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION = 0x00000002, + ATOM_FIRMWARE_CAP_WMI_SUPPORT = 0x00000040, + ATOM_FIRMWARE_CAP_HWEMU_ENABLE = 0x00000080, + ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100, + ATOM_FIRMWARE_CAP_SRAM_ECC = 0x00000200, }; enum atom_cooling_solution_id{ @@ -672,20 +671,6 @@ struct vram_usagebyfirmware_v2_1 uint16_t used_by_driver_in_kb; }; -/* This is part of vram_usagebyfirmware_v2_1 */ -struct vram_reserve_block -{ - uint32_t start_address_in_kb; - uint16_t used_by_firmware_in_kb; - uint16_t used_by_driver_in_kb; -}; - -/* Definitions for constance */ -enum atomfirmware_internal_constants -{ - ONE_KiB = 0x400, - ONE_MiB = 0x100000, -}; /* *************************************************************************** diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f2..5dcb776548d8 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -25,6 +25,7 @@ #define _DISCOVERY_H_ #define PSP_HEADER_SIZE 256 
+#define BINARY_MAX_SIZE (64 << 10) #define BINARY_SIGNATURE 0x28211407 #define DISCOVERY_TABLE_SIGNATURE 0x53445049 diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 2cd217e60125..98b9533e672b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -291,18 +291,15 @@ struct kfd2kgd_calls { uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); - bool (*get_atc_vmid_pasid_mapping_info)( + bool (*get_atc_vmid_pasid_mapping_valid)( struct kgd_dev *kgd, - uint8_t vmid, - uint16_t *p_pasid); + uint8_t vmid); + uint16_t (*get_atc_vmid_pasid_mapping_pasid)( + struct kgd_dev *kgd, + uint8_t vmid); - /* No longer needed from GFXv9 onward. The scratch base address is - * passed to the shader by the CP. It's the user mode driver's - * responsibility. - */ void (*set_scratch_backing_va)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); - int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5902f80d1fce..27cf0afaa0b4 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -179,11 +179,6 @@ enum pp_mp1_state { PP_MP1_STATE_RESET, }; -enum pp_df_cstate { - DF_CSTATE_DISALLOW = 0, - DF_CSTATE_ALLOW, -}; - #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 @@ -317,7 +312,6 @@ struct amd_pm_funcs { int (*get_ppfeature_status)(void *handle, char *buf); int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); int (*asic_reset_mode_2)(void *handle); - int (*set_df_cstate)(void *handle, enum pp_df_cstate state); }; #endif diff --git a/drivers/gpu/drm/amd/include/renoir_ip_offset.h b/drivers/gpu/drm/amd/include/renoir_ip_offset.h index 07633e22e99a..094648cac392 100644 --- a/drivers/gpu/drm/amd/include/renoir_ip_offset.h +++ b/drivers/gpu/drm/amd/include/renoir_ip_offset.h @@ -169,11 +169,6 @@ static const struct IP_BASE NBIF0_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE DCN_BASE ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0, 0 } }, - { { 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0 } } } }; static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0x0240A000, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, @@ -1366,33 +1361,4 @@ static const struct IP_BASE UVD0_BASE ={ { { { 0x00007800, 0x00007E00, 0x0240300 #define UVD0_BASE__INST6_SEG3 0 #define UVD0_BASE__INST6_SEG4 0 -#define DCN_BASE__INST0_SEG0 0x00000012 -#define DCN_BASE__INST0_SEG1 0x000000C0 -#define DCN_BASE__INST0_SEG2 0x000034C0 -#define DCN_BASE__INST0_SEG3 0 -#define DCN_BASE__INST0_SEG4 0 - -#define DCN_BASE__INST1_SEG0 0 -#define DCN_BASE__INST1_SEG1 0 -#define DCN_BASE__INST1_SEG2 0 -#define DCN_BASE__INST1_SEG3 0 -#define DCN_BASE__INST1_SEG4 0 - -#define DCN_BASE__INST2_SEG0 0 -#define DCN_BASE__INST2_SEG1 0 -#define DCN_BASE__INST2_SEG2 0 -#define DCN_BASE__INST2_SEG3 0 -#define DCN_BASE__INST2_SEG4 0 - -#define DCN_BASE__INST3_SEG0 0 -#define DCN_BASE__INST3_SEG1 0 -#define DCN_BASE__INST3_SEG2 0 -#define DCN_BASE__INST3_SEG3 0 -#define DCN_BASE__INST3_SEG4 0 - -#define DCN_BASE__INST4_SEG0 0 -#define DCN_BASE__INST4_SEG1 0 
-#define DCN_BASE__INST4_SEG2 0 -#define DCN_BASE__INST4_SEG3 0 -#define DCN_BASE__INST4_SEG4 0 #endif diff --git a/drivers/gpu/drm/amd/include/vega10_enum.h b/drivers/gpu/drm/amd/include/vega10_enum.h index adf1b754666e..c14ba65a2415 100644 --- a/drivers/gpu/drm/amd/include/vega10_enum.h +++ b/drivers/gpu/drm/amd/include/vega10_enum.h @@ -1037,7 +1037,6 @@ TCC_CACHE_POLICY_STREAM = 0x00000001, typedef enum MTYPE { MTYPE_NC = 0x00000000, MTYPE_WC = 0x00000001, -MTYPE_RW = 0x00000001, MTYPE_CC = 0x00000002, MTYPE_UC = 0x00000003, } MTYPE; diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index f4ff15378e61..fa8ad7db2b3a 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1421,7 +1421,6 @@ static int pp_get_asic_baco_capability(void *handle, bool *cap) { struct pp_hwmgr *hwmgr = handle; - *cap = false; if (!hwmgr) return -EINVAL; @@ -1549,23 +1548,6 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire) return ret; } -static int pp_set_df_cstate(void *handle, enum pp_df_cstate state) -{ - struct pp_hwmgr *hwmgr = handle; - - if (!hwmgr) - return -EINVAL; - - if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_df_cstate) - return 0; - - mutex_lock(&hwmgr->smu_lock); - hwmgr->hwmgr_func->set_df_cstate(hwmgr, state); - mutex_unlock(&hwmgr->smu_lock); - - return 0; -} - static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1624,5 +1606,4 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .set_ppfeature_status = pp_set_ppfeature_status, .asic_reset_mode_2 = pp_asic_reset_mode_2, .smu_i2c_bus_access = pp_smu_i2c_bus_access, - .set_df_cstate = pp_set_df_cstate, }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 4e468b0272c3..4acf139ea014 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -25,16 +25,11 @@ #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "soc15_common.h" #include "smu_v11_0.h" #include "smu_v12_0.h" #include "atom.h" #include "amd_pcie.h" -#include "vega20_ppt.h" -#include "arcturus_ppt.h" -#include "navi10_ppt.h" -#include "renoir_ppt.h" #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) #type @@ -72,8 +67,6 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf) uint32_t sort_feature[SMU_FEATURE_COUNT]; uint64_t hw_feature_count = 0; - mutex_lock(&smu->mutex); - ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); if (ret) goto failed; @@ -99,57 +92,9 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf) } failed: - mutex_unlock(&smu->mutex); - return size; } -static int smu_feature_update_enable_state(struct smu_context *smu, - uint64_t feature_mask, - bool enabled) -{ - struct smu_feature *feature = &smu->smu_feature; - uint32_t feature_low = 0, feature_high = 0; - int ret = 0; - - if (!smu->pm_enabled) - return ret; - - feature_low = (feature_mask >> 0 ) & 0xffffffff; - feature_high = (feature_mask >> 32) & 0xffffffff; - - if (enabled) { - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow, - feature_low); - if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, - feature_high); - if (ret) - return ret; - } else { - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, - feature_low); 
- if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, - feature_high); - if (ret) - return ret; - } - - mutex_lock(&feature->mutex); - if (enabled) - bitmap_or(feature->enabled, feature->enabled, - (unsigned long *)(&feature_mask), SMU_FEATURE_MAX); - else - bitmap_andnot(feature->enabled, feature->enabled, - (unsigned long *)(&feature_mask), SMU_FEATURE_MAX); - mutex_unlock(&feature->mutex); - - return ret; -} - int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask) { int ret = 0; @@ -158,11 +103,9 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask) uint64_t feature_2_disabled = 0; uint64_t feature_enables = 0; - mutex_lock(&smu->mutex); - ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); if (ret) - goto out; + return ret; feature_enables = ((uint64_t)feature_mask[1] << 32 | (uint64_t)feature_mask[0]); @@ -172,17 +115,14 @@ int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask) if (feature_2_enabled) { ret = smu_feature_update_enable_state(smu, feature_2_enabled, true); if (ret) - goto out; + return ret; } if (feature_2_disabled) { ret = smu_feature_update_enable_state(smu, feature_2_disabled, false); if (ret) - goto out; + return ret; } -out: - mutex_unlock(&smu->mutex); - return ret; } @@ -219,7 +159,8 @@ int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max) { - int ret = 0; + int ret = 0, clk_id = 0; + uint32_t param; if (min <= 0 && max <= 0) return -EINVAL; @@ -227,7 +168,27 @@ int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, if (!smu_clk_dpm_is_enabled(smu, clk_type)) return 0; - ret = smu_set_soft_freq_limited_range(smu, clk_type, min, max); + clk_id = smu_clk_get_index(smu, clk_type); + if (clk_id < 0) + return clk_id; + + if (max > 0) { + param = (uint32_t)((clk_id << 16) | (max & 0xffff)); + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq, + param); + if (ret) + return ret; + } + + if (min > 0) { + param = (uint32_t)((clk_id << 16) | (min & 0xffff)); + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq, + param); + if (ret) + return ret; + } + + return ret; } @@ -268,7 +229,7 @@ int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, } int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max, bool lock_needed) + uint32_t *min, uint32_t *max) { uint32_t clock_limit; int ret = 0; @@ -276,9 +237,6 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, if (!min && !max) return -EINVAL; - if (lock_needed) - mutex_lock(&smu->mutex); - if (!smu_clk_dpm_is_enabled(smu, clk_type)) { switch (clk_type) { case SMU_MCLK: @@ -302,17 +260,14 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, *min = clock_limit / 100; if (max) *max = clock_limit / 100; - } else { - /* - * Todo: Use each asic(ASIC_ppt funcs) control the callbacks exposed to the - * core driver and then have helpers for stuff that is common(SMU_v11_x | SMU_v12_x funcs). - */ - ret = smu_get_dpm_ultimate_freq(smu, clk_type, min, max); + + return 0; } - - if (lock_needed) - mutex_unlock(&smu->mutex); - + /* + * Todo: Use each asic(ASIC_ppt funcs) control the callbacks exposed to the + * core driver and then have helpers for stuff that is common(SMU_v11_x | SMU_v12_x funcs). 
+ */ + ret = smu_get_dpm_ultimate_freq(smu, clk_type, min, max); return ret; } @@ -389,8 +344,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, { int ret = 0; - mutex_lock(&smu->mutex); - switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: ret = smu_dpm_set_uvd_enable(smu, gate); @@ -408,11 +361,15 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, break; } - mutex_unlock(&smu->mutex); - return ret; } +enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu) +{ + /* not support power state */ + return POWER_STATE_TYPE_DEFAULT; +} + int smu_get_power_num_states(struct smu_context *smu, struct pp_states_info *state_info) { @@ -482,7 +439,7 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int int ret = 0; int table_id = smu_table_get_index(smu, table_index); - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) + if (!table_data || table_id >= smu_table->table_count || table_id < 0) return -EINVAL; table = &smu_table->tables[table_index]; @@ -506,7 +463,7 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int return ret; /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); if (!drv2smu) memcpy(table_data, table->cpu_addr, table->size); @@ -538,23 +495,16 @@ bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) int smu_sys_get_pp_table(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - uint32_t powerplay_table_size; if (!smu_table->power_play_table && !smu_table->hardcode_pptable) return -EINVAL; - mutex_lock(&smu->mutex); - if (smu_table->hardcode_pptable) *table = smu_table->hardcode_pptable; else *table = smu_table->power_play_table; - powerplay_table_size = smu_table->power_play_table_size; - - mutex_unlock(&smu->mutex); - - return powerplay_table_size; + return smu_table->power_play_table_size; } int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) @@ -581,11 +531,14 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) memcpy(smu_table->hardcode_pptable, buf, size); smu_table->power_play_table = smu_table->hardcode_pptable; smu_table->power_play_table_size = size; + mutex_unlock(&smu->mutex); ret = smu_reset(smu); if (ret) pr_info("smu reset failed, ret = %d\n", ret); + return ret; + failed: mutex_unlock(&smu->mutex); return ret; @@ -616,7 +569,41 @@ int smu_feature_init_dpm(struct smu_context *smu) return ret; } +int smu_feature_update_enable_state(struct smu_context *smu, uint64_t feature_mask, bool enabled) +{ + uint32_t feature_low = 0, feature_high = 0; + int ret = 0; + if (!smu->pm_enabled) + return ret; + + feature_low = (feature_mask >> 0 ) & 0xffffffff; + feature_high = (feature_mask >> 32) & 0xffffffff; + + if (enabled) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow, + feature_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, + feature_high); + if (ret) + return ret; + + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, + feature_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, + feature_high); + if (ret) + return ret; + + } + + return ret; +} int smu_feature_is_enabled(struct smu_context *smu, enum smu_feature_mask mask) { @@ -646,6 +633,8 @@ int smu_feature_set_enabled(struct smu_context *smu, enum smu_feature_mask mask, { struct 
smu_feature *feature = &smu->smu_feature; int feature_id; + uint64_t feature_mask = 0; + int ret = 0; feature_id = smu_feature_get_index(smu, mask); if (feature_id < 0) @@ -653,9 +642,22 @@ int smu_feature_set_enabled(struct smu_context *smu, enum smu_feature_mask mask, WARN_ON(feature_id > feature->feature_num); - return smu_feature_update_enable_state(smu, - 1ULL << feature_id, - enable); + feature_mask = 1ULL << feature_id; + + mutex_lock(&feature->mutex); + ret = smu_feature_update_enable_state(smu, feature_mask, enable); + if (ret) + goto failed; + + if (enable) + test_and_set_bit(feature_id, feature->enabled); + else + test_and_clear_bit(feature_id, feature->enabled); + +failed: + mutex_unlock(&feature->mutex); + + return ret; } int smu_feature_is_supported(struct smu_context *smu, enum smu_feature_mask mask) @@ -707,26 +709,23 @@ static int smu_set_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA20: - vega20_set_ppt_funcs(smu); - break; case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: - navi10_set_ppt_funcs(smu); - break; case CHIP_ARCTURUS: - arcturus_set_ppt_funcs(smu); + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + smu_v11_0_set_smu_funcs(smu); break; case CHIP_RENOIR: - renoir_set_ppt_funcs(smu); + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + smu_v12_0_set_smu_funcs(smu); break; default: return -EINVAL; } - if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) - smu->od_enabled = true; - return 0; } @@ -737,7 +736,6 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; - smu->is_apu = false; mutex_init(&smu->mutex); return smu_set_funcs(adev); @@ -751,10 +749,11 @@ static int smu_late_init(void *handle) if (!smu->pm_enabled) return 0; + mutex_lock(&smu->mutex); smu_handle_task(&adev->smu, smu->smu_dpm.dpm_level, - AMD_PP_TASK_COMPLETE_INIT, - false); + AMD_PP_TASK_COMPLETE_INIT); + mutex_unlock(&smu->mutex); return 0; } @@ -920,9 +919,14 @@ static int smu_init_fb_allocations(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; - int ret, i; + uint32_t table_count = smu_table->table_count; + uint32_t i = 0; + int32_t ret = 0; - for (i = 0; i < SMU_TABLE_COUNT; i++) { + if (table_count <= 0) + return -EINVAL; + + for (i = 0 ; i < table_count; i++) { if (tables[i].size == 0) continue; ret = amdgpu_bo_create_kernel(adev, @@ -938,7 +942,7 @@ static int smu_init_fb_allocations(struct smu_context *smu) return 0; failed: - while (--i >= 0) { + for (; i > 0; i--) { if (tables[i].size == 0) continue; amdgpu_bo_free_kernel(&tables[i].bo, @@ -953,12 +957,13 @@ static int smu_fini_fb_allocations(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; + uint32_t table_count = smu_table->table_count; uint32_t i = 0; - if (!tables) + if (table_count == 0 || tables == NULL) return 0; - for (i = 0; i < SMU_TABLE_COUNT; i++) { + for (i = 0 ; i < table_count; i++) { if (tables[i].size == 0) continue; amdgpu_bo_free_kernel(&tables[i].bo, @@ -969,6 +974,50 @@ static int smu_fini_fb_allocations(struct smu_context *smu) return 0; } +static int smu_override_pcie_parameters(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; + int ret; + + if (adev->flags & AMD_IS_APU) + return 0; + + if (adev->pm.pcie_gen_mask & 
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 + */ + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg); + if (ret) + pr_err("[%s] Attempt to override pcie params failed!\n", __func__); + return ret; +} + static int smu_smc_table_hw_init(struct smu_context *smu, bool initialize) { @@ -1043,8 +1092,8 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - /* issue Run*Btc msg */ - ret = smu_run_btc(smu); + /* issue RunAfllBtc msg */ + ret = smu_run_afll_btc(smu); if (ret) return ret; @@ -1098,7 +1147,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false); + ret = smu_get_power_limit(smu, &smu->default_power_limit, true); if (ret) return ret; } @@ -1177,46 +1226,29 @@ static int smu_free_memory_pool(struct smu_context *smu) return ret; } -static int smu_start_smc_engine(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - if (adev->asic_type < CHIP_NAVI10) { - if (smu->ppt_funcs->load_microcode) { - ret = smu->ppt_funcs->load_microcode(smu); - if (ret) - return ret; - } - } - } - - if (smu->ppt_funcs->check_fw_status) { - ret = smu->ppt_funcs->check_fw_status(smu); - if (ret) - pr_err("SMC is not ready\n"); - } - - return ret; -} - static int smu_hw_init(void *handle) { int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - ret = smu_start_smc_engine(smu); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + if (adev->asic_type < CHIP_NAVI10) { + ret = smu_load_microcode(smu); + if (ret) + return ret; + } + } + + ret = smu_check_fw_status(smu); if (ret) { - pr_err("SMU is not ready yet!\n"); + pr_err("SMC firmware status is not correct\n"); return ret; } if (adev->flags & AMD_IS_APU) { smu_powergate_sdma(&adev->smu, false); smu_powergate_vcn(&adev->smu, false); - smu_set_gfx_cgpg(&adev->smu, true); } if (!smu->pm_enabled) @@ -1259,11 +1291,6 @@ failed: return ret; } -static int smu_stop_dpms(struct smu_context *smu) -{ - return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); -} - static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1276,18 +1303,6 @@ static int smu_hw_fini(void *handle) smu_powergate_vcn(&adev->smu, true); } - ret = smu_stop_thermal_control(smu); - if (ret) { - pr_warn("Fail to stop thermal 
control!\n"); - return ret; - } - - ret = smu_stop_dpms(smu); - if (ret) { - pr_warn("Fail to stop Dpms!\n"); - return ret; - } - kfree(table_context->driver_pptable); table_context->driver_pptable = NULL; @@ -1329,10 +1344,7 @@ static int smu_suspend(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - bool baco_feature_is_enabled = false; - - if(!(adev->flags & AMD_IS_APU)) - baco_feature_is_enabled = smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT); + bool baco_feature_is_enabled = smu_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT); ret = smu_system_features_control(smu, false); if (ret) @@ -1351,8 +1363,6 @@ static int smu_suspend(void *handle) if (adev->asic_type >= CHIP_NAVI10 && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, false); return 0; } @@ -1365,11 +1375,7 @@ static int smu_resume(void *handle) pr_info("SMU is resuming...\n"); - ret = smu_start_smc_engine(smu); - if (ret) { - pr_err("SMU is not ready yet!\n"); - goto failed; - } + mutex_lock(&smu->mutex); ret = smu_smc_table_hw_init(smu, false); if (ret) @@ -1379,16 +1385,13 @@ static int smu_resume(void *handle) if (ret) goto failed; - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, true); - - smu->disable_uclk_switch = 0; + mutex_unlock(&smu->mutex); pr_info("SMU is resumed successfully!\n"); return 0; - failed: + mutex_unlock(&smu->mutex); return ret; } @@ -1406,9 +1409,8 @@ int smu_display_configuration_change(struct smu_context *smu, mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_deep_sleep_dcefclk) - smu->ppt_funcs->set_deep_sleep_dcefclk(smu, - display_config->min_dcef_deep_sleep_set_clk / 100); + smu_set_deep_sleep_dcefclk(smu, + display_config->min_dcef_deep_sleep_set_clk / 100); for (index = 0; index < display_config->num_path_including_non_display; index++) { if (display_config->displays[index].controller_id != 0) @@ -1527,8 +1529,7 @@ static int smu_enable_umd_pstate(void *handle, struct smu_context *smu = (struct smu_context*)(handle); struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - - if (!smu->is_apu && (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)) + if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context) return -EINVAL; if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { @@ -1586,9 +1587,9 @@ static int smu_default_set_performance_level(struct smu_context *smu, enum amd_d &soc_mask); if (ret) return ret; - smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); - smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); - smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); + smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask); + smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); + smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); break; case AMD_DPM_FORCED_LEVEL_MANUAL: case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: @@ -1652,7 +1653,7 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, workload = smu->workload_setting[index]; if (smu->power_profile_mode != workload) - smu_set_power_profile_mode(smu, &workload, 0, false); + smu_set_power_profile_mode(smu, &workload, 0); } return ret; @@ -1660,22 +1661,18 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, int smu_handle_task(struct smu_context *smu, enum amd_dpm_forced_level level, - enum amd_pp_task task_id, - bool lock_needed) + enum amd_pp_task task_id) { int ret = 0; - if (lock_needed) - mutex_lock(&smu->mutex); - switch (task_id) { case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE: ret = 
smu_pre_display_config_changed(smu); if (ret) - goto out; + return ret; ret = smu_set_cpu_power_state(smu); if (ret) - goto out; + return ret; ret = smu_adjust_power_state_dynamic(smu, level, false); break; case AMD_PP_TASK_COMPLETE_INIT: @@ -1686,10 +1683,6 @@ int smu_handle_task(struct smu_context *smu, break; } -out: - if (lock_needed) - mutex_unlock(&smu->mutex); - return ret; } @@ -1722,7 +1715,7 @@ int smu_switch_power_profile(struct smu_context *smu, } if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) - smu_set_power_profile_mode(smu, &workload, 0, false); + smu_set_power_profile_mode(smu, &workload, 0); mutex_unlock(&smu->mutex); @@ -1734,7 +1727,7 @@ enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu) struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); enum amd_dpm_forced_level level; - if (!smu->is_apu && !smu_dpm_ctx->dpm_context) + if (!smu_dpm_ctx->dpm_context) return -EINVAL; mutex_lock(&(smu->mutex)); @@ -1749,22 +1742,15 @@ int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_lev struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); int ret = 0; - if (!smu->is_apu && !smu_dpm_ctx->dpm_context) + if (!smu_dpm_ctx->dpm_context) return -EINVAL; - mutex_lock(&smu->mutex); - ret = smu_enable_umd_pstate(smu, &level); - if (ret) { - mutex_unlock(&smu->mutex); + if (ret) return ret; - } ret = smu_handle_task(smu, level, - AMD_PP_TASK_READJUST_POWER_STATE, - false); - - mutex_unlock(&smu->mutex); + AMD_PP_TASK_READJUST_POWER_STATE); return ret; } @@ -1780,144 +1766,6 @@ int smu_set_display_count(struct smu_context *smu, uint32_t count) return ret; } -int smu_force_clk_levels(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t mask, - bool lock_needed) -{ - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - int ret = 0; - - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { - pr_debug("force clock level is for dpm manual mode only.\n"); - return -EINVAL; - } - - if (lock_needed) - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs && smu->ppt_funcs->force_clk_levels) - ret = smu->ppt_funcs->force_clk_levels(smu, clk_type, mask); - - if (lock_needed) - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_mp1_state(struct smu_context *smu, - enum pp_mp1_state mp1_state) -{ - uint16_t msg; - int ret; - - /* - * The SMC is not fully ready. That may be - * expected as the IP may be masked. - * So, just return without error. - */ - if (!smu->pm_enabled) - return 0; - - mutex_lock(&smu->mutex); - - switch (mp1_state) { - case PP_MP1_STATE_SHUTDOWN: - msg = SMU_MSG_PrepareMp1ForShutdown; - break; - case PP_MP1_STATE_UNLOAD: - msg = SMU_MSG_PrepareMp1ForUnload; - break; - case PP_MP1_STATE_RESET: - msg = SMU_MSG_PrepareMp1ForReset; - break; - case PP_MP1_STATE_NONE: - default: - mutex_unlock(&smu->mutex); - return 0; - } - - /* some asics may not support those messages */ - if (smu_msg_get_index(smu, msg) < 0) { - mutex_unlock(&smu->mutex); - return 0; - } - - ret = smu_send_smc_msg(smu, msg); - if (ret) - pr_err("[PrepareMp1] Failed!\n"); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_df_cstate(struct smu_context *smu, - enum pp_df_cstate state) -{ - int ret = 0; - - /* - * The SMC is not fully ready. That may be - * expected as the IP may be masked. - * So, just return without error. 
- */ - if (!smu->pm_enabled) - return 0; - - if (!smu->ppt_funcs || !smu->ppt_funcs->set_df_cstate) - return 0; - - mutex_lock(&smu->mutex); - - ret = smu->ppt_funcs->set_df_cstate(smu, state); - if (ret) - pr_err("[SetDfCstate] failed!\n"); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_write_watermarks_table(struct smu_context *smu) -{ - int ret = 0; - struct smu_table_context *smu_table = &smu->smu_table; - struct smu_table *table = NULL; - - table = &smu_table->tables[SMU_TABLE_WATERMARKS]; - - if (!table->cpu_addr) - return -EINVAL; - - ret = smu_update_table(smu, SMU_TABLE_WATERMARKS, 0, table->cpu_addr, - true); - - return ret; -} - -int smu_set_watermarks_for_clock_ranges(struct smu_context *smu, - struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges) -{ - int ret = 0; - struct smu_table *watermarks = &smu->smu_table.tables[SMU_TABLE_WATERMARKS]; - void *table = watermarks->cpu_addr; - - mutex_lock(&smu->mutex); - - if (!smu->disable_watermark && - smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) && - smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { - smu_set_watermarks_table(smu, table, clock_ranges); - smu->watermarks_bitmap |= WATERMARKS_EXIST; - smu->watermarks_bitmap &= ~WATERMARKS_LOADED; - } - - mutex_unlock(&smu->mutex); - - return ret; -} - const struct amd_ip_funcs smu_ip_funcs = { .name = "smu", .early_init = smu_early_init, @@ -1954,549 +1802,3 @@ const struct amdgpu_ip_block_version smu_v12_0_ip_block = .rev = 0, .funcs = &smu_ip_funcs, }; - -int smu_load_microcode(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->load_microcode) - ret = smu->ppt_funcs->load_microcode(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_check_fw_status(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->check_fw_status) - ret = smu->ppt_funcs->check_fw_status(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_gfx_cgpg) - ret = smu->ppt_funcs->set_gfx_cgpg(smu, enabled); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_fan_speed_rpm) - ret = smu->ppt_funcs->set_fan_speed_rpm(smu, speed); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_power_limit(struct smu_context *smu, - uint32_t *limit, - bool def, - bool lock_needed) -{ - int ret = 0; - - if (lock_needed) - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_power_limit) - ret = smu->ppt_funcs->get_power_limit(smu, limit, def); - - if (lock_needed) - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_power_limit(struct smu_context *smu, uint32_t limit) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_power_limit) - ret = smu->ppt_funcs->set_power_limit(smu, limit); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->print_clk_levels) - ret = smu->ppt_funcs->print_clk_levels(smu, clk_type, buf); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if 
(smu->ppt_funcs->get_od_percentage) - ret = smu->ppt_funcs->get_od_percentage(smu, type); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_od_percentage(struct smu_context *smu, enum smu_clk_type type, uint32_t value) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_od_percentage) - ret = smu->ppt_funcs->set_od_percentage(smu, type, value); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_od_edit_dpm_table(struct smu_context *smu, - enum PP_OD_DPM_TABLE_COMMAND type, - long *input, uint32_t size) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->od_edit_dpm_table) - ret = smu->ppt_funcs->od_edit_dpm_table(smu, type, input, size); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->read_sensor) - ret = smu->ppt_funcs->read_sensor(smu, sensor, data, size); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_power_profile_mode(struct smu_context *smu, char *buf) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_power_profile_mode) - ret = smu->ppt_funcs->get_power_profile_mode(smu, buf); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_power_profile_mode(struct smu_context *smu, - long *param, - uint32_t param_size, - bool lock_needed) -{ - int ret = 0; - - if (lock_needed) - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_power_profile_mode) - ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size); - - if (lock_needed) - mutex_unlock(&smu->mutex); - - return ret; -} - - -int smu_get_fan_control_mode(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_fan_control_mode) - ret = smu->ppt_funcs->get_fan_control_mode(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_fan_control_mode(struct smu_context *smu, int value) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_fan_control_mode) - ret = smu->ppt_funcs->set_fan_control_mode(smu, value); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_fan_speed_percent) - ret = smu->ppt_funcs->get_fan_speed_percent(smu, speed); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_fan_speed_percent) - ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_fan_speed_rpm) - ret = smu->ppt_funcs->get_fan_speed_rpm(smu, speed); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_deep_sleep_dcefclk) - ret = smu->ppt_funcs->set_deep_sleep_dcefclk(smu, clk); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_active_display_count(struct smu_context *smu, uint32_t count) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_active_display_count) - ret = smu->ppt_funcs->set_active_display_count(smu, 
count); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_clock_by_type(struct smu_context *smu, - enum amd_pp_clock_type type, - struct amd_pp_clocks *clocks) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_clock_by_type) - ret = smu->ppt_funcs->get_clock_by_type(smu, type, clocks); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_max_high_clocks(struct smu_context *smu, - struct amd_pp_simple_clock_info *clocks) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_max_high_clocks) - ret = smu->ppt_funcs->get_max_high_clocks(smu, clocks); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_clock_by_type_with_latency(struct smu_context *smu, - enum smu_clk_type clk_type, - struct pp_clock_levels_with_latency *clocks) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_clock_by_type_with_latency) - ret = smu->ppt_funcs->get_clock_by_type_with_latency(smu, clk_type, clocks); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_clock_by_type_with_voltage(struct smu_context *smu, - enum amd_pp_clock_type type, - struct pp_clock_levels_with_voltage *clocks) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_clock_by_type_with_voltage) - ret = smu->ppt_funcs->get_clock_by_type_with_voltage(smu, type, clocks); - - mutex_unlock(&smu->mutex); - - return ret; -} - - -int smu_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request *clock_req) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->display_clock_voltage_request) - ret = smu->ppt_funcs->display_clock_voltage_request(smu, clock_req); - - mutex_unlock(&smu->mutex); - - return ret; -} - - -int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disable_memory_clock_switch) -{ - int ret = -EINVAL; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->display_disable_memory_clock_switch) - ret = smu->ppt_funcs->display_disable_memory_clock_switch(smu, disable_memory_clock_switch); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_notify_smu_enable_pwe(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->notify_smu_enable_pwe) - ret = smu->ppt_funcs->notify_smu_enable_pwe(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_xgmi_pstate(struct smu_context *smu, - uint32_t pstate) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_xgmi_pstate) - ret = smu->ppt_funcs->set_xgmi_pstate(smu, pstate); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_azalia_d3_pme(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->set_azalia_d3_pme) - ret = smu->ppt_funcs->set_azalia_d3_pme(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -bool smu_baco_is_support(struct smu_context *smu) -{ - bool ret = false; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->baco_is_support) - ret = smu->ppt_funcs->baco_is_support(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state) -{ - if (smu->ppt_funcs->baco_get_state) - return -EINVAL; - - mutex_lock(&smu->mutex); - *state = smu->ppt_funcs->baco_get_state(smu); - mutex_unlock(&smu->mutex); - - return 0; -} - -int smu_baco_reset(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->baco_reset) - ret = 
smu->ppt_funcs->baco_reset(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_mode2_reset(struct smu_context *smu) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->mode2_reset) - ret = smu->ppt_funcs->mode2_reset(smu); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu, - struct pp_smu_nv_clock_table *max_clocks) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_max_sustainable_clocks_by_dc) - ret = smu->ppt_funcs->get_max_sustainable_clocks_by_dc(smu, max_clocks); - - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_uclk_dpm_states(struct smu_context *smu, - unsigned int *clock_values_in_khz, - unsigned int *num_states) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_uclk_dpm_states) - ret = smu->ppt_funcs->get_uclk_dpm_states(smu, clock_values_in_khz, num_states); - - mutex_unlock(&smu->mutex); - - return ret; -} - -enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu) -{ - enum amd_pm_state_type pm_state = POWER_STATE_TYPE_DEFAULT; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_current_power_state) - pm_state = smu->ppt_funcs->get_current_power_state(smu); - - mutex_unlock(&smu->mutex); - - return pm_state; -} - -int smu_get_dpm_clock_table(struct smu_context *smu, - struct dpm_clocks *clock_table) -{ - int ret = 0; - - mutex_lock(&smu->mutex); - - if (smu->ppt_funcs->get_dpm_clock_table) - ret = smu->ppt_funcs->get_dpm_clock_table(smu, clock_table); - - mutex_unlock(&smu->mutex); - - return ret; -} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3099ac256bd3..d493a3f8c07a 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -25,7 +25,6 @@ #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" @@ -37,12 +36,6 @@ #include "smu_v11_0_pptable.h" #include "arcturus_ppsmc.h" #include "nbio/nbio_7_4_sh_mask.h" -#include "amdgpu_xgmi.h" -#include <linux/i2c.h> -#include <linux/pci.h> -#include "amdgpu_ras.h" - -#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev #define CTF_OFFSET_EDGE 5 #define CTF_OFFSET_HOTSPOT 5 @@ -119,7 +112,8 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(PrepareMp1ForShutdown, PPSMC_MSG_PrepareMp1ForShutdown), MSG_MAP(SoftReset, PPSMC_MSG_SoftReset), MSG_MAP(RunAfllBtc, PPSMC_MSG_RunAfllBtc), - MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc), + MSG_MAP(RunGfxDcBtc, PPSMC_MSG_RunGfxDcBtc), + MSG_MAP(RunSocDcBtc, PPSMC_MSG_RunSocDcBtc), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize), @@ -178,7 +172,6 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_table_map[SMU_TABLE_COUNT] = { TAB_MAP(SMU_METRICS), TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(OVERDRIVE), - TAB_MAP(I2C_COMMANDS), }; static struct smu_11_0_cmn2aisc_mapping arcturus_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -301,9 +294,6 @@ static int arcturus_tables_init(struct smu_context *smu, struct smu_table *table SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), - PAGE_SIZE, 
AMDGPU_GEM_DOMAIN_VRAM); - smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; @@ -538,17 +528,9 @@ static int arcturus_append_powerplay_table(struct smu_context *smu) return 0; } -static int arcturus_run_btc(struct smu_context *smu) +static int arcturus_run_btc_afll(struct smu_context *smu) { - int ret = 0; - - ret = smu_send_smc_msg(smu, SMU_MSG_RunAfllBtc); - if (ret) { - pr_err("RunAfllBtc failed!\n"); - return ret; - } - - return smu_send_smc_msg(smu, SMU_MSG_RunDcBtc); + return smu_send_smc_msg(smu, SMU_MSG_RunAfllBtc); } static int arcturus_populate_umd_state_clk(struct smu_context *smu) @@ -628,17 +610,12 @@ static int arcturus_print_clk_levels(struct smu_context *smu, return ret; } - /* - * For DPM disabled case, there will be only one clock level. - * And it's safe to assume that is always the current clock. - */ for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, - (clocks.num_levels == 1) ? "*" : - (arcturus_freqs_in_same_level( + arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now / 100) ? "*" : ""); break; case SMU_MCLK: @@ -658,10 +635,9 @@ static int arcturus_print_clk_levels(struct smu_context *smu, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, - (clocks.num_levels == 1) ? "*" : - (arcturus_freqs_in_same_level( + arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now / 100) ? "*" : ""); break; case SMU_SOCCLK: @@ -681,10 +657,9 @@ static int arcturus_print_clk_levels(struct smu_context *smu, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, - (clocks.num_levels == 1) ? "*" : - (arcturus_freqs_in_same_level( + arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now / 100) ? "*" : ""); break; case SMU_FCLK: @@ -704,10 +679,9 @@ static int arcturus_print_clk_levels(struct smu_context *smu, for (i = 0; i < single_dpm_table->count; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, - (clocks.num_levels == 1) ? "*" : - (arcturus_freqs_in_same_level( + arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now / 100) ? "*" : ""); break; default: @@ -782,6 +756,8 @@ static int arcturus_force_clk_levels(struct smu_context *smu, uint32_t soft_min_level, soft_max_level; int ret = 0; + mutex_lock(&(smu->mutex)); + soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? 
(fls(mask) - 1) : 0; @@ -816,19 +792,91 @@ static int arcturus_force_clk_levels(struct smu_context *smu, break; case SMU_MCLK: + single_dpm_table = &(dpm_table->mem_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_UCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_UCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + case SMU_SOCCLK: + single_dpm_table = &(dpm_table->soc_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_SOCCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_SOCCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + case SMU_FCLK: - /* - * Should not arrive here since Arcturus does not - * support mclk/socclk/fclk softmin/softmax settings - */ - ret = -EINVAL; + single_dpm_table = &(dpm_table->fclk_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_FCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_FCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + break; default: break; } + mutex_unlock(&(smu->mutex)); return ret; } @@ -995,7 +1043,7 @@ static int arcturus_read_sensor(struct smu_context *smu, *size = 4; break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = smu_smc_read_sensor(smu, sensor, data, size); } mutex_unlock(&smu->sensor_lock); @@ -1138,7 +1186,6 @@ static int arcturus_force_dpm_limit_value(struct smu_context *smu, bool highest) { struct arcturus_dpm_table *dpm_table = (struct arcturus_dpm_table *)smu->smu_dpm.dpm_context; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0); uint32_t soft_level; int ret = 0; @@ -1152,27 +1199,40 @@ static int arcturus_force_dpm_limit_value(struct smu_context *smu, bool highest) dpm_table->gfx_table.dpm_state.soft_max_level = dpm_table->gfx_table.dpm_levels[soft_level].value; - ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); + /* uclk */ + if (highest) + soft_level = arcturus_find_highest_dpm_level(&(dpm_table->mem_table)); + else + soft_level = 
arcturus_find_lowest_dpm_level(&(dpm_table->mem_table)); + + dpm_table->mem_table.dpm_state.soft_min_level = + dpm_table->mem_table.dpm_state.soft_max_level = + dpm_table->mem_table.dpm_levels[soft_level].value; + + /* socclk */ + if (highest) + soft_level = arcturus_find_highest_dpm_level(&(dpm_table->soc_table)); + else + soft_level = arcturus_find_lowest_dpm_level(&(dpm_table->soc_table)); + + dpm_table->soc_table.dpm_state.soft_min_level = + dpm_table->soc_table.dpm_state.soft_max_level = + dpm_table->soc_table.dpm_levels[soft_level].value; + + ret = arcturus_upload_dpm_level(smu, false, 0xFFFFFFFF); if (ret) { pr_err("Failed to upload boot level to %s!\n", highest ? "highest" : "lowest"); return ret; } - ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); + ret = arcturus_upload_dpm_level(smu, true, 0xFFFFFFFF); if (ret) { pr_err("Failed to upload dpm max level to %s!\n!", highest ? "highest" : "lowest"); return ret; } - if (hive) - /* - * Force XGMI Pstate to highest or lowest - * TODO: revise this when xgmi dpm is functional - */ - ret = smu_v11_0_set_xgmi_pstate(smu, highest ? 1 : 0); - return ret; } @@ -1180,7 +1240,6 @@ static int arcturus_unforce_dpm_levels(struct smu_context *smu) { struct arcturus_dpm_table *dpm_table = (struct arcturus_dpm_table *)smu->smu_dpm.dpm_context; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0); uint32_t soft_min_level, soft_max_level; int ret = 0; @@ -1192,25 +1251,34 @@ static int arcturus_unforce_dpm_levels(struct smu_context *smu) dpm_table->gfx_table.dpm_state.soft_max_level = dpm_table->gfx_table.dpm_levels[soft_max_level].value; - ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); + /* uclk */ + soft_min_level = arcturus_find_lowest_dpm_level(&(dpm_table->mem_table)); + soft_max_level = arcturus_find_highest_dpm_level(&(dpm_table->mem_table)); + dpm_table->mem_table.dpm_state.soft_min_level = + dpm_table->gfx_table.dpm_levels[soft_min_level].value; + dpm_table->mem_table.dpm_state.soft_max_level = + dpm_table->gfx_table.dpm_levels[soft_max_level].value; + + /* socclk */ + soft_min_level = arcturus_find_lowest_dpm_level(&(dpm_table->soc_table)); + soft_max_level = arcturus_find_highest_dpm_level(&(dpm_table->soc_table)); + dpm_table->soc_table.dpm_state.soft_min_level = + dpm_table->soc_table.dpm_levels[soft_min_level].value; + dpm_table->soc_table.dpm_state.soft_max_level = + dpm_table->soc_table.dpm_levels[soft_max_level].value; + + ret = arcturus_upload_dpm_level(smu, false, 0xFFFFFFFF); if (ret) { pr_err("Failed to upload DPM Bootup Levels!"); return ret; } - ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); + ret = arcturus_upload_dpm_level(smu, true, 0xFFFFFFFF); if (ret) { pr_err("Failed to upload DPM Max Levels!"); return ret; } - if (hive) - /* - * Reset XGMI Pstate back to default - * TODO: revise this when xgmi dpm is functional - */ - ret = smu_v11_0_set_xgmi_pstate(smu, 0); - return ret; } @@ -1823,253 +1891,6 @@ static bool arcturus_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } -static int arcturus_dpm_set_uvd_enable(struct smu_context *smu, bool enable) -{ - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; - int ret = 0; - - if (enable) { - if (!smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 1); - if (ret) { - pr_err("[EnableVCNDPM] failed!\n"); - return ret; - } - } - 
power_gate->vcn_gated = false; - } else { - if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 0); - if (ret) { - pr_err("[DisableVCNDPM] failed!\n"); - return ret; - } - } - power_gate->vcn_gated = true; - } - - return ret; -} - - -static void arcturus_fill_eeprom_i2c_req(SwI2cRequest_t *req, bool write, - uint8_t address, uint32_t numbytes, - uint8_t *data) -{ - int i; - - BUG_ON(numbytes > MAX_SW_I2C_COMMANDS); - - req->I2CcontrollerPort = 0; - req->I2CSpeed = 2; - req->SlaveAddress = address; - req->NumCmds = numbytes; - - for (i = 0; i < numbytes; i++) { - SwI2cCmd_t *cmd = &req->SwI2cCmds[i]; - - /* First 2 bytes are always write for lower 2b EEPROM address */ - if (i < 2) - cmd->Cmd = 1; - else - cmd->Cmd = write; - - - /* Add RESTART for read after address filled */ - cmd->CmdConfig |= (i == 2 && !write) ? CMDCONFIG_RESTART_MASK : 0; - - /* Add STOP in the end */ - cmd->CmdConfig |= (i == (numbytes - 1)) ? CMDCONFIG_STOP_MASK : 0; - - /* Fill with data regardless if read or write to simplify code */ - cmd->RegisterAddr = data[i]; - } -} - -static int arcturus_i2c_eeprom_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) -{ - uint32_t i, ret = 0; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); - struct smu_table_context *smu_table = &adev->smu.smu_table; - struct smu_table *table = &smu_table->tables[SMU_TABLE_I2C_COMMANDS]; - - memset(&req, 0, sizeof(req)); - arcturus_fill_eeprom_i2c_req(&req, false, address, numbytes, data); - - mutex_lock(&adev->smu.mutex); - /* Now read data starting with that address */ - ret = smu_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, - true); - mutex_unlock(&adev->smu.mutex); - - if (!ret) { - SwI2cRequest_t *res = (SwI2cRequest_t *)table->cpu_addr; - - /* Assume SMU fills res.SwI2cCmds[i].Data with read bytes */ - for (i = 0; i < numbytes; i++) - data[i] = res->SwI2cCmds[i].Data; - - pr_debug("arcturus_i2c_eeprom_read_data, address = %x, bytes = %d, data :", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - } else - pr_err("arcturus_i2c_eeprom_read_data - error occurred :%x", ret); - - return ret; -} - -static int arcturus_i2c_eeprom_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) -{ - uint32_t ret; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); - - memset(&req, 0, sizeof(req)); - arcturus_fill_eeprom_i2c_req(&req, true, address, numbytes, data); - - mutex_lock(&adev->smu.mutex); - ret = smu_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, true); - mutex_unlock(&adev->smu.mutex); - - if (!ret) { - pr_debug("arcturus_i2c_write(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. 
- */ - msleep(10); - - } else - pr_err("arcturus_i2c_write- error occurred :%x", ret); - - return ret; -} - -static int arcturus_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) -{ - uint32_t i, j, ret, data_size, data_chunk_size, next_eeprom_addr = 0; - uint8_t *data_ptr, data_chunk[MAX_SW_I2C_COMMANDS] = { 0 }; - - for (i = 0; i < num; i++) { - /* - * SMU interface allows at most MAX_SW_I2C_COMMANDS bytes of data at - * once and hence the data needs to be spliced into chunks and sent each - * chunk separately - */ - data_size = msgs[i].len - 2; - data_chunk_size = MAX_SW_I2C_COMMANDS - 2; - next_eeprom_addr = (msgs[i].buf[0] << 8 & 0xff00) | (msgs[i].buf[1] & 0xff); - data_ptr = msgs[i].buf + 2; - - for (j = 0; j < data_size / data_chunk_size; j++) { - /* Insert the EEPROM dest addess, bits 0-15 */ - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = arcturus_i2c_eeprom_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - - memcpy(data_ptr, data_chunk + 2, data_chunk_size); - } else { - - memcpy(data_chunk + 2, data_ptr, data_chunk_size); - - ret = arcturus_i2c_eeprom_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - } - - if (ret) { - num = -EIO; - goto fail; - } - - next_eeprom_addr += data_chunk_size; - data_ptr += data_chunk_size; - } - - if (data_size % data_chunk_size) { - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = arcturus_i2c_eeprom_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - - memcpy(data_ptr, data_chunk + 2, data_size % data_chunk_size); - } else { - memcpy(data_chunk + 2, data_ptr, data_size % data_chunk_size); - - ret = arcturus_i2c_eeprom_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - } - - if (ret) { - num = -EIO; - goto fail; - } - } - } - -fail: - return num; -} - -static u32 arcturus_i2c_eeprom_i2c_func(struct i2c_adapter *adap) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - - -static const struct i2c_algorithm arcturus_i2c_eeprom_i2c_algo = { - .master_xfer = arcturus_i2c_eeprom_i2c_xfer, - .functionality = arcturus_i2c_eeprom_i2c_func, -}; - -static int arcturus_i2c_eeprom_control_init(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - int res; - - control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; - control->dev.parent = &adev->pdev->dev; - control->algo = &arcturus_i2c_eeprom_i2c_algo; - snprintf(control->name, sizeof(control->name), "RAS EEPROM"); - - res = i2c_add_adapter(control); - if (res) - DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - - return res; -} - -static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control) -{ - i2c_del_adapter(control); -} - static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2088,7 +1909,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = arcturus_get_allowed_feature_mask, /* btc */ - .run_btc = arcturus_run_btc, + .run_afll_btc = arcturus_run_btc_afll, /* dpm/clk tables */ .set_default_dpm_table = arcturus_set_default_dpm_table, .populate_umd_state_clk = arcturus_populate_umd_state_clk, @@ -2108,61 +1929,12 
@@ static const struct pptable_funcs arcturus_ppt_funcs = { .dump_pptable = arcturus_dump_pptable, .get_power_limit = arcturus_get_power_limit, .is_dpm_running = arcturus_is_dpm_running, - .dpm_set_uvd_enable = arcturus_dpm_set_uvd_enable, - .i2c_eeprom_init = arcturus_i2c_eeprom_control_init, - .i2c_eeprom_fini = arcturus_i2c_eeprom_control_fini, - .init_microcode = smu_v11_0_init_microcode, - .load_microcode = smu_v11_0_load_microcode, - .init_smc_tables = smu_v11_0_init_smc_tables, - .fini_smc_tables = smu_v11_0_fini_smc_tables, - .init_power = smu_v11_0_init_power, - .fini_power = smu_v11_0_fini_power, - .check_fw_status = smu_v11_0_check_fw_status, - .setup_pptable = smu_v11_0_setup_pptable, - .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values, - .get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios, - .check_pptable = smu_v11_0_check_pptable, - .parse_pptable = smu_v11_0_parse_pptable, - .populate_smc_tables = smu_v11_0_populate_smc_pptable, - .check_fw_version = smu_v11_0_check_fw_version, - .write_pptable = smu_v11_0_write_pptable, - .set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep, - .set_tool_table_location = smu_v11_0_set_tool_table_location, - .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, - .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, - .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, - .read_smc_arg = smu_v11_0_read_arg, - .init_display_count = smu_v11_0_init_display_count, - .set_allowed_mask = smu_v11_0_set_allowed_mask, - .get_enabled_mask = smu_v11_0_get_enabled_mask, - .notify_display_change = smu_v11_0_notify_display_change, - .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, - .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, - .start_thermal_control = smu_v11_0_start_thermal_control, - .stop_thermal_control = smu_v11_0_stop_thermal_control, - .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk, - .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, - .get_fan_control_mode = smu_v11_0_get_fan_control_mode, - .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, - .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, - .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, - .gfx_off_control = smu_v11_0_gfx_off_control, - .register_irq_handler = smu_v11_0_register_irq_handler, - .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, - .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, - .baco_is_support= smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, - .baco_reset = smu_v11_0_baco_reset, - .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, - .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, - .override_pcie_parameters = smu_v11_0_override_pcie_parameters, }; void arcturus_set_ppt_funcs(struct smu_context *smu) { + struct smu_table_context *smu_table = &smu->smu_table; + smu->ppt_funcs = &arcturus_ppt_funcs; + smu_table->table_count = TABLE_COUNT; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index 2773966ae434..cc63705920dc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -36,8 +36,7 @@ HARDWARE_MGR = hwmgr.o processpptables.o \ pp_overdriver.o smu_helper.o \ 
vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \ vega20_thermal.o common_baco.o vega10_baco.o vega20_baco.o \ - vega12_baco.o smu9_baco.o tonga_baco.o polaris_baco.o fiji_baco.o \ - ci_baco.o smu7_baco.o + vega12_baco.o smu9_baco.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c index 1c73776bd606..9c57c1f67749 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.c @@ -79,25 +79,6 @@ static bool baco_cmd_handler(struct pp_hwmgr *hwmgr, u32 command, u32 reg, u32 m return ret; } -bool baco_program_registers(struct pp_hwmgr *hwmgr, - const struct baco_cmd_entry *entry, - const u32 array_size) -{ - u32 i, reg = 0; - - for (i = 0; i < array_size; i++) { - if ((entry[i].cmd == CMD_WRITE) || - (entry[i].cmd == CMD_READMODIFYWRITE) || - (entry[i].cmd == CMD_WAITFOR)) - reg = entry[i].reg_offset; - if (!baco_cmd_handler(hwmgr, entry[i].cmd, reg, entry[i].mask, - entry[i].shift, entry[i].val, entry[i].timeout)) - return false; - } - - return true; -} - bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr, const struct soc15_baco_cmd_entry *entry, const u32 array_size) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h index 8393eb62706d..95296c916f4e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/common_baco.h @@ -33,15 +33,6 @@ enum baco_cmd_type { CMD_DELAY_US, }; -struct baco_cmd_entry { - enum baco_cmd_type cmd; - uint32_t reg_offset; - uint32_t mask; - uint32_t shift; - uint32_t timeout; - uint32_t val; -}; - struct soc15_baco_cmd_entry { enum baco_cmd_type cmd; uint32_t hwip; @@ -53,10 +44,6 @@ struct soc15_baco_cmd_entry { uint32_t timeout; uint32_t val; }; - -extern bool baco_program_registers(struct pp_hwmgr *hwmgr, - const struct baco_cmd_entry *entry, - const u32 array_size); extern bool soc15_baco_program_registers(struct pp_hwmgr *hwmgr, const struct soc15_baco_cmd_entry *entry, const u32 array_size); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c805c6fcdba2..34f95e0e3ea4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -48,7 +48,6 @@ #include "smu7_clockpowergating.h" #include "processpptables.h" #include "pp_thermal.h" -#include "smu7_baco.h" #include "ivsrcid/ivsrcid_vislands30.h" @@ -1995,6 +1994,7 @@ static int smu7_sort_lookup_table(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_voltage_lookup_table *lookup_table) { uint32_t table_size, i, j; + struct phm_ppt_v1_voltage_lookup_record tmp_voltage_lookup_record; table_size = lookup_table->count; PP_ASSERT_WITH_CODE(0 != lookup_table->count, @@ -2005,8 +2005,9 @@ static int smu7_sort_lookup_table(struct pp_hwmgr *hwmgr, for (j = i + 1; j > 0; j--) { if (lookup_table->entries[j].us_vdd < lookup_table->entries[j - 1].us_vdd) { - swap(lookup_table->entries[j - 1], - lookup_table->entries[j]); + tmp_voltage_lookup_record = lookup_table->entries[j - 1]; + lookup_table->entries[j - 1] = lookup_table->entries[j]; + lookup_table->entries[j] = tmp_voltage_lookup_record; } } } @@ -5144,9 +5145,6 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .get_power_profile_mode = smu7_get_power_profile_mode, .set_power_profile_mode = smu7_set_power_profile_mode, 
.get_performance_level = smu7_get_performance_level, - .get_asic_baco_capability = smu7_baco_get_capability, - .get_asic_baco_state = smu7_baco_get_state, - .set_asic_baco_state = smu7_baco_set_state, .power_off_asic = smu7_power_off_asic, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4ea63a2e17da..d08493b67b67 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -712,6 +712,7 @@ static int vega10_sort_lookup_table(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_voltage_lookup_table *lookup_table) { uint32_t table_size, i, j; + struct phm_ppt_v1_voltage_lookup_record tmp_voltage_lookup_record; PP_ASSERT_WITH_CODE(lookup_table && lookup_table->count, "Lookup table is empty", return -EINVAL); @@ -723,8 +724,9 @@ static int vega10_sort_lookup_table(struct pp_hwmgr *hwmgr, for (j = i + 1; j > 0; j--) { if (lookup_table->entries[j].us_vdd < lookup_table->entries[j - 1].us_vdd) { - swap(lookup_table->entries[j - 1], - lookup_table->entries[j]); + tmp_voltage_lookup_record = lookup_table->entries[j - 1]; + lookup_table->entries[j - 1] = lookup_table->entries[j]; + lookup_table->entries[j] = tmp_voltage_lookup_record; } } } @@ -5096,7 +5098,9 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; - for (i = 0; i < podn_vdd_dep->count; i++) + for (i = 0; i < podn_vdd_dep->count - 1; i++) + od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; + if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc) od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index 9b5e72bdceca..df6ff9252401 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -29,7 +29,7 @@ #include "vega20_baco.h" #include "vega20_smumgr.h" -#include "amdgpu_ras.h" + static const struct soc15_baco_cmd_entry clean_baco_tbl[] = { @@ -74,7 +74,6 @@ int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); enum BACO_STATE cur_state; uint32_t data; @@ -85,19 +84,13 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) return 0; if (state == BACO_STATE_IN) { - if (!ras || !ras->supported) { - data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); - data |= 0x80000000; - WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); - if(smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_EnterBaco, 0)) - return -EINVAL; - } else { - if(smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_EnterBaco, 1)) - return -EINVAL; - } + + if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) + return -EINVAL; } else if (state == BACO_STATE_OUT) { if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 
9295bd90b792..f5915308e643 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -182,9 +182,6 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TablelessHardwareInterface); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_BACO); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EnableSMU7ThermalManagement); @@ -493,8 +490,8 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr) "Failed to init sclk threshold!", return ret); - if (adev->in_gpu_reset && - (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) { + if (adev->in_baco_reset) { + adev->in_baco_reset = 0; ret = vega20_baco_apply_vdci_flush_workaround(hwmgr); if (ret) @@ -4158,24 +4155,6 @@ static int vega20_smu_i2c_bus_access(struct pp_hwmgr *hwmgr, bool acquire) return res; } -static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr, - enum pp_df_cstate state) -{ - int ret; - - /* PPSMC_MSG_DFCstateControl is supported with 40.50 and later fws */ - if (hwmgr->smu_version < 0x283200) { - pr_err("Df cstate control is supported with 40.50 and later SMC fw!\n"); - return -EINVAL; - } - - ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_DFCstateControl, state); - if (ret) - pr_err("SetDfCstate failed!\n"); - - return ret; -} - static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* init/fini related */ .backend_init = vega20_hwmgr_backend_init, @@ -4244,7 +4223,6 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .set_asic_baco_state = vega20_baco_set_state, .set_mp1_state = vega20_set_mp1_state, .smu_i2c_bus_access = vega20_smu_i2c_bus_access, - .set_df_cstate = vega20_set_df_cstate, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 8120e7587585..23171a4d9a31 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -259,6 +259,7 @@ struct smu_table_context struct smu_bios_boot_up_values boot_values; void *driver_pptable; struct smu_table *tables; + uint32_t table_count; struct smu_table memory_pool; uint8_t thermal_controller_type; uint16_t TDPODLimit; @@ -321,13 +322,6 @@ struct mclock_latency_table { struct mclk_latency_entries entries[MAX_REGULAR_DPM_NUM]; }; -enum smu_reset_mode -{ - SMU_RESET_MODE_0, - SMU_RESET_MODE_1, - SMU_RESET_MODE_2, -}; - enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, @@ -347,6 +341,7 @@ struct smu_context struct amdgpu_device *adev; struct amdgpu_irq_src *irq_source; + const struct smu_funcs *funcs; const struct pptable_funcs *ppt_funcs; struct mutex mutex; struct mutex sensor_lock; @@ -387,14 +382,11 @@ struct smu_context uint32_t power_profile_mode; uint32_t default_power_profile_mode; bool pm_enabled; - bool is_apu; uint32_t smc_if_version; }; -struct i2c_adapter; - struct pptable_funcs { int (*alloc_dpm_context)(struct smu_context *smu); int (*store_powerplay_table)(struct smu_context *smu); @@ -406,7 +398,7 @@ struct pptable_funcs { int (*get_smu_table_index)(struct smu_context *smu, uint32_t index); int (*get_smu_power_index)(struct smu_context *smu, uint32_t index); int (*get_workload_type)(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile); - int (*run_btc)(struct smu_context *smu); + int (*run_afll_btc)(struct smu_context *smu); int (*get_allowed_feature_mask)(struct smu_context 
*smu, uint32_t *feature_mask, uint32_t num); enum amd_pm_state_type (*get_current_power_state)(struct smu_context *smu); int (*set_default_dpm_table)(struct smu_context *smu); @@ -467,19 +459,17 @@ struct pptable_funcs { int (*display_disable_memory_clock_switch)(struct smu_context *smu, bool disable_memory_clock_switch); void (*dump_pptable)(struct smu_context *smu); int (*get_power_limit)(struct smu_context *smu, uint32_t *limit, bool asic_default); - int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t dpm_level, uint32_t *freq); - int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state); - int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap); - int (*i2c_eeprom_init)(struct i2c_adapter *control); - void (*i2c_eeprom_fini)(struct i2c_adapter *control); - int (*get_dpm_clock_table)(struct smu_context *smu, struct dpm_clocks *clock_table); + int (*get_dpm_uclk_limited)(struct smu_context *smu, uint32_t *clock, bool max); +}; + +struct smu_funcs +{ int (*init_microcode)(struct smu_context *smu); - int (*load_microcode)(struct smu_context *smu); int (*init_smc_tables)(struct smu_context *smu); int (*fini_smc_tables)(struct smu_context *smu); int (*init_power)(struct smu_context *smu); int (*fini_power)(struct smu_context *smu); + int (*load_microcode)(struct smu_context *smu); int (*check_fw_status)(struct smu_context *smu); int (*setup_pptable)(struct smu_context *smu); int (*get_vbios_bootup_values)(struct smu_context *smu); @@ -495,6 +485,7 @@ struct pptable_funcs { int (*set_min_dcef_deep_sleep)(struct smu_context *smu); int (*set_tool_table_location)(struct smu_context *smu); int (*notify_memory_pool_location)(struct smu_context *smu); + int (*write_watermarks_table)(struct smu_context *smu); int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu); int (*system_features_control)(struct smu_context *smu, bool en); int (*send_smc_msg)(struct smu_context *smu, uint16_t msg); @@ -508,7 +499,8 @@ struct pptable_funcs { int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value); int (*init_max_sustainable_clocks)(struct smu_context *smu); int (*start_thermal_control)(struct smu_context *smu); - int (*stop_thermal_control)(struct smu_context *smu); + int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor, + void *data, uint32_t *size); int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk); int (*set_active_display_count)(struct smu_context *smu, uint32_t count); int (*store_cc6_data)(struct smu_context *smu, uint32_t separation_time, @@ -530,6 +522,8 @@ struct pptable_funcs { int (*get_current_shallow_sleep_clocks)(struct smu_context *smu, struct smu_clock_info *clocks); int (*notify_smu_enable_pwe)(struct smu_context *smu); + int (*set_watermarks_for_clock_ranges)(struct smu_context *smu, + struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges); int (*conv_power_profile_to_pplib_workload)(int power_profile); uint32_t (*get_fan_control_mode)(struct smu_context *smu); int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); @@ -544,89 +538,234 @@ struct pptable_funcs { enum smu_baco_state (*baco_get_state)(struct smu_context *smu); int (*baco_set_state)(struct smu_context *smu, enum smu_baco_state state); int (*baco_reset)(struct smu_context *smu); - int (*mode2_reset)(struct smu_context *smu); int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t 
*max); - int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); - int (*override_pcie_parameters)(struct smu_context *smu); }; -int smu_load_microcode(struct smu_context *smu); +#define smu_init_microcode(smu) \ + ((smu)->funcs->init_microcode ? (smu)->funcs->init_microcode((smu)) : 0) +#define smu_init_smc_tables(smu) \ + ((smu)->funcs->init_smc_tables ? (smu)->funcs->init_smc_tables((smu)) : 0) +#define smu_fini_smc_tables(smu) \ + ((smu)->funcs->fini_smc_tables ? (smu)->funcs->fini_smc_tables((smu)) : 0) +#define smu_init_power(smu) \ + ((smu)->funcs->init_power ? (smu)->funcs->init_power((smu)) : 0) +#define smu_fini_power(smu) \ + ((smu)->funcs->fini_power ? (smu)->funcs->fini_power((smu)) : 0) +#define smu_load_microcode(smu) \ + ((smu)->funcs->load_microcode ? (smu)->funcs->load_microcode((smu)) : 0) +#define smu_check_fw_status(smu) \ + ((smu)->funcs->check_fw_status ? (smu)->funcs->check_fw_status((smu)) : 0) +#define smu_setup_pptable(smu) \ + ((smu)->funcs->setup_pptable ? (smu)->funcs->setup_pptable((smu)) : 0) +#define smu_powergate_sdma(smu, gate) \ + ((smu)->funcs->powergate_sdma ? (smu)->funcs->powergate_sdma((smu), (gate)) : 0) +#define smu_powergate_vcn(smu, gate) \ + ((smu)->funcs->powergate_vcn ? (smu)->funcs->powergate_vcn((smu), (gate)) : 0) +#define smu_set_gfx_cgpg(smu, enabled) \ + ((smu)->funcs->set_gfx_cgpg ? (smu)->funcs->set_gfx_cgpg((smu), (enabled)) : 0) +#define smu_get_vbios_bootup_values(smu) \ + ((smu)->funcs->get_vbios_bootup_values ? (smu)->funcs->get_vbios_bootup_values((smu)) : 0) +#define smu_get_clk_info_from_vbios(smu) \ + ((smu)->funcs->get_clk_info_from_vbios ? (smu)->funcs->get_clk_info_from_vbios((smu)) : 0) +#define smu_check_pptable(smu) \ + ((smu)->funcs->check_pptable ? (smu)->funcs->check_pptable((smu)) : 0) +#define smu_parse_pptable(smu) \ + ((smu)->funcs->parse_pptable ? (smu)->funcs->parse_pptable((smu)) : 0) +#define smu_populate_smc_tables(smu) \ + ((smu)->funcs->populate_smc_tables ? (smu)->funcs->populate_smc_tables((smu)) : 0) +#define smu_check_fw_version(smu) \ + ((smu)->funcs->check_fw_version ? (smu)->funcs->check_fw_version((smu)) : 0) +#define smu_write_pptable(smu) \ + ((smu)->funcs->write_pptable ? (smu)->funcs->write_pptable((smu)) : 0) +#define smu_set_min_dcef_deep_sleep(smu) \ + ((smu)->funcs->set_min_dcef_deep_sleep ? (smu)->funcs->set_min_dcef_deep_sleep((smu)) : 0) +#define smu_set_tool_table_location(smu) \ + ((smu)->funcs->set_tool_table_location ? (smu)->funcs->set_tool_table_location((smu)) : 0) +#define smu_notify_memory_pool_location(smu) \ + ((smu)->funcs->notify_memory_pool_location ? (smu)->funcs->notify_memory_pool_location((smu)) : 0) +#define smu_gfx_off_control(smu, enable) \ + ((smu)->funcs->gfx_off_control ? (smu)->funcs->gfx_off_control((smu), (enable)) : 0) -int smu_check_fw_status(struct smu_context *smu); +#define smu_write_watermarks_table(smu) \ + ((smu)->funcs->write_watermarks_table ? (smu)->funcs->write_watermarks_table((smu)) : 0) +#define smu_set_last_dcef_min_deep_sleep_clk(smu) \ + ((smu)->funcs->set_last_dcef_min_deep_sleep_clk ? (smu)->funcs->set_last_dcef_min_deep_sleep_clk((smu)) : 0) +#define smu_system_features_control(smu, en) \ + ((smu)->funcs->system_features_control ? (smu)->funcs->system_features_control((smu), (en)) : 0) +#define smu_init_max_sustainable_clocks(smu) \ + ((smu)->funcs->init_max_sustainable_clocks ? 
(smu)->funcs->init_max_sustainable_clocks((smu)) : 0) +#define smu_set_default_od_settings(smu, initialize) \ + ((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0) +#define smu_set_fan_speed_rpm(smu, speed) \ + ((smu)->funcs->set_fan_speed_rpm ? (smu)->funcs->set_fan_speed_rpm((smu), (speed)) : 0) +#define smu_send_smc_msg(smu, msg) \ + ((smu)->funcs->send_smc_msg? (smu)->funcs->send_smc_msg((smu), (msg)) : 0) +#define smu_send_smc_msg_with_param(smu, msg, param) \ + ((smu)->funcs->send_smc_msg_with_param? (smu)->funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0) +#define smu_read_smc_arg(smu, arg) \ + ((smu)->funcs->read_smc_arg? (smu)->funcs->read_smc_arg((smu), (arg)) : 0) +#define smu_alloc_dpm_context(smu) \ + ((smu)->ppt_funcs->alloc_dpm_context ? (smu)->ppt_funcs->alloc_dpm_context((smu)) : 0) +#define smu_init_display_count(smu, count) \ + ((smu)->funcs->init_display_count ? (smu)->funcs->init_display_count((smu), (count)) : 0) +#define smu_feature_set_allowed_mask(smu) \ + ((smu)->funcs->set_allowed_mask? (smu)->funcs->set_allowed_mask((smu)) : 0) +#define smu_feature_get_enabled_mask(smu, mask, num) \ + ((smu)->funcs->get_enabled_mask? (smu)->funcs->get_enabled_mask((smu), (mask), (num)) : 0) +#define smu_is_dpm_running(smu) \ + ((smu)->ppt_funcs->is_dpm_running ? (smu)->ppt_funcs->is_dpm_running((smu)) : 0) +#define smu_notify_display_change(smu) \ + ((smu)->funcs->notify_display_change? (smu)->funcs->notify_display_change((smu)) : 0) +#define smu_store_powerplay_table(smu) \ + ((smu)->ppt_funcs->store_powerplay_table ? (smu)->ppt_funcs->store_powerplay_table((smu)) : 0) +#define smu_check_powerplay_table(smu) \ + ((smu)->ppt_funcs->check_powerplay_table ? (smu)->ppt_funcs->check_powerplay_table((smu)) : 0) +#define smu_append_powerplay_table(smu) \ + ((smu)->ppt_funcs->append_powerplay_table ? (smu)->ppt_funcs->append_powerplay_table((smu)) : 0) +#define smu_set_default_dpm_table(smu) \ + ((smu)->ppt_funcs->set_default_dpm_table ? (smu)->ppt_funcs->set_default_dpm_table((smu)) : 0) +#define smu_populate_umd_state_clk(smu) \ + ((smu)->ppt_funcs->populate_umd_state_clk ? (smu)->ppt_funcs->populate_umd_state_clk((smu)) : 0) +#define smu_set_default_od8_settings(smu) \ + ((smu)->ppt_funcs->set_default_od8_settings ? (smu)->ppt_funcs->set_default_od8_settings((smu)) : 0) +#define smu_get_power_limit(smu, limit, def) \ + ((smu)->ppt_funcs->get_power_limit ? (smu)->ppt_funcs->get_power_limit((smu), (limit), (def)) : 0) +#define smu_set_power_limit(smu, limit) \ + ((smu)->funcs->set_power_limit ? (smu)->funcs->set_power_limit((smu), (limit)) : 0) +#define smu_get_current_clk_freq(smu, clk_id, value) \ + ((smu)->funcs->get_current_clk_freq? (smu)->funcs->get_current_clk_freq((smu), (clk_id), (value)) : 0) +#define smu_print_clk_levels(smu, clk_type, buf) \ + ((smu)->ppt_funcs->print_clk_levels ? (smu)->ppt_funcs->print_clk_levels((smu), (clk_type), (buf)) : 0) +#define smu_force_clk_levels(smu, clk_type, level) \ + ((smu)->ppt_funcs->force_clk_levels ? (smu)->ppt_funcs->force_clk_levels((smu), (clk_type), (level)) : 0) +#define smu_get_od_percentage(smu, type) \ + ((smu)->ppt_funcs->get_od_percentage ? (smu)->ppt_funcs->get_od_percentage((smu), (type)) : 0) +#define smu_set_od_percentage(smu, type, value) \ + ((smu)->ppt_funcs->set_od_percentage ? (smu)->ppt_funcs->set_od_percentage((smu), (type), (value)) : 0) +#define smu_od_edit_dpm_table(smu, type, input, size) \ + ((smu)->ppt_funcs->od_edit_dpm_table ? 
(smu)->ppt_funcs->od_edit_dpm_table((smu), (type), (input), (size)) : 0) +#define smu_tables_init(smu, tab) \ + ((smu)->ppt_funcs->tables_init ? (smu)->ppt_funcs->tables_init((smu), (tab)) : 0) +#define smu_set_thermal_fan_table(smu) \ + ((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) +#define smu_start_thermal_control(smu) \ + ((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0) +#define smu_read_sensor(smu, sensor, data, size) \ + ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0) +#define smu_smc_read_sensor(smu, sensor, data, size) \ + ((smu)->funcs->read_sensor? (smu)->funcs->read_sensor((smu), (sensor), (data), (size)) : -EINVAL) +#define smu_get_power_profile_mode(smu, buf) \ + ((smu)->ppt_funcs->get_power_profile_mode ? (smu)->ppt_funcs->get_power_profile_mode((smu), buf) : 0) +#define smu_set_power_profile_mode(smu, param, param_size) \ + ((smu)->ppt_funcs->set_power_profile_mode ? (smu)->ppt_funcs->set_power_profile_mode((smu), (param), (param_size)) : 0) +#define smu_pre_display_config_changed(smu) \ + ((smu)->ppt_funcs->pre_display_config_changed ? (smu)->ppt_funcs->pre_display_config_changed((smu)) : 0) +#define smu_display_config_changed(smu) \ + ((smu)->ppt_funcs->display_config_changed ? (smu)->ppt_funcs->display_config_changed((smu)) : 0) +#define smu_apply_clocks_adjust_rules(smu) \ + ((smu)->ppt_funcs->apply_clocks_adjust_rules ? (smu)->ppt_funcs->apply_clocks_adjust_rules((smu)) : 0) +#define smu_notify_smc_dispaly_config(smu) \ + ((smu)->ppt_funcs->notify_smc_dispaly_config ? (smu)->ppt_funcs->notify_smc_dispaly_config((smu)) : 0) +#define smu_force_dpm_limit_value(smu, highest) \ + ((smu)->ppt_funcs->force_dpm_limit_value ? (smu)->ppt_funcs->force_dpm_limit_value((smu), (highest)) : 0) +#define smu_unforce_dpm_levels(smu) \ + ((smu)->ppt_funcs->unforce_dpm_levels ? (smu)->ppt_funcs->unforce_dpm_levels((smu)) : 0) +#define smu_get_profiling_clk_mask(smu, level, sclk_mask, mclk_mask, soc_mask) \ + ((smu)->ppt_funcs->get_profiling_clk_mask ? (smu)->ppt_funcs->get_profiling_clk_mask((smu), (level), (sclk_mask), (mclk_mask), (soc_mask)) : 0) +#define smu_set_cpu_power_state(smu) \ + ((smu)->ppt_funcs->set_cpu_power_state ? (smu)->ppt_funcs->set_cpu_power_state((smu)) : 0) +#define smu_get_fan_control_mode(smu) \ + ((smu)->funcs->get_fan_control_mode ? (smu)->funcs->get_fan_control_mode((smu)) : 0) +#define smu_set_fan_control_mode(smu, value) \ + ((smu)->funcs->set_fan_control_mode ? (smu)->funcs->set_fan_control_mode((smu), (value)) : 0) +#define smu_get_fan_speed_percent(smu, speed) \ + ((smu)->ppt_funcs->get_fan_speed_percent ? (smu)->ppt_funcs->get_fan_speed_percent((smu), (speed)) : 0) +#define smu_set_fan_speed_percent(smu, speed) \ + ((smu)->funcs->set_fan_speed_percent ? (smu)->funcs->set_fan_speed_percent((smu), (speed)) : 0) +#define smu_get_fan_speed_rpm(smu, speed) \ + ((smu)->ppt_funcs->get_fan_speed_rpm ? (smu)->ppt_funcs->get_fan_speed_rpm((smu), (speed)) : 0) -int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled); +#define smu_msg_get_index(smu, msg) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL) +#define smu_clk_get_index(smu, msg) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_clk_index? (smu)->ppt_funcs->get_smu_clk_index((smu), (msg)) : -EINVAL) : -EINVAL) +#define smu_feature_get_index(smu, msg) \ + ((smu)->ppt_funcs? 
((smu)->ppt_funcs->get_smu_feature_index? (smu)->ppt_funcs->get_smu_feature_index((smu), (msg)) : -EINVAL) : -EINVAL) +#define smu_table_get_index(smu, tab) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_table_index? (smu)->ppt_funcs->get_smu_table_index((smu), (tab)) : -EINVAL) : -EINVAL) +#define smu_power_get_index(smu, src) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_power_index? (smu)->ppt_funcs->get_smu_power_index((smu), (src)) : -EINVAL) : -EINVAL) +#define smu_workload_get_type(smu, profile) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_workload_type? (smu)->ppt_funcs->get_workload_type((smu), (profile)) : -EINVAL) : -EINVAL) +#define smu_run_afll_btc(smu) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->run_afll_btc? (smu)->ppt_funcs->run_afll_btc((smu)) : 0) : 0) +#define smu_get_allowed_feature_mask(smu, feature_mask, num) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_allowed_feature_mask? (smu)->ppt_funcs->get_allowed_feature_mask((smu), (feature_mask), (num)) : 0) : 0) +#define smu_set_deep_sleep_dcefclk(smu, clk) \ + ((smu)->funcs->set_deep_sleep_dcefclk ? (smu)->funcs->set_deep_sleep_dcefclk((smu), (clk)) : 0) +#define smu_set_active_display_count(smu, count) \ + ((smu)->funcs->set_active_display_count ? (smu)->funcs->set_active_display_count((smu), (count)) : 0) +#define smu_store_cc6_data(smu, st, cc6_dis, pst_dis, pst_sw_dis) \ + ((smu)->funcs->store_cc6_data ? (smu)->funcs->store_cc6_data((smu), (st), (cc6_dis), (pst_dis), (pst_sw_dis)) : 0) +#define smu_get_clock_by_type(smu, type, clocks) \ + ((smu)->funcs->get_clock_by_type ? (smu)->funcs->get_clock_by_type((smu), (type), (clocks)) : 0) +#define smu_get_max_high_clocks(smu, clocks) \ + ((smu)->funcs->get_max_high_clocks ? (smu)->funcs->get_max_high_clocks((smu), (clocks)) : 0) +#define smu_get_clock_by_type_with_latency(smu, clk_type, clocks) \ + ((smu)->ppt_funcs->get_clock_by_type_with_latency ? (smu)->ppt_funcs->get_clock_by_type_with_latency((smu), (clk_type), (clocks)) : 0) +#define smu_get_clock_by_type_with_voltage(smu, type, clocks) \ + ((smu)->ppt_funcs->get_clock_by_type_with_voltage ? (smu)->ppt_funcs->get_clock_by_type_with_voltage((smu), (type), (clocks)) : 0) +#define smu_display_clock_voltage_request(smu, clock_req) \ + ((smu)->funcs->display_clock_voltage_request ? (smu)->funcs->display_clock_voltage_request((smu), (clock_req)) : 0) +#define smu_display_disable_memory_clock_switch(smu, disable_memory_clock_switch) \ + ((smu)->ppt_funcs->display_disable_memory_clock_switch ? (smu)->ppt_funcs->display_disable_memory_clock_switch((smu), (disable_memory_clock_switch)) : -EINVAL) +#define smu_get_dal_power_level(smu, clocks) \ + ((smu)->funcs->get_dal_power_level ? (smu)->funcs->get_dal_power_level((smu), (clocks)) : 0) +#define smu_get_perf_level(smu, designation, level) \ + ((smu)->funcs->get_perf_level ? (smu)->funcs->get_perf_level((smu), (designation), (level)) : 0) +#define smu_get_current_shallow_sleep_clocks(smu, clocks) \ + ((smu)->funcs->get_current_shallow_sleep_clocks ? (smu)->funcs->get_current_shallow_sleep_clocks((smu), (clocks)) : 0) +#define smu_notify_smu_enable_pwe(smu) \ + ((smu)->funcs->notify_smu_enable_pwe ? (smu)->funcs->notify_smu_enable_pwe((smu)) : 0) +#define smu_set_watermarks_for_clock_ranges(smu, clock_ranges) \ + ((smu)->funcs->set_watermarks_for_clock_ranges ? (smu)->funcs->set_watermarks_for_clock_ranges((smu), (clock_ranges)) : 0) +#define smu_dpm_set_uvd_enable(smu, enable) \ + ((smu)->ppt_funcs->dpm_set_uvd_enable ? 
(smu)->ppt_funcs->dpm_set_uvd_enable((smu), (enable)) : 0) +#define smu_dpm_set_vce_enable(smu, enable) \ + ((smu)->ppt_funcs->dpm_set_vce_enable ? (smu)->ppt_funcs->dpm_set_vce_enable((smu), (enable)) : 0) +#define smu_set_xgmi_pstate(smu, pstate) \ + ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) +#define smu_set_watermarks_table(smu, tab, clock_ranges) \ + ((smu)->ppt_funcs->set_watermarks_table ? (smu)->ppt_funcs->set_watermarks_table((smu), (tab), (clock_ranges)) : 0) +#define smu_get_current_clk_freq_by_table(smu, clk_type, value) \ + ((smu)->ppt_funcs->get_current_clk_freq_by_table ? (smu)->ppt_funcs->get_current_clk_freq_by_table((smu), (clk_type), (value)) : 0) +#define smu_thermal_temperature_range_update(smu, range, rw) \ + ((smu)->ppt_funcs->thermal_temperature_range_update? (smu)->ppt_funcs->thermal_temperature_range_update((smu), (range), (rw)) : 0) +#define smu_get_thermal_temperature_range(smu, range) \ + ((smu)->ppt_funcs->get_thermal_temperature_range? (smu)->ppt_funcs->get_thermal_temperature_range((smu), (range)) : 0) +#define smu_register_irq_handler(smu) \ + ((smu)->funcs->register_irq_handler ? (smu)->funcs->register_irq_handler(smu) : 0) +#define smu_set_azalia_d3_pme(smu) \ + ((smu)->funcs->set_azalia_d3_pme ? (smu)->funcs->set_azalia_d3_pme((smu)) : 0) +#define smu_get_dpm_ultimate_freq(smu, param, min, max) \ + ((smu)->funcs->get_dpm_ultimate_freq ? (smu)->funcs->get_dpm_ultimate_freq((smu), (param), (min), (max)) : 0) +#define smu_get_max_sustainable_clocks_by_dc(smu, max_clocks) \ + ((smu)->funcs->get_max_sustainable_clocks_by_dc ? (smu)->funcs->get_max_sustainable_clocks_by_dc((smu), (max_clocks)) : 0) +#define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ + ((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0) +#define smu_baco_is_support(smu) \ + ((smu)->funcs->baco_is_support? (smu)->funcs->baco_is_support((smu)) : false) +#define smu_baco_get_state(smu, state) \ + ((smu)->funcs->baco_get_state? (smu)->funcs->baco_get_state((smu), (state)) : 0) +#define smu_baco_reset(smu) \ + ((smu)->funcs->baco_reset? (smu)->funcs->baco_reset((smu)) : 0) +#define smu_asic_set_performance_level(smu, level) \ + ((smu)->ppt_funcs->set_performance_level? (smu)->ppt_funcs->set_performance_level((smu), (level)) : -EINVAL); +#define smu_dump_pptable(smu) \ + ((smu)->ppt_funcs->dump_pptable ? (smu)->ppt_funcs->dump_pptable((smu)) : 0) +#define smu_get_dpm_uclk_limited(smu, clock, max) \ + ((smu)->ppt_funcs->get_dpm_uclk_limited ? (smu)->ppt_funcs->get_dpm_uclk_limited((smu), (clock), (max)) : -EINVAL) -#define smu_i2c_eeprom_init(smu, control) \ - ((smu)->ppt_funcs->i2c_eeprom_init ? (smu)->ppt_funcs->i2c_eeprom_init((control)) : -EINVAL) -#define smu_i2c_eeprom_fini(smu, control) \ - ((smu)->ppt_funcs->i2c_eeprom_fini ? 
(smu)->ppt_funcs->i2c_eeprom_fini((control)) : -EINVAL) - -int smu_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed); - -int smu_get_power_limit(struct smu_context *smu, - uint32_t *limit, - bool def, - bool lock_needed); - -int smu_set_power_limit(struct smu_context *smu, uint32_t limit); -int smu_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf); -int smu_get_od_percentage(struct smu_context *smu, enum smu_clk_type type); -int smu_set_od_percentage(struct smu_context *smu, enum smu_clk_type type, uint32_t value); - -int smu_od_edit_dpm_table(struct smu_context *smu, - enum PP_OD_DPM_TABLE_COMMAND type, - long *input, uint32_t size); - -int smu_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size); -int smu_get_power_profile_mode(struct smu_context *smu, char *buf); - -int smu_set_power_profile_mode(struct smu_context *smu, - long *param, - uint32_t param_size, - bool lock_needed); -int smu_get_fan_control_mode(struct smu_context *smu); -int smu_set_fan_control_mode(struct smu_context *smu, int value); -int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed); -int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed); -int smu_get_fan_speed_rpm(struct smu_context *smu, uint32_t *speed); - -int smu_set_deep_sleep_dcefclk(struct smu_context *smu, int clk); -int smu_set_active_display_count(struct smu_context *smu, uint32_t count); - -int smu_get_clock_by_type(struct smu_context *smu, - enum amd_pp_clock_type type, - struct amd_pp_clocks *clocks); - -int smu_get_max_high_clocks(struct smu_context *smu, - struct amd_pp_simple_clock_info *clocks); - -int smu_get_clock_by_type_with_latency(struct smu_context *smu, - enum smu_clk_type clk_type, - struct pp_clock_levels_with_latency *clocks); - -int smu_get_clock_by_type_with_voltage(struct smu_context *smu, - enum amd_pp_clock_type type, - struct pp_clock_levels_with_voltage *clocks); - -int smu_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request *clock_req); -int smu_display_disable_memory_clock_switch(struct smu_context *smu, bool disable_memory_clock_switch); -int smu_notify_smu_enable_pwe(struct smu_context *smu); - -int smu_set_xgmi_pstate(struct smu_context *smu, - uint32_t pstate); - -int smu_set_azalia_d3_pme(struct smu_context *smu); - -bool smu_baco_is_support(struct smu_context *smu); - -int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state); - -int smu_baco_reset(struct smu_context *smu); - -int smu_mode2_reset(struct smu_context *smu); extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, uint16_t *size, uint8_t *frev, uint8_t *crev, @@ -660,10 +799,6 @@ int smu_sys_get_pp_table(struct smu_context *smu, void **table); int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size); int smu_get_power_num_states(struct smu_context *smu, struct pp_states_info *state_info); enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu); -int smu_write_watermarks_table(struct smu_context *smu); -int smu_set_watermarks_for_clock_ranges( - struct smu_context *smu, - struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges); /* smu to display interface */ extern int smu_display_configuration_change(struct smu_context *smu, const @@ -674,8 +809,7 @@ extern int smu_get_current_clocks(struct smu_context *smu, extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, bool gate); extern int 
smu_handle_task(struct smu_context *smu, enum amd_dpm_forced_level level, - enum amd_pp_task task_id, - bool lock_needed); + enum amd_pp_task task_id); int smu_switch_power_profile(struct smu_context *smu, enum PP_SMC_POWER_PROFILE type, bool en); @@ -685,7 +819,7 @@ int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_typ int smu_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value); int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max, bool lock_needed); + uint32_t *min, uint32_t *max); int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, @@ -694,27 +828,10 @@ enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu); int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level); int smu_set_display_count(struct smu_context *smu, uint32_t count); bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type); +int smu_feature_update_enable_state(struct smu_context *smu, uint64_t feature_mask, bool enabled); const char *smu_get_message_name(struct smu_context *smu, enum smu_message_type type); const char *smu_get_feature_name(struct smu_context *smu, enum smu_feature_mask feature); size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf); int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask); -int smu_force_clk_levels(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t mask, - bool lock_needed); -int smu_set_mp1_state(struct smu_context *smu, - enum pp_mp1_state mp1_state); -int smu_set_df_cstate(struct smu_context *smu, - enum pp_df_cstate state); - -int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu, - struct pp_smu_nv_clock_table *max_clocks); - -int smu_get_uclk_dpm_states(struct smu_context *smu, - unsigned int *clock_values_in_khz, - unsigned int *num_states); - -int smu_get_dpm_clock_table(struct smu_context *smu, - struct dpm_clocks *clock_table); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h index e3291259b249..78e5927b7711 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h @@ -95,7 +95,8 @@ //BTC #define PPSMC_MSG_RunAfllBtc 0x30 -#define PPSMC_MSG_RunDcBtc 0x31 +#define PPSMC_MSG_RunGfxDcBtc 0x31 +#define PPSMC_MSG_RunSocDcBtc 0x32 //Debug #define PPSMC_MSG_DramLogSetDramAddrHigh 0x33 diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bd8c922dfd3e..7bf9a14bfa0b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -355,7 +355,6 @@ struct pp_hwmgr_func { int (*set_mp1_state)(struct pp_hwmgr *hwmgr, enum pp_mp1_state mp1_state); int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode); int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire); - int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h index 886b9a21ebd8..e02950b505fa 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h +++ 
b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h @@ -137,23 +137,23 @@ #define FEATURE_DS_SOCCLK_MASK (1 << FEATURE_DS_SOCCLK_BIT ) #define FEATURE_DS_LCLK_MASK (1 << FEATURE_DS_LCLK_BIT ) #define FEATURE_DS_FCLK_MASK (1 << FEATURE_DS_FCLK_BIT ) -#define FEATURE_DS_UCLK_MASK (1 << FEATURE_DS_UCLK_BIT ) +#define FEATURE_DS_LCLK_MASK (1 << FEATURE_DS_LCLK_BIT ) #define FEATURE_GFX_ULV_MASK (1 << FEATURE_GFX_ULV_BIT ) -#define FEATURE_DPM_VCN_MASK (1 << FEATURE_DPM_VCN_BIT ) +#define FEATURE_VCN_PG_MASK (1 << FEATURE_VCN_PG_BIT ) #define FEATURE_RSMU_SMN_CG_MASK (1 << FEATURE_RSMU_SMN_CG_BIT ) #define FEATURE_WAFL_CG_MASK (1 << FEATURE_WAFL_CG_BIT ) #define FEATURE_PPT_MASK (1 << FEATURE_PPT_BIT ) #define FEATURE_TDC_MASK (1 << FEATURE_TDC_BIT ) -#define FEATURE_APCC_PLUS_MASK (1 << FEATURE_APCC_PLUS_BIT ) +#define FEATURE_APCC_MASK (1 << FEATURE_APCC_BIT ) #define FEATURE_VR0HOT_MASK (1 << FEATURE_VR0HOT_BIT ) #define FEATURE_VR1HOT_MASK (1 << FEATURE_VR1HOT_BIT ) #define FEATURE_FW_CTF_MASK (1 << FEATURE_FW_CTF_BIT ) #define FEATURE_FAN_CONTROL_MASK (1 << FEATURE_FAN_CONTROL_BIT ) #define FEATURE_THERMAL_MASK (1 << FEATURE_THERMAL_BIT ) -#define FEATURE_OUT_OF_BAND_MONITOR_MASK (1 << FEATURE_OUT_OF_BAND_MONITOR_BIT ) -#define FEATURE_TEMP_DEPENDENT_VMIN_MASK (1 << FEATURE_TEMP_DEPENDENT_VMIN_BIT ) +#define FEATURE_OUT_OF_BAND_MONITOR_MASK (1 << EATURE_OUT_OF_BAND_MONITOR_BIT ) +#define FEATURE_TEMP_DEPENDENT_VMIN_MASK (1 << FEATURE_TEMP_DEPENDENT_VMIN_MASK ) //FIXME need updating @@ -423,30 +423,18 @@ typedef enum { } PwrConfig_e; typedef enum { - XGMI_LINK_RATE_2 = 2, // 2Gbps - XGMI_LINK_RATE_4 = 4, // 4Gbps - XGMI_LINK_RATE_8 = 8, // 8Gbps - XGMI_LINK_RATE_12 = 12, // 12Gbps - XGMI_LINK_RATE_16 = 16, // 16Gbps - XGMI_LINK_RATE_17 = 17, // 17Gbps - XGMI_LINK_RATE_18 = 18, // 18Gbps - XGMI_LINK_RATE_19 = 19, // 19Gbps - XGMI_LINK_RATE_20 = 20, // 20Gbps - XGMI_LINK_RATE_21 = 21, // 21Gbps - XGMI_LINK_RATE_22 = 22, // 22Gbps - XGMI_LINK_RATE_23 = 23, // 23Gbps - XGMI_LINK_RATE_24 = 24, // 24Gbps - XGMI_LINK_RATE_25 = 25, // 25Gbps + XGMI_LINK_RATE_12 = 0, // 12Gbps + XGMI_LINK_RATE_16, // 16Gbps + XGMI_LINK_RATE_22, // 22Gbps + XGMI_LINK_RATE_25, // 25Gbps XGMI_LINK_RATE_COUNT } XGMI_LINK_RATE_e; typedef enum { - XGMI_LINK_WIDTH_1 = 1, // x1 - XGMI_LINK_WIDTH_2 = 2, // x2 - XGMI_LINK_WIDTH_4 = 4, // x4 - XGMI_LINK_WIDTH_8 = 8, // x8 - XGMI_LINK_WIDTH_9 = 9, // x9 - XGMI_LINK_WIDTH_16 = 16, // x16 + XGMI_LINK_WIDTH_2 = 0, // x2 + XGMI_LINK_WIDTH_4, // x4 + XGMI_LINK_WIDTH_8, // x8 + XGMI_LINK_WIDTH_16, // x16 XGMI_LINK_WIDTH_COUNT } XGMI_LINK_WIDTH_e; @@ -708,11 +696,7 @@ typedef struct { uint8_t GpioI2cSda; // Serial Data uint16_t GpioPadding; - // Platform input telemetry voltage coefficient - uint32_t BoardVoltageCoeffA; // decode by /1000 - uint32_t BoardVoltageCoeffB; // decode by /1000 - - uint32_t BoardReserved[7]; + uint32_t BoardReserved[9]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; // SMU internal use @@ -818,7 +802,7 @@ typedef struct { uint32_t P2VCharzFreq[AVFS_VOLTAGE_COUNT]; // in 10KHz units - uint32_t EnabledAvfsModules[3]; + uint32_t EnabledAvfsModules[2]; uint32_t MmHubPadding[8]; // SMU internal use } AvfsFuseOverride_t; @@ -881,8 +865,7 @@ typedef struct { //#define TABLE_ACTIVITY_MONITOR_COEFF 7 #define TABLE_OVERDRIVE 7 #define TABLE_WAFL_XGMI_TOPOLOGY 8 -#define TABLE_I2C_COMMANDS 9 -#define TABLE_COUNT 10 +#define TABLE_COUNT 9 // These defines are used with the SMC_MSG_SetUclkFastSwitch message. 
typedef enum { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h index d8c9b7f91fcc..b0dd05d431dd 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h @@ -114,7 +114,6 @@ __SMU_DUMMY_MAP(PowerDownJpeg), \ __SMU_DUMMY_MAP(BacoAudioD3PME), \ __SMU_DUMMY_MAP(ArmD3), \ - __SMU_DUMMY_MAP(RunDcBtc), \ __SMU_DUMMY_MAP(RunGfxDcBtc), \ __SMU_DUMMY_MAP(RunSocDcBtc), \ __SMU_DUMMY_MAP(SetMemoryChannelEnable), \ @@ -169,7 +168,6 @@ __SMU_DUMMY_MAP(PowerGateAtHub), \ __SMU_DUMMY_MAP(SetSoftMinJpeg), \ __SMU_DUMMY_MAP(SetHardMinFclkByFreq), \ - __SMU_DUMMY_MAP(DFCstateControl), \ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -253,7 +251,6 @@ enum smu_clk_type { __SMU_DUMMY_MAP(TEMP_DEPENDENT_VMIN), \ __SMU_DUMMY_MAP(MMHUB_PG), \ __SMU_DUMMY_MAP(ATHUB_PG), \ - __SMU_DUMMY_MAP(APCC_DFLL), \ __SMU_DUMMY_MAP(WAFL_CG), #undef __SMU_DUMMY_MAP diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index abd4debb3def..5bda8539447a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -27,7 +27,7 @@ #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU11_DRIVER_IF_VERSION_VG20 0x13 -#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F +#define SMU11_DRIVER_IF_VERSION_ARCT 0x09 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 @@ -130,124 +130,6 @@ enum smu_v11_0_baco_seq { BACO_SEQ_COUNT, }; -int smu_v11_0_init_microcode(struct smu_context *smu); - -int smu_v11_0_load_microcode(struct smu_context *smu); - -int smu_v11_0_init_smc_tables(struct smu_context *smu); - -int smu_v11_0_fini_smc_tables(struct smu_context *smu); - -int smu_v11_0_init_power(struct smu_context *smu); - -int smu_v11_0_fini_power(struct smu_context *smu); - -int smu_v11_0_check_fw_status(struct smu_context *smu); - -int smu_v11_0_setup_pptable(struct smu_context *smu); - -int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu); - -int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu); - -int smu_v11_0_check_pptable(struct smu_context *smu); - -int smu_v11_0_parse_pptable(struct smu_context *smu); - -int smu_v11_0_populate_smc_pptable(struct smu_context *smu); - -int smu_v11_0_check_fw_version(struct smu_context *smu); - -int smu_v11_0_write_pptable(struct smu_context *smu); - -int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu); - -int smu_v11_0_set_tool_table_location(struct smu_context *smu); - -int smu_v11_0_notify_memory_pool_location(struct smu_context *smu); - -int smu_v11_0_system_features_control(struct smu_context *smu, - bool en); - -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg); - -int -smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, - uint32_t param); - -int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg); - -int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count); - -int smu_v11_0_set_allowed_mask(struct smu_context *smu); - -int smu_v11_0_get_enabled_mask(struct smu_context *smu, - uint32_t *feature_mask, uint32_t num); - -int smu_v11_0_notify_display_change(struct smu_context *smu); - -int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n); - -int smu_v11_0_get_current_clk_freq(struct smu_context *smu, - enum smu_clk_type clk_id, - uint32_t *value); - -int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu); - -int 
smu_v11_0_start_thermal_control(struct smu_context *smu); - -int smu_v11_0_stop_thermal_control(struct smu_context *smu); - -int smu_v11_0_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size); - -int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk); - -int -smu_v11_0_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request - *clock_req); - -uint32_t -smu_v11_0_get_fan_control_mode(struct smu_context *smu); - -int -smu_v11_0_set_fan_control_mode(struct smu_context *smu, - uint32_t mode); - -int -smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed); - -int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, - uint32_t speed); - -int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, - uint32_t pstate); - -int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable); - -int smu_v11_0_register_irq_handler(struct smu_context *smu); - -int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu); - -int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, - struct pp_smu_nv_clock_table *max_clocks); - -bool smu_v11_0_baco_is_support(struct smu_context *smu); - -enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu); - -int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state); - -int smu_v11_0_baco_reset(struct smu_context *smu); - -int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max); - -int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max); - -int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +void smu_v11_0_set_smu_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h index 9b9f5df0911c..acf3db12f59f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h @@ -37,45 +37,6 @@ struct smu_12_0_cmn2aisc_mapping { int map_to; }; -int smu_v12_0_send_msg_without_waiting(struct smu_context *smu, - uint16_t msg); - -int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg); - -int smu_v12_0_wait_for_response(struct smu_context *smu); - -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg); - -int -smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, - uint32_t param); - -int smu_v12_0_check_fw_status(struct smu_context *smu); - -int smu_v12_0_check_fw_version(struct smu_context *smu); - -int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate); - -int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate); - -int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable); - -uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu); - -int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable); - -int smu_v12_0_init_smc_tables(struct smu_context *smu); - -int smu_v12_0_fini_smc_tables(struct smu_context *smu); - -int smu_v12_0_populate_smc_tables(struct smu_context *smu); - -int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max); - -int smu_v12_0_mode2_reset(struct smu_context *smu); - -int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max); +void smu_v12_0_set_smu_funcs(struct smu_context *smu); #endif diff --git 
a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h index 0c66f0fe1aaf..a0883038f3c3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h @@ -120,8 +120,7 @@ #define PPSMC_MSG_SetMGpuFanBoostLimitRpm 0x5D #define PPSMC_MSG_GetAVFSVoltageByDpm 0x5F #define PPSMC_MSG_BacoWorkAroundFlushVDCI 0x60 -#define PPSMC_MSG_DFCstateControl 0x63 -#define PPSMC_Message_Count 0x64 +#define PPSMC_Message_Count 0x61 typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 769f9451d904..0b461404af6b 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -26,7 +26,6 @@ #include #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" @@ -178,7 +177,6 @@ static struct smu_11_0_cmn2aisc_mapping navi10_feature_mask_map[SMU_FEATURE_COUN FEA_MAP(TEMP_DEPENDENT_VMIN), FEA_MAP(MMHUB_PG), FEA_MAP(ATHUB_PG), - FEA_MAP(APCC_DFLL), }; static struct smu_11_0_cmn2aisc_mapping navi10_table_map[SMU_TABLE_COUNT] = { @@ -329,52 +327,40 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, memset(feature_mask, 0, sizeof(uint32_t) * num); *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_PREFETCHER_BIT) + | FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT) + | FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT) | FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT) + | FEATURE_MASK(FEATURE_DPM_LINK_BIT) + | FEATURE_MASK(FEATURE_GFX_ULV_BIT) | FEATURE_MASK(FEATURE_RSMU_SMN_CG_BIT) | FEATURE_MASK(FEATURE_DS_SOCCLK_BIT) | FEATURE_MASK(FEATURE_PPT_BIT) | FEATURE_MASK(FEATURE_TDC_BIT) | FEATURE_MASK(FEATURE_GFX_EDC_BIT) - | FEATURE_MASK(FEATURE_APCC_PLUS_BIT) | FEATURE_MASK(FEATURE_VR0HOT_BIT) | FEATURE_MASK(FEATURE_FAN_CONTROL_BIT) | FEATURE_MASK(FEATURE_THERMAL_BIT) | FEATURE_MASK(FEATURE_LED_DISPLAY_BIT) - | FEATURE_MASK(FEATURE_DS_LCLK_BIT) + | FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT) + | FEATURE_MASK(FEATURE_DS_GFXCLK_BIT) | FEATURE_MASK(FEATURE_DS_DCEFCLK_BIT) | FEATURE_MASK(FEATURE_FW_DSTATE_BIT) | FEATURE_MASK(FEATURE_BACO_BIT) | FEATURE_MASK(FEATURE_ACDC_BIT) | FEATURE_MASK(FEATURE_GFX_SS_BIT) | FEATURE_MASK(FEATURE_APCC_DFLL_BIT) - | FEATURE_MASK(FEATURE_FW_CTF_BIT) - | FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); - - if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT); - - if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); - - if (adev->pm.pp_feature & PP_PCIE_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_LINK_BIT); - - if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT); + | FEATURE_MASK(FEATURE_FW_CTF_BIT); if (adev->pm.pp_feature & PP_MCLK_DPM_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT) | FEATURE_MASK(FEATURE_MEM_VDDCI_SCALING_BIT) | FEATURE_MASK(FEATURE_MEM_MVDD_SCALING_BIT); - if (adev->pm.pp_feature & PP_ULV_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT); - - if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) + if (adev->pm.pp_feature & PP_GFXOFF_MASK) { *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFXOFF_BIT); + 
/* TODO: remove it once fw fix the bug */ + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_FW_DSTATE_BIT); + } if (smu->adev->pg_flags & AMD_PG_SUPPORT_MMHUB) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MMHUB_PG_BIT); @@ -797,13 +783,13 @@ static int navi10_populate_umd_state_clk(struct smu_context *smu) int ret = 0; uint32_t min_sclk_freq = 0, min_mclk_freq = 0; - ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL, false); + ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL); if (ret) return ret; smu->pstate_sclk = min_sclk_freq * 100; - ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL, false); + ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL); if (ret) return ret; @@ -856,7 +842,7 @@ static int navi10_pre_display_config_changed(struct smu_context *smu) return ret; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq, false); + ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq); if (ret) return ret; ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, max_freq); @@ -906,7 +892,7 @@ static int navi10_force_dpm_limit_value(struct smu_context *smu, bool highest) for (i = 0; i < ARRAY_SIZE(clks); i++) { clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); + ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq); if (ret) return ret; @@ -933,7 +919,7 @@ static int navi10_unforce_dpm_levels(struct smu_context *smu) for (i = 0; i < ARRAY_SIZE(clks); i++) { clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); + ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq); if (ret) return ret; @@ -1268,9 +1254,7 @@ static int navi10_notify_smc_dispaly_config(struct smu_context *smu) if (smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { clock_req.clock_type = amd_pp_dcef_clock; clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10; - - ret = smu_v11_0_display_clock_voltage_request(smu, &clock_req); - if (!ret) { + if (!smu_display_clock_voltage_request(smu, &clock_req)) { if (smu_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetMinDeepSleepDcefclk, @@ -1424,7 +1408,7 @@ static int navi10_read_sensor(struct smu_context *smu, *size = 4; break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = smu_smc_read_sensor(smu, sensor, data, size); } mutex_unlock(&smu->sensor_lock); @@ -1467,47 +1451,18 @@ static int navi10_set_peak_clock_by_device(struct smu_context *smu) uint32_t sclk_freq = 0, uclk_freq = 0; uint32_t uclk_level = 0; - switch (adev->asic_type) { - case CHIP_NAVI10: - switch (adev->pdev->revision) { - case 0xf0: /* XTX */ - case 0xc0: - sclk_freq = NAVI10_PEAK_SCLK_XTX; - break; - case 0xf1: /* XT */ - case 0xc1: - sclk_freq = NAVI10_PEAK_SCLK_XT; - break; - default: /* XL */ - sclk_freq = NAVI10_PEAK_SCLK_XL; - break; - } + switch (adev->pdev->revision) { + case 0xf0: /* XTX */ + case 0xc0: + sclk_freq = NAVI10_PEAK_SCLK_XTX; break; - case CHIP_NAVI14: - switch (adev->pdev->revision) { - case 0xc7: /* XT */ - case 0xf4: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK; - break; - case 0xc1: /* XTM */ - case 0xf2: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK; - break; - case 0xc3: /* XLM */ - case 0xf3: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; - break; - case 0xc5: /* XTX */ - case 0xf6: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; - break; 
- default: /* XL */ - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK; - break; - } + case 0xf1: /* XT */ + case 0xc1: + sclk_freq = NAVI10_PEAK_SCLK_XT; + break; + default: /* XL */ + sclk_freq = NAVI10_PEAK_SCLK_XL; break; - default: - return -EINVAL; } ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_level); @@ -1530,6 +1485,10 @@ static int navi10_set_peak_clock_by_device(struct smu_context *smu) static int navi10_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { int ret = 0; + struct amdgpu_device *adev = smu->adev; + + if (adev->asic_type != CHIP_NAVI10) + return -EINVAL; switch (level) { case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: @@ -1632,28 +1591,6 @@ static int navi10_get_power_limit(struct smu_context *smu, return 0; } -static int navi10_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) -{ - PPTable_t *pptable = smu->smu_table.driver_pptable; - int ret, i; - uint32_t smu_pcie_arg; - - for (i = 0; i < NUM_LINK_LEVELS; i++) { - smu_pcie_arg = (i << 16) | - ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : - (pcie_gen_cap << 8)) | ((pptable->PcieLaneCount[i] <= pcie_width_cap) ? - pptable->PcieLaneCount[i] : pcie_width_cap); - ret = smu_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg); - } - - return ret; -} - - static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, .alloc_dpm_context = navi10_allocate_dpm_context, @@ -1692,59 +1629,12 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_thermal_temperature_range = navi10_get_thermal_temperature_range, .display_disable_memory_clock_switch = navi10_display_disable_memory_clock_switch, .get_power_limit = navi10_get_power_limit, - .update_pcie_parameters = navi10_update_pcie_parameters, - .init_microcode = smu_v11_0_init_microcode, - .load_microcode = smu_v11_0_load_microcode, - .init_smc_tables = smu_v11_0_init_smc_tables, - .fini_smc_tables = smu_v11_0_fini_smc_tables, - .init_power = smu_v11_0_init_power, - .fini_power = smu_v11_0_fini_power, - .check_fw_status = smu_v11_0_check_fw_status, - .setup_pptable = smu_v11_0_setup_pptable, - .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values, - .get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios, - .check_pptable = smu_v11_0_check_pptable, - .parse_pptable = smu_v11_0_parse_pptable, - .populate_smc_tables = smu_v11_0_populate_smc_pptable, - .check_fw_version = smu_v11_0_check_fw_version, - .write_pptable = smu_v11_0_write_pptable, - .set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep, - .set_tool_table_location = smu_v11_0_set_tool_table_location, - .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, - .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, - .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, - .read_smc_arg = smu_v11_0_read_arg, - .init_display_count = smu_v11_0_init_display_count, - .set_allowed_mask = smu_v11_0_set_allowed_mask, - .get_enabled_mask = smu_v11_0_get_enabled_mask, - .notify_display_change = smu_v11_0_notify_display_change, - .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, - .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, - .start_thermal_control = smu_v11_0_start_thermal_control, - .stop_thermal_control = smu_v11_0_stop_thermal_control, - .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk, - 
.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, - .get_fan_control_mode = smu_v11_0_get_fan_control_mode, - .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, - .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, - .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, - .gfx_off_control = smu_v11_0_gfx_off_control, - .register_irq_handler = smu_v11_0_register_irq_handler, - .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, - .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, - .baco_is_support= smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, - .baco_reset = smu_v11_0_baco_reset, - .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, - .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, - .override_pcie_parameters = smu_v11_0_override_pcie_parameters, }; void navi10_set_ppt_funcs(struct smu_context *smu) { + struct smu_table_context *smu_table = &smu->smu_table; + smu->ppt_funcs = &navi10_ppt_funcs; + smu_table->table_count = TABLE_COUNT; } diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h index a37e37c5f105..620ff17c2fef 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h @@ -27,12 +27,6 @@ #define NAVI10_PEAK_SCLK_XT (1755) #define NAVI10_PEAK_SCLK_XL (1625) -#define NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK (1670) -#define NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK (1448) -#define NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK (1181) -#define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717) -#define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448) - extern void navi10_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 4a9751971a9d..e62bfba51562 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "soc15_common.h" #include "smu_v12_0_ppsmc.h" #include "smu12_driver_if.h" @@ -161,17 +160,21 @@ static int renoir_tables_init(struct smu_context *smu, struct smu_table *tables) * This interface just for getting uclk ultimate freq and should't introduce * other likewise function result in overmuch callback. 
*/ -static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t dpm_level, uint32_t *freq) +static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock, bool max) { - DpmClocks_t *clk_table = smu->smu_table.clocks_table; - if (!clk_table || clk_type >= SMU_CLK_COUNT) + DpmClocks_t *table = smu->smu_table.clocks_table; + + if (!clock || !table) return -EINVAL; - GET_DPM_CUR_FREQ(clk_table, clk_type, dpm_level, *freq); + if (max) + *clock = table->FClocks[NUM_FCLK_DPM_LEVELS-1].Freq; + else + *clock = table->FClocks[0].Freq; return 0; + } static int renoir_print_clk_levels(struct smu_context *smu, @@ -195,7 +198,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, case SMU_SCLK: /* retirve table returned paramters unit is MHz */ cur_value = metrics.ClockFrequency[CLOCK_GFXCLK]; - ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max, false); + ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max); if (!ret) { /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ if (cur_value == max) @@ -243,474 +246,20 @@ static int renoir_print_clk_levels(struct smu_context *smu, return size; } -static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context *smu) -{ - enum amd_pm_state_type pm_type; - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - - if (!smu_dpm_ctx->dpm_context || - !smu_dpm_ctx->dpm_current_power_state) - return -EINVAL; - - switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) { - case SMU_STATE_UI_LABEL_BATTERY: - pm_type = POWER_STATE_TYPE_BATTERY; - break; - case SMU_STATE_UI_LABEL_BALLANCED: - pm_type = POWER_STATE_TYPE_BALANCED; - break; - case SMU_STATE_UI_LABEL_PERFORMANCE: - pm_type = POWER_STATE_TYPE_PERFORMANCE; - break; - default: - if (smu_dpm_ctx->dpm_current_power_state->classification.flags & SMU_STATE_CLASSIFICATION_FLAG_BOOT) - pm_type = POWER_STATE_TYPE_INTERNAL_BOOT; - else - pm_type = POWER_STATE_TYPE_DEFAULT; - break; - } - - return pm_type; -} - -static int renoir_dpm_set_uvd_enable(struct smu_context *smu, bool enable) -{ - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; - int ret = 0; - - if (enable) { - /* vcn dpm on is a prerequisite for vcn power gate messages */ - if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0); - if (ret) - return ret; - } - power_gate->vcn_gated = false; - } else { - if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn); - if (ret) - return ret; - } - power_gate->vcn_gated = true; - } - - return ret; -} - -static int renoir_force_dpm_limit_value(struct smu_context *smu, bool highest) -{ - int ret = 0, i = 0; - uint32_t min_freq, max_freq, force_freq; - enum smu_clk_type clk_type; - - enum smu_clk_type clks[] = { - SMU_GFXCLK, - SMU_MCLK, - SMU_SOCCLK, - }; - - for (i = 0; i < ARRAY_SIZE(clks); i++) { - clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - force_freq = highest ? 
max_freq : min_freq; - ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq); - if (ret) - return ret; - } - - return ret; -} - -static int renoir_unforce_dpm_levels(struct smu_context *smu) { - - int ret = 0, i = 0; - uint32_t min_freq, max_freq; - enum smu_clk_type clk_type; - - struct clk_feature_map { - enum smu_clk_type clk_type; - uint32_t feature; - } clk_feature_map[] = { - {SMU_GFXCLK, SMU_FEATURE_DPM_GFXCLK_BIT}, - {SMU_MCLK, SMU_FEATURE_DPM_UCLK_BIT}, - {SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT}, - }; - - for (i = 0; i < ARRAY_SIZE(clk_feature_map); i++) { - if (!smu_feature_is_enabled(smu, clk_feature_map[i].feature)) - continue; - - clk_type = clk_feature_map[i].clk_type; - - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq); - if (ret) - return ret; - } - - return ret; -} - -static int renoir_get_workload_type(struct smu_context *smu, uint32_t profile) -{ - - uint32_t pplib_workload = 0; - - switch (profile) { - case PP_SMC_POWER_PROFILE_FULLSCREEN3D: - pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; - break; - case PP_SMC_POWER_PROFILE_CUSTOM: - pplib_workload = WORKLOAD_PPLIB_COUNT; - break; - case PP_SMC_POWER_PROFILE_VIDEO: - pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT; - break; - case PP_SMC_POWER_PROFILE_VR: - pplib_workload = WORKLOAD_PPLIB_VR_BIT; - break; - case PP_SMC_POWER_PROFILE_COMPUTE: - pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT; - break; - default: - return -EINVAL; - } - - return pplib_workload; -} - -static int renoir_get_profiling_clk_mask(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask) -{ - - if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { - if (sclk_mask) - *sclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { - if (mclk_mask) - *mclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { - if(sclk_mask) - /* The sclk as gfxclk and has three level about max/min/current */ - *sclk_mask = 3 - 1; - - if(mclk_mask) - *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; - - if(soc_mask) - *soc_mask = NUM_SOCCLK_DPM_LEVELS - 1; - } - - return 0; -} - -/** - * This interface get dpm clock table for dc - */ -static int renoir_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table) -{ - DpmClocks_t *table = smu->smu_table.clocks_table; - int i; - - if (!clock_table || !table) - return -EINVAL; - - for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) { - clock_table->DcfClocks[i].Freq = table->DcfClocks[i].Freq; - clock_table->DcfClocks[i].Vol = table->DcfClocks[i].Vol; - } - - for (i = 0; i < NUM_SOCCLK_DPM_LEVELS; i++) { - clock_table->SocClocks[i].Freq = table->SocClocks[i].Freq; - clock_table->SocClocks[i].Vol = table->SocClocks[i].Vol; - } - - for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) { - clock_table->FClocks[i].Freq = table->FClocks[i].Freq; - clock_table->FClocks[i].Vol = table->FClocks[i].Vol; - } - - for (i = 0; i< NUM_MEMCLK_DPM_LEVELS; i++) { - clock_table->MemClocks[i].Freq = table->MemClocks[i].Freq; - clock_table->MemClocks[i].Vol = table->MemClocks[i].Vol; - } - - return 0; -} - -static int renoir_force_clk_levels(struct smu_context *smu, - enum smu_clk_type clk_type, uint32_t mask) -{ - - int ret = 0 ; - uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; - DpmClocks_t *clk_table = smu->smu_table.clocks_table; - - soft_min_level = mask ? 
(ffs(mask) - 1) : 0; - soft_max_level = mask ? (fls(mask) - 1) : 0; - - switch (clk_type) { - case SMU_GFXCLK: - case SMU_SCLK: - if (soft_min_level > 2 || soft_max_level > 2) { - pr_info("Currently sclk only support 3 levels on APU\n"); - return -EINVAL; - } - - ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min_freq, &max_freq, false); - if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, - soft_max_level == 0 ? min_freq : - soft_max_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : max_freq); - if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, - soft_min_level == 2 ? max_freq : - soft_min_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : min_freq); - if (ret) - return ret; - break; - case SMU_SOCCLK: - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max_freq); - if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinSocclkByFreq, min_freq); - if (ret) - return ret; - break; - case SMU_MCLK: - case SMU_FCLK: - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max_freq); - if (ret) - return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min_freq); - if (ret) - return ret; - break; - default: - break; - } - - return ret; -} - -static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) -{ - int workload_type, ret; - uint32_t profile_mode = input[size]; - - if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); - return -EINVAL; - } - - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_workload_get_type(smu, smu->power_profile_mode); - if (workload_type < 0) { - pr_err("Unsupported power profile mode %d on RENOIR\n",smu->power_profile_mode); - return -EINVAL; - } - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type); - if (ret) { - pr_err("Fail to set workload type %d\n", workload_type); - return ret; - } - - smu->power_profile_mode = profile_mode; - - return 0; -} - -static int renoir_set_peak_clock_by_device(struct smu_context *smu) -{ - int ret = 0; - uint32_t sclk_freq = 0, uclk_freq = 0; - - ret = smu_get_dpm_freq_range(smu, SMU_SCLK, NULL, &sclk_freq, false); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq); - if (ret) - return ret; - - ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &uclk_freq, false); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq); - if (ret) - return ret; - - return ret; -} - -static int renoir_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) -{ - int ret = 0; - - switch (level) { - case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: - ret = renoir_set_peak_clock_by_device(smu); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -/* save watermark settings into pplib smu structure, - * also pass data to smu controller - */ -static int renoir_set_watermarks_table( - struct smu_context *smu, - void *watermarks, - struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges) -{ - int i; - int ret = 0; - Watermarks_t *table = watermarks; - - if (!table || 
!clock_ranges) - return -EINVAL; - - if (clock_ranges->num_wm_dmif_sets > 4 || - clock_ranges->num_wm_mcif_sets > 4) - return -EINVAL; - - /* save into smu->smu_table.tables[SMU_TABLE_WATERMARKS]->cpu_addr*/ - for (i = 0; i < clock_ranges->num_wm_dmif_sets; i++) { - table->WatermarkRow[WM_DCFCLK][i].MinClock = - cpu_to_le16((uint16_t) - (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz)); - table->WatermarkRow[WM_DCFCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz)); - table->WatermarkRow[WM_DCFCLK][i].MinMclk = - cpu_to_le16((uint16_t) - (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz)); - table->WatermarkRow[WM_DCFCLK][i].MaxMclk = - cpu_to_le16((uint16_t) - (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz)); - table->WatermarkRow[WM_DCFCLK][i].WmSetting = (uint8_t) - clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id; - } - - for (i = 0; i < clock_ranges->num_wm_mcif_sets; i++) { - table->WatermarkRow[WM_SOCCLK][i].MinClock = - cpu_to_le16((uint16_t) - (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz)); - table->WatermarkRow[WM_SOCCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz)); - table->WatermarkRow[WM_SOCCLK][i].MinMclk = - cpu_to_le16((uint16_t) - (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz)); - table->WatermarkRow[WM_SOCCLK][i].MaxMclk = - cpu_to_le16((uint16_t) - (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz)); - table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t) - clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; - } - - /* pass data to smu controller */ - ret = smu_write_watermarks_table(smu); - - return ret; -} - -static int renoir_get_power_profile_mode(struct smu_context *smu, - char *buf) -{ - static const char *profile_name[] = { - "BOOTUP_DEFAULT", - "3D_FULL_SCREEN", - "POWER_SAVING", - "VIDEO", - "VR", - "COMPUTE", - "CUSTOM"}; - uint32_t i, size = 0; - int16_t workload_type = 0; - - if (!smu->pm_enabled || !buf) - return -EINVAL; - - for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { - /* - * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT - * Not all profile modes are supported on arcturus. - */ - workload_type = smu_workload_get_type(smu, i); - if (workload_type < 0) - continue; - - size += sprintf(buf + size, "%2d %14s%s\n", - i, profile_name[i], (i == smu->power_profile_mode) ? 
"*" : " "); - } - - return size; -} - static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_msg_index = renoir_get_smu_msg_index, .get_smu_table_index = renoir_get_smu_table_index, .tables_init = renoir_tables_init, .set_power_state = NULL, - .get_dpm_clk_limited = renoir_get_dpm_clk_limited, + .get_dpm_uclk_limited = renoir_get_dpm_uclk_limited, .print_clk_levels = renoir_print_clk_levels, - .get_current_power_state = renoir_get_current_power_state, - .dpm_set_uvd_enable = renoir_dpm_set_uvd_enable, - .force_dpm_limit_value = renoir_force_dpm_limit_value, - .unforce_dpm_levels = renoir_unforce_dpm_levels, - .get_workload_type = renoir_get_workload_type, - .get_profiling_clk_mask = renoir_get_profiling_clk_mask, - .force_clk_levels = renoir_force_clk_levels, - .set_power_profile_mode = renoir_set_power_profile_mode, - .set_performance_level = renoir_set_performance_level, - .get_dpm_clock_table = renoir_get_dpm_clock_table, - .set_watermarks_table = renoir_set_watermarks_table, - .get_power_profile_mode = renoir_get_power_profile_mode, - .check_fw_status = smu_v12_0_check_fw_status, - .check_fw_version = smu_v12_0_check_fw_version, - .powergate_sdma = smu_v12_0_powergate_sdma, - .powergate_vcn = smu_v12_0_powergate_vcn, - .send_smc_msg = smu_v12_0_send_msg, - .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, - .read_smc_arg = smu_v12_0_read_arg, - .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, - .gfx_off_control = smu_v12_0_gfx_off_control, - .init_smc_tables = smu_v12_0_init_smc_tables, - .fini_smc_tables = smu_v12_0_fini_smc_tables, - .populate_smc_tables = smu_v12_0_populate_smc_tables, - .get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq, - .mode2_reset = smu_v12_0_mode2_reset, - .set_soft_freq_limited_range = smu_v12_0_set_soft_freq_limited_range, }; void renoir_set_ppt_funcs(struct smu_context *smu) { + struct smu_table_context *smu_table = &smu->smu_table; + smu->ppt_funcs = &renoir_ppt_funcs; smu->smc_if_version = SMU12_DRIVER_IF_VERSION; - smu->is_apu = true; + smu_table->table_count = TABLE_COUNT; } diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index bbb74b1d5d80..c5257ae3188a 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -27,13 +27,14 @@ #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" #include "soc15_common.h" #include "atom.h" -#include "amd_pcie.h" +#include "vega20_ppt.h" +#include "arcturus_ppt.h" +#include "navi10_ppt.h" #include "asic_reg/thm/thm_11_0_2_offset.h" #include "asic_reg/thm/thm_11_0_2_sh_mask.h" @@ -60,7 +61,7 @@ static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu, return 0; } -int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg) +static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg) { struct amdgpu_device *adev = smu->adev; @@ -87,7 +88,7 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 
0 : -EIO; } -int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) +static int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) { struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0; @@ -112,7 +113,7 @@ int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) } -int +static int smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, uint32_t param) { @@ -143,7 +144,7 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, return ret; } -int smu_v11_0_init_microcode(struct smu_context *smu) +static int smu_v11_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; const char *chip_name; @@ -205,7 +206,7 @@ out: return err; } -int smu_v11_0_load_microcode(struct smu_context *smu) +static int smu_v11_0_load_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; const uint32_t *src; @@ -243,7 +244,7 @@ int smu_v11_0_load_microcode(struct smu_context *smu) return 0; } -int smu_v11_0_check_fw_status(struct smu_context *smu) +static int smu_v11_0_check_fw_status(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; uint32_t mp1_fw_flags; @@ -258,7 +259,7 @@ int smu_v11_0_check_fw_status(struct smu_context *smu) return -EIO; } -int smu_v11_0_check_fw_version(struct smu_context *smu) +static int smu_v11_0_check_fw_version(struct smu_context *smu) { uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; @@ -353,7 +354,7 @@ static int smu_v11_0_set_pptable_v2_1(struct smu_context *smu, void **table, return 0; } -int smu_v11_0_setup_pptable(struct smu_context *smu) +static int smu_v11_0_setup_pptable(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; const struct smc_firmware_header_v1_0 *hdr; @@ -432,13 +433,13 @@ static int smu_v11_0_fini_dpm_context(struct smu_context *smu) return 0; } -int smu_v11_0_init_smc_tables(struct smu_context *smu) +static int smu_v11_0_init_smc_tables(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = NULL; int ret = 0; - if (smu_table->tables) + if (smu_table->tables || smu_table->table_count == 0) return -EINVAL; tables = kcalloc(SMU_TABLE_COUNT, sizeof(struct smu_table), @@ -459,17 +460,18 @@ int smu_v11_0_init_smc_tables(struct smu_context *smu) return 0; } -int smu_v11_0_fini_smc_tables(struct smu_context *smu) +static int smu_v11_0_fini_smc_tables(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; int ret = 0; - if (!smu_table->tables) + if (!smu_table->tables || smu_table->table_count == 0) return -EINVAL; kfree(smu_table->tables); kfree(smu_table->metrics_table); smu_table->tables = NULL; + smu_table->table_count = 0; smu_table->metrics_table = NULL; smu_table->metrics_time = 0; @@ -479,7 +481,7 @@ int smu_v11_0_fini_smc_tables(struct smu_context *smu) return 0; } -int smu_v11_0_init_power(struct smu_context *smu) +static int smu_v11_0_init_power(struct smu_context *smu) { struct smu_power_context *smu_power = &smu->smu_power; @@ -497,7 +499,7 @@ int smu_v11_0_init_power(struct smu_context *smu) return 0; } -int smu_v11_0_fini_power(struct smu_context *smu) +static int smu_v11_0_fini_power(struct smu_context *smu) { struct smu_power_context *smu_power = &smu->smu_power; @@ -574,7 +576,7 @@ int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu) return 0; } -int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu) +static int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu) { int ret, index; struct 
amdgpu_device *adev = smu->adev; @@ -671,7 +673,7 @@ int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu) return 0; } -int smu_v11_0_notify_memory_pool_location(struct smu_context *smu) +static int smu_v11_0_notify_memory_pool_location(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *memory_pool = &smu_table->memory_pool; @@ -717,7 +719,7 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu) return ret; } -int smu_v11_0_check_pptable(struct smu_context *smu) +static int smu_v11_0_check_pptable(struct smu_context *smu) { int ret; @@ -725,7 +727,7 @@ int smu_v11_0_check_pptable(struct smu_context *smu) return ret; } -int smu_v11_0_parse_pptable(struct smu_context *smu) +static int smu_v11_0_parse_pptable(struct smu_context *smu) { int ret; @@ -749,7 +751,7 @@ int smu_v11_0_parse_pptable(struct smu_context *smu) return ret; } -int smu_v11_0_populate_smc_pptable(struct smu_context *smu) +static int smu_v11_0_populate_smc_pptable(struct smu_context *smu) { int ret; @@ -758,7 +760,7 @@ int smu_v11_0_populate_smc_pptable(struct smu_context *smu) return ret; } -int smu_v11_0_write_pptable(struct smu_context *smu) +static int smu_v11_0_write_pptable(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; int ret = 0; @@ -769,7 +771,24 @@ int smu_v11_0_write_pptable(struct smu_context *smu) return ret; } -int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) +static int smu_v11_0_write_watermarks_table(struct smu_context *smu) +{ + int ret = 0; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = NULL; + + table = &smu_table->tables[SMU_TABLE_WATERMARKS]; + + if (!table->cpu_addr) + return -EINVAL; + + ret = smu_update_table(smu, SMU_TABLE_WATERMARKS, 0, table->cpu_addr, + true); + + return ret; +} + +static int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) { int ret; @@ -781,7 +800,7 @@ int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) return ret; } -int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) +static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; @@ -790,10 +809,11 @@ int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) if (!table_context) return -EINVAL; - return smu_v11_0_set_deep_sleep_dcefclk(smu, table_context->boot_values.dcefclk / 100); + return smu_set_deep_sleep_dcefclk(smu, + table_context->boot_values.dcefclk / 100); } -int smu_v11_0_set_tool_table_location(struct smu_context *smu) +static int smu_v11_0_set_tool_table_location(struct smu_context *smu) { int ret = 0; struct smu_table *tool_table = &smu->smu_table.tables[SMU_TABLE_PMSTATUSLOG]; @@ -811,7 +831,7 @@ int smu_v11_0_set_tool_table_location(struct smu_context *smu) return ret; } -int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count) +static int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count) { int ret = 0; @@ -823,7 +843,7 @@ int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count) } -int smu_v11_0_set_allowed_mask(struct smu_context *smu) +static int smu_v11_0_set_allowed_mask(struct smu_context *smu) { struct smu_feature *feature = &smu->smu_feature; int ret = 0; @@ -850,7 +870,7 @@ failed: return ret; } -int smu_v11_0_get_enabled_mask(struct smu_context *smu, +static int smu_v11_0_get_enabled_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t 
num) { uint32_t feature_mask_high = 0, feature_mask_low = 0; @@ -879,7 +899,7 @@ int smu_v11_0_get_enabled_mask(struct smu_context *smu, return ret; } -int smu_v11_0_system_features_control(struct smu_context *smu, +static int smu_v11_0_system_features_control(struct smu_context *smu, bool en) { struct smu_feature *feature = &smu->smu_feature; @@ -905,7 +925,7 @@ int smu_v11_0_system_features_control(struct smu_context *smu, return ret; } -int smu_v11_0_notify_display_change(struct smu_context *smu) +static int smu_v11_0_notify_display_change(struct smu_context *smu) { int ret = 0; @@ -963,7 +983,7 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock, return ret; } -int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) +static int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) { struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks; int ret = 0; @@ -1043,7 +1063,7 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) return 0; } -int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) +static int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) { int ret = 0; @@ -1071,7 +1091,7 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) return 0; } -int smu_v11_0_get_current_clk_freq(struct smu_context *smu, +static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value) { @@ -1150,7 +1170,7 @@ static int smu_v11_0_enable_thermal_alert(struct smu_context *smu) return 0; } -int smu_v11_0_start_thermal_control(struct smu_context *smu) +static int smu_v11_0_start_thermal_control(struct smu_context *smu) { int ret = 0; struct smu_temperature_range range; @@ -1192,15 +1212,6 @@ int smu_v11_0_start_thermal_control(struct smu_context *smu) return ret; } -int smu_v11_0_stop_thermal_control(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0); - - return 0; -} - static uint16_t convert_to_vddc(uint8_t vid) { return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@ -1225,7 +1236,7 @@ static int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) } -int smu_v11_0_read_sensor(struct smu_context *smu, +static int smu_v11_0_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { @@ -1262,7 +1273,7 @@ int smu_v11_0_read_sensor(struct smu_context *smu, return ret; } -int +static int smu_v11_0_display_clock_voltage_request(struct smu_context *smu, struct pp_display_clock_request *clock_req) @@ -1305,7 +1316,9 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu, if (clk_select == SMU_UCLK && smu->disable_uclk_switch) return 0; + mutex_lock(&smu->mutex); ret = smu_set_hard_freq_range(smu, clk_select, clk_freq, 0); + mutex_unlock(&smu->mutex); if(clk_select == SMU_UCLK) smu->hard_min_uclk_req_from_dal = clk_freq; @@ -1315,7 +1328,27 @@ failed: return ret; } -int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) +static int +smu_v11_0_set_watermarks_for_clock_ranges(struct smu_context *smu, struct + dm_pp_wm_sets_with_clock_ranges_soc15 + *clock_ranges) +{ + int ret = 0; + struct smu_table *watermarks = &smu->smu_table.tables[SMU_TABLE_WATERMARKS]; + void *table = watermarks->cpu_addr; + + if (!smu->disable_watermark && + smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) && + smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { + smu_set_watermarks_table(smu, table, clock_ranges); + 
smu->watermarks_bitmap |= WATERMARKS_EXIST; + smu->watermarks_bitmap &= ~WATERMARKS_LOADED; + } + + return ret; +} + +static int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) { int ret = 0; struct amdgpu_device *adev = smu->adev; @@ -1328,10 +1361,12 @@ int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) case CHIP_NAVI12: if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; + mutex_lock(&smu->mutex); if (enable) ret = smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff); else ret = smu_send_smc_msg(smu, SMU_MSG_DisallowGfxOff); + mutex_unlock(&smu->mutex); break; default: break; @@ -1340,7 +1375,7 @@ int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) return ret; } -uint32_t +static uint32_t smu_v11_0_get_fan_control_mode(struct smu_context *smu) { if (!smu_feature_is_enabled(smu, SMU_FEATURE_FAN_CONTROL_BIT)) @@ -1380,7 +1415,7 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) return 0; } -int +static int smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; @@ -1409,7 +1444,7 @@ smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); } -int +static int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode) { @@ -1437,7 +1472,7 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu, return ret; } -int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, +static int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; @@ -1447,9 +1482,10 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, if (!speed) return -EINVAL; + mutex_lock(&(smu->mutex)); ret = smu_v11_0_auto_fan_control(smu, 0); if (ret) - return ret; + goto set_fan_speed_rpm_failed; crystal_clock_freq = amdgpu_asic_get_xclk(adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); @@ -1460,19 +1496,23 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, ret = smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC_RPM); +set_fan_speed_rpm_failed: + mutex_unlock(&(smu->mutex)); return ret; } #define XGMI_STATE_D0 1 #define XGMI_STATE_D3 0 -int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, +static int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, uint32_t pstate) { int ret = 0; + mutex_lock(&(smu->mutex)); ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetXgmiMode, pstate ? 
XGMI_STATE_D0 : XGMI_STATE_D3); + mutex_unlock(&(smu->mutex)); return ret; } @@ -1519,7 +1559,7 @@ static const struct amdgpu_irq_src_funcs smu_v11_0_irq_funcs = .process = smu_v11_0_irq_process, }; -int smu_v11_0_register_irq_handler(struct smu_context *smu) +static int smu_v11_0_register_irq_handler(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; struct amdgpu_irq_src *irq_src = smu->irq_source; @@ -1551,7 +1591,7 @@ int smu_v11_0_register_irq_handler(struct smu_context *smu) return ret; } -int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, +static int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks) { struct smu_table_context *table_context = &smu->smu_table; @@ -1581,11 +1621,13 @@ int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, return 0; } -int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu) +static int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu) { int ret = 0; + mutex_lock(&smu->mutex); ret = smu_send_smc_msg(smu, SMU_MSG_BacoAudioD3PME); + mutex_unlock(&smu->mutex); return ret; } @@ -1595,7 +1637,7 @@ static int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, enum smu_v return smu_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq); } -bool smu_v11_0_baco_is_support(struct smu_context *smu) +static bool smu_v11_0_baco_is_support(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; struct smu_baco_context *smu_baco = &smu->smu_baco; @@ -1619,7 +1661,7 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu) return false; } -enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu) +static enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; enum smu_baco_state baco_state; @@ -1631,7 +1673,7 @@ enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu) return baco_state; } -int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) +static int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) { struct smu_baco_context *smu_baco = &smu->smu_baco; @@ -1655,7 +1697,7 @@ out: return ret; } -int smu_v11_0_baco_reset(struct smu_context *smu) +static int smu_v11_0_baco_reset(struct smu_context *smu) { int ret = 0; @@ -1676,12 +1718,13 @@ int smu_v11_0_baco_reset(struct smu_context *smu) return ret; } -int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, +static int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max) { int ret = 0, clk_id = 0; uint32_t param = 0; + mutex_lock(&smu->mutex); clk_id = smu_clk_get_index(smu, clk_type); if (clk_id < 0) { ret = -EINVAL; @@ -1708,75 +1751,80 @@ int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c } failed: + mutex_unlock(&smu->mutex); return ret; } -int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max) -{ - int ret = 0, clk_id = 0; - uint32_t param; +static const struct smu_funcs smu_v11_0_funcs = { + .init_microcode = smu_v11_0_init_microcode, + .load_microcode = smu_v11_0_load_microcode, + .check_fw_status = smu_v11_0_check_fw_status, + .check_fw_version = smu_v11_0_check_fw_version, + .send_smc_msg = smu_v11_0_send_msg, + .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, + .read_smc_arg = smu_v11_0_read_arg, + .setup_pptable = 
smu_v11_0_setup_pptable, + .init_smc_tables = smu_v11_0_init_smc_tables, + .fini_smc_tables = smu_v11_0_fini_smc_tables, + .init_power = smu_v11_0_init_power, + .fini_power = smu_v11_0_fini_power, + .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values, + .get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios, + .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, + .check_pptable = smu_v11_0_check_pptable, + .parse_pptable = smu_v11_0_parse_pptable, + .populate_smc_tables = smu_v11_0_populate_smc_pptable, + .write_pptable = smu_v11_0_write_pptable, + .write_watermarks_table = smu_v11_0_write_watermarks_table, + .set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep, + .set_tool_table_location = smu_v11_0_set_tool_table_location, + .init_display_count = smu_v11_0_init_display_count, + .set_allowed_mask = smu_v11_0_set_allowed_mask, + .get_enabled_mask = smu_v11_0_get_enabled_mask, + .system_features_control = smu_v11_0_system_features_control, + .notify_display_change = smu_v11_0_notify_display_change, + .set_power_limit = smu_v11_0_set_power_limit, + .get_current_clk_freq = smu_v11_0_get_current_clk_freq, + .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, + .start_thermal_control = smu_v11_0_start_thermal_control, + .read_sensor = smu_v11_0_read_sensor, + .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk, + .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, + .set_watermarks_for_clock_ranges = smu_v11_0_set_watermarks_for_clock_ranges, + .get_fan_control_mode = smu_v11_0_get_fan_control_mode, + .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, + .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, + .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, + .gfx_off_control = smu_v11_0_gfx_off_control, + .register_irq_handler = smu_v11_0_register_irq_handler, + .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, + .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, + .baco_is_support = smu_v11_0_baco_is_support, + .baco_get_state = smu_v11_0_baco_get_state, + .baco_set_state = smu_v11_0_baco_set_state, + .baco_reset = smu_v11_0_baco_reset, + .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, +}; - clk_id = smu_clk_get_index(smu, clk_type); - if (clk_id < 0) - return clk_id; - - if (max > 0) { - param = (uint32_t)((clk_id << 16) | (max & 0xffff)); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq, - param); - if (ret) - return ret; - } - - if (min > 0) { - param = (uint32_t)((clk_id << 16) | (min & 0xffff)); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq, - param); - if (ret) - return ret; - } - - return ret; -} - -int smu_v11_0_override_pcie_parameters(struct smu_context *smu) +void smu_v11_0_set_smu_funcs(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - uint32_t pcie_gen = 0, pcie_width = 0; - int ret; - - if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) - pcie_gen = 3; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - pcie_gen = 2; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - pcie_gen = 1; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) - pcie_gen = 0; - - /* Bit 31:16: LCLK DPM level. 
0 is DPM0, and 1 is DPM1 - * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 - * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 - */ - if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) - pcie_width = 6; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) - pcie_width = 5; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) - pcie_width = 4; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) - pcie_width = 3; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) - pcie_width = 2; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) - pcie_width = 1; - - ret = smu_update_pcie_parameters(smu, pcie_gen, pcie_width); - - if (ret) - pr_err("[%s] Attempt to override pcie params failed!\n", __func__); - - return ret; + smu->funcs = &smu_v11_0_funcs; + switch (adev->asic_type) { + case CHIP_VEGA20: + vega20_set_ppt_funcs(smu); + break; + case CHIP_ARCTURUS: + arcturus_set_ppt_funcs(smu); + break; + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + navi10_set_ppt_funcs(smu); + break; + default: + pr_warn("Unknown asic for smu11\n"); + } } diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 139dd737eaa5..9d2280ca1f4b 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -24,12 +24,12 @@ #include #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v12_0.h" #include "soc15_common.h" #include "atom.h" +#include "renoir_ppt.h" #include "asic_reg/mp/mp_12_0_0_offset.h" #include "asic_reg/mp/mp_12_0_0_sh_mask.h" @@ -41,7 +41,7 @@ #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x00000006L #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1 -int smu_v12_0_send_msg_without_waiting(struct smu_context *smu, +static int smu_v12_0_send_msg_without_waiting(struct smu_context *smu, uint16_t msg) { struct amdgpu_device *adev = smu->adev; @@ -50,7 +50,7 @@ int smu_v12_0_send_msg_without_waiting(struct smu_context *smu, return 0; } -int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg) +static int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg) { struct amdgpu_device *adev = smu->adev; @@ -58,7 +58,7 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg) return 0; } -int smu_v12_0_wait_for_response(struct smu_context *smu) +static int smu_v12_0_wait_for_response(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; uint32_t cur_value, i; @@ -77,7 +77,7 @@ int smu_v12_0_wait_for_response(struct smu_context *smu) return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 
0 : -EIO; } -int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) +static int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) { struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0; @@ -102,7 +102,7 @@ int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg) } -int +static int smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, uint32_t param) { @@ -132,7 +132,7 @@ smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, return ret; } -int smu_v12_0_check_fw_status(struct smu_context *smu) +static int smu_v12_0_check_fw_status(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; uint32_t mp1_fw_flags; @@ -147,7 +147,7 @@ int smu_v12_0_check_fw_status(struct smu_context *smu) return -EIO; } -int smu_v12_0_check_fw_version(struct smu_context *smu) +static int smu_v12_0_check_fw_version(struct smu_context *smu) { uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; @@ -181,7 +181,7 @@ int smu_v12_0_check_fw_version(struct smu_context *smu) return ret; } -int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) +static int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) { if (!(smu->adev->flags & AMD_IS_APU)) return 0; @@ -192,7 +192,7 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) return smu_send_smc_msg(smu, SMU_MSG_PowerUpSdma); } -int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate) +static int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate) { if (!(smu->adev->flags & AMD_IS_APU)) return 0; @@ -203,7 +203,7 @@ int smu_v12_0_powergate_vcn(struct smu_context *smu, bool gate) return smu_send_smc_msg(smu, SMU_MSG_PowerUpVcn); } -int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) +static int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) { if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) return 0; @@ -224,7 +224,7 @@ int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) * Returns 2=Not in GFXOFF. * Returns 3=Transition into GFXOFF. 
*/ -uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu) +static uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu) { uint32_t reg; uint32_t gfxOff_Status = 0; @@ -237,13 +237,22 @@ uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu) return gfxOff_Status; } -int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable) +static int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable) { int ret = 0, timeout = 500; if (enable) { ret = smu_send_smc_msg(smu, SMU_MSG_AllowGfxOff); + /* confirm gfx is back to "off" state, timeout is 5 seconds */ + while (!(smu_v12_0_get_gfxoff_status(smu) == 0)) { + msleep(10); + timeout--; + if (timeout == 0) { + DRM_ERROR("enable gfxoff timeout and failed!\n"); + break; + } + } } else { ret = smu_send_smc_msg(smu, SMU_MSG_DisallowGfxOff); @@ -261,12 +270,12 @@ int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable) return ret; } -int smu_v12_0_init_smc_tables(struct smu_context *smu) +static int smu_v12_0_init_smc_tables(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = NULL; - if (smu_table->tables) + if (smu_table->tables || smu_table->table_count == 0) return -EINVAL; tables = kcalloc(SMU_TABLE_COUNT, sizeof(struct smu_table), @@ -279,11 +288,11 @@ int smu_v12_0_init_smc_tables(struct smu_context *smu) return smu_tables_init(smu, tables); } -int smu_v12_0_fini_smc_tables(struct smu_context *smu) +static int smu_v12_0_fini_smc_tables(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - if (!smu_table->tables) + if (!smu_table->tables || smu_table->table_count == 0) return -EINVAL; kfree(smu_table->clocks_table); @@ -295,7 +304,7 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) return 0; } -int smu_v12_0_populate_smc_tables(struct smu_context *smu) +static int smu_v12_0_populate_smc_tables(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *table = NULL; @@ -310,20 +319,14 @@ int smu_v12_0_populate_smc_tables(struct smu_context *smu) return smu_update_table(smu, SMU_TABLE_DPMCLOCKS, 0, smu_table->clocks_table, false); } -int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, +static int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max) { int ret = 0; - uint32_t mclk_mask, soc_mask; + + mutex_lock(&smu->mutex); if (max) { - ret = smu_get_profiling_clk_mask(smu, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK, - NULL, - &mclk_mask, - &soc_mask); - if (ret) - goto failed; - switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: @@ -337,20 +340,14 @@ int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c goto failed; break; case SMU_UCLK: - case SMU_FCLK: - case SMU_MCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, mclk_mask, max); - if (ret) - goto failed; - break; - case SMU_SOCCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, soc_mask, max); + ret = smu_get_dpm_uclk_limited(smu, max, true); if (ret) goto failed; break; default: ret = -EINVAL; goto failed; + } } @@ -368,14 +365,7 @@ int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c goto failed; break; case SMU_UCLK: - case SMU_FCLK: - case SMU_MCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min); - if (ret) - goto failed; - break; - case SMU_SOCCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min); + ret = smu_get_dpm_uclk_limited(smu, min, false); if 
(ret) goto failed; break; @@ -383,65 +373,40 @@ int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c ret = -EINVAL; goto failed; } + } failed: + mutex_unlock(&smu->mutex); return ret; } -int smu_v12_0_mode2_reset(struct smu_context *smu){ - return smu_v12_0_send_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_2); -} +static const struct smu_funcs smu_v12_0_funcs = { + .check_fw_status = smu_v12_0_check_fw_status, + .check_fw_version = smu_v12_0_check_fw_version, + .powergate_sdma = smu_v12_0_powergate_sdma, + .powergate_vcn = smu_v12_0_powergate_vcn, + .send_smc_msg = smu_v12_0_send_msg, + .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, + .read_smc_arg = smu_v12_0_read_arg, + .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, + .gfx_off_control = smu_v12_0_gfx_off_control, + .init_smc_tables = smu_v12_0_init_smc_tables, + .fini_smc_tables = smu_v12_0_fini_smc_tables, + .populate_smc_tables = smu_v12_0_populate_smc_tables, + .get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq, +}; -int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max) +void smu_v12_0_set_smu_funcs(struct smu_context *smu) { - int ret = 0; + struct amdgpu_device *adev = smu->adev; - if (max < min) - return -EINVAL; + smu->funcs = &smu_v12_0_funcs; - switch (clk_type) { - case SMU_GFXCLK: - case SMU_SCLK: - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, min); - if (ret) - return ret; - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, max); - if (ret) - return ret; - break; - case SMU_FCLK: - case SMU_MCLK: - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min); - if (ret) - return ret; - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max); - if (ret) - return ret; - break; - case SMU_SOCCLK: - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinSocclkByFreq, min); - if (ret) - return ret; - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max); - if (ret) - return ret; - break; - case SMU_VCLK: - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinVcn, min); - if (ret) - return ret; - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxVcn, max); - if (ret) - return ret; - break; + switch (adev->asic_type) { + case CHIP_RENOIR: + renoir_set_ppt_funcs(smu); + break; default: - return -EINVAL; + pr_warn("Unknown asic for smu12\n"); } - - return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c index aa0ee2b46135..3f12cf341511 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c @@ -137,7 +137,7 @@ static int smu10_copy_table_from_smc(struct pp_hwmgr *hwmgr, priv->smu_tables.entry[table_id].table_id); /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); memcpy(table, (uint8_t *)priv->smu_tables.entry[table_id].table, priv->smu_tables.entry[table_id].size); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c index 7dca04a89217..4728aa23a818 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c @@ -177,10 +177,12 @@ static int smu8_load_mec_firmware(struct pp_hwmgr *hwmgr) uint32_t tmp; int ret = 0; struct cgs_firmware_info info = {0}; + struct smu8_smumgr *smu8_smu; 
if (hwmgr == NULL || hwmgr->device == NULL) return -EINVAL; + smu8_smu = hwmgr->smu_backend; ret = cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_CP_MEC, &info); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index 0f3836fd9666..0dbdde69f2d9 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -58,7 +58,7 @@ static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr, priv->smu_tables.entry[table_id].table_id); /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); memcpy(table, priv->smu_tables.entry[table_id].table, priv->smu_tables.entry[table_id].size); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index 90c782c132d2..f9589806bf83 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -66,7 +66,7 @@ static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr, return -EINVAL); /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); memcpy(table, priv->smu_tables.entry[table_id].table, priv->smu_tables.entry[table_id].size); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c index f604612f411f..b9089c6bea85 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c @@ -189,7 +189,7 @@ static int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr, return ret); /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); memcpy(table, priv->smu_tables.entry[table_id].table, priv->smu_tables.entry[table_id].size); @@ -290,7 +290,7 @@ int vega20_get_activity_monitor_coeff(struct pp_hwmgr *hwmgr, return ret); /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->nbio_funcs->hdp_flush(adev, NULL); memcpy(table, priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].table, priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size); diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 7c8933f987d1..bbd8ebd58434 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -25,7 +25,6 @@ #include #include "amdgpu.h" #include "amdgpu_smu.h" -#include "smu_internal.h" #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" @@ -144,7 +143,6 @@ static struct smu_11_0_cmn2aisc_mapping vega20_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForShutdown), MSG_MAP(SetMGpuFanBoostLimitRpm), MSG_MAP(GetAVFSVoltageByDpm), - MSG_MAP(DFCstateControl), }; static struct smu_11_0_cmn2aisc_mapping vega20_clk_map[SMU_CLK_COUNT] = { @@ -636,6 +634,7 @@ amd_pm_state_type vega20_get_current_power_state(struct smu_context *smu) !smu_dpm_ctx->dpm_current_power_state) return -EINVAL; + mutex_lock(&(smu->mutex)); switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) { case SMU_STATE_UI_LABEL_BATTERY: pm_type = POWER_STATE_TYPE_BATTERY; @@ -653,6 +652,7 @@ amd_pm_state_type vega20_get_current_power_state(struct smu_context *smu) pm_type = POWER_STATE_TYPE_DEFAULT; break; } + mutex_unlock(&(smu->mutex)); return pm_type; } @@ -1274,8 +1274,16 @@ static int vega20_force_clk_levels(struct 
smu_context *smu, struct vega20_dpm_table *dpm_table; struct vega20_single_dpm_table *single_dpm_table; uint32_t soft_min_level, soft_max_level, hard_min_level; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; int ret = 0; + if (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { + pr_info("force clock level is for dpm manual mode only.\n"); + return -EINVAL; + } + + mutex_lock(&(smu->mutex)); + soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; @@ -1428,6 +1436,7 @@ static int vega20_force_clk_levels(struct smu_context *smu, break; } + mutex_unlock(&(smu->mutex)); return ret; } @@ -1442,6 +1451,8 @@ static int vega20_get_clock_by_type_with_latency(struct smu_context *smu, dpm_table = smu_dpm->dpm_context; + mutex_lock(&smu->mutex); + switch (clk_type) { case SMU_GFXCLK: single_dpm_table = &(dpm_table->gfx_table); @@ -1463,6 +1474,7 @@ static int vega20_get_clock_by_type_with_latency(struct smu_context *smu, ret = -EINVAL; } + mutex_unlock(&smu->mutex); return ret; } @@ -2248,7 +2260,7 @@ vega20_notify_smc_dispaly_config(struct smu_context *smu) if (smu_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { clock_req.clock_type = amd_pp_dcef_clock; clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10; - if (!smu_v11_0_display_clock_voltage_request(smu, &clock_req)) { + if (!smu->funcs->display_clock_voltage_request(smu, &clock_req)) { if (smu_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetMinDeepSleepDcefclk, @@ -2535,6 +2547,8 @@ static int vega20_set_od_percentage(struct smu_context *smu, int feature_enabled; PPCLK_e clk_id; + mutex_lock(&(smu->mutex)); + dpm_table = smu_dpm->dpm_context; golden_table = smu_dpm->golden_dpm_context; @@ -2584,10 +2598,11 @@ static int vega20_set_od_percentage(struct smu_context *smu, } ret = smu_handle_task(smu, smu_dpm->dpm_level, - AMD_PP_TASK_READJUST_POWER_STATE, - false); + AMD_PP_TASK_READJUST_POWER_STATE); set_od_failed: + mutex_unlock(&(smu->mutex)); + return ret; } @@ -2812,9 +2827,10 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, } if (type == PP_OD_COMMIT_DPM_TABLE) { + mutex_lock(&(smu->mutex)); ret = smu_handle_task(smu, smu_dpm->dpm_level, - AMD_PP_TASK_READJUST_POWER_STATE, - false); + AMD_PP_TASK_READJUST_POWER_STATE); + mutex_unlock(&(smu->mutex)); } return ret; @@ -3031,7 +3047,7 @@ static int vega20_read_sensor(struct smu_context *smu, *size = 4; break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = smu_smc_read_sensor(smu, sensor, data, size); } mutex_unlock(&smu->sensor_lock); @@ -3125,49 +3141,6 @@ static int vega20_get_thermal_temperature_range(struct smu_context *smu, return 0; } -static int vega20_set_df_cstate(struct smu_context *smu, - enum pp_df_cstate state) -{ - uint32_t smu_version; - int ret; - - ret = smu_get_smc_version(smu, NULL, &smu_version); - if (ret) { - pr_err("Failed to get smu version!\n"); - return ret; - } - - /* PPSMC_MSG_DFCstateControl is supported with 40.50 and later fws */ - if (smu_version < 0x283200) { - pr_err("Df cstate control is supported with 40.50 and later SMC fw!\n"); - return -EINVAL; - } - - return smu_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state); -} - -static int vega20_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) -{ - PPTable_t *pptable = smu->smu_table.driver_pptable; - int ret, i; - uint32_t smu_pcie_arg; - - for (i = 0; i < NUM_LINK_LEVELS; i++) { - smu_pcie_arg = (i << 
16) | - ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : - (pcie_gen_cap << 8)) | ((pptable->PcieLaneCount[i] <= pcie_width_cap) ? - pptable->PcieLaneCount[i] : pcie_width_cap); - ret = smu_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg); - } - - return ret; -} - - static const struct pptable_funcs vega20_ppt_funcs = { .tables_init = vega20_tables_init, .alloc_dpm_context = vega20_allocate_dpm_context, @@ -3180,7 +3153,7 @@ static const struct pptable_funcs vega20_ppt_funcs = { .get_smu_table_index = vega20_get_smu_table_index, .get_smu_power_index = vega20_get_pwr_src_index, .get_workload_type = vega20_get_workload_type, - .run_btc = vega20_run_btc_afll, + .run_afll_btc = vega20_run_btc_afll, .get_allowed_feature_mask = vega20_get_allowed_feature_mask, .get_current_power_state = vega20_get_current_power_state, .set_default_dpm_table = vega20_set_default_dpm_table, @@ -3210,61 +3183,13 @@ static const struct pptable_funcs vega20_ppt_funcs = { .get_fan_speed_percent = vega20_get_fan_speed_percent, .get_fan_speed_rpm = vega20_get_fan_speed_rpm, .set_watermarks_table = vega20_set_watermarks_table, - .get_thermal_temperature_range = vega20_get_thermal_temperature_range, - .set_df_cstate = vega20_set_df_cstate, - .update_pcie_parameters = vega20_update_pcie_parameters, - .init_microcode = smu_v11_0_init_microcode, - .load_microcode = smu_v11_0_load_microcode, - .init_smc_tables = smu_v11_0_init_smc_tables, - .fini_smc_tables = smu_v11_0_fini_smc_tables, - .init_power = smu_v11_0_init_power, - .fini_power = smu_v11_0_fini_power, - .check_fw_status = smu_v11_0_check_fw_status, - .setup_pptable = smu_v11_0_setup_pptable, - .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values, - .get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios, - .check_pptable = smu_v11_0_check_pptable, - .parse_pptable = smu_v11_0_parse_pptable, - .populate_smc_tables = smu_v11_0_populate_smc_pptable, - .check_fw_version = smu_v11_0_check_fw_version, - .write_pptable = smu_v11_0_write_pptable, - .set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep, - .set_tool_table_location = smu_v11_0_set_tool_table_location, - .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, - .system_features_control = smu_v11_0_system_features_control, - .send_smc_msg = smu_v11_0_send_msg, - .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, - .read_smc_arg = smu_v11_0_read_arg, - .init_display_count = smu_v11_0_init_display_count, - .set_allowed_mask = smu_v11_0_set_allowed_mask, - .get_enabled_mask = smu_v11_0_get_enabled_mask, - .notify_display_change = smu_v11_0_notify_display_change, - .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, - .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, - .start_thermal_control = smu_v11_0_start_thermal_control, - .stop_thermal_control = smu_v11_0_stop_thermal_control, - .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk, - .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, - .get_fan_control_mode = smu_v11_0_get_fan_control_mode, - .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, - .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, - .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, - .gfx_off_control = smu_v11_0_gfx_off_control, - .register_irq_handler = smu_v11_0_register_irq_handler, - .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, - 
.get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, - .baco_is_support= smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, - .baco_reset = smu_v11_0_baco_reset, - .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, - .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, - .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .get_thermal_temperature_range = vega20_get_thermal_temperature_range }; void vega20_set_ppt_funcs(struct smu_context *smu) { + struct smu_table_context *smu_table = &smu->smu_table; + smu->ppt_funcs = &vega20_ppt_funcs; + smu_table->table_count = TABLE_COUNT; } diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ae5809a1f19a..b854a422a523 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -28,13 +28,6 @@ #include #include -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) -#include -#include -#include -#include -#endif - #include #include #include @@ -52,12 +45,6 @@ * protocol. The helpers contain a topology manager and bandwidth manager. * The helpers encapsulate the sending and received of sideband msgs. */ -struct drm_dp_pending_up_req { - struct drm_dp_sideband_msg_hdr hdr; - struct drm_dp_sideband_msg_req_body msg; - struct list_head next; -}; - static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, char *buf); @@ -74,8 +61,8 @@ static int drm_dp_send_dpcd_write(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, int offset, int size, u8 *bytes); -static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_branch *mstb); +static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_branch *mstb); static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb, struct drm_dp_mst_port *port); @@ -1406,194 +1393,39 @@ drm_dp_mst_put_port_malloc(struct drm_dp_mst_port *port) } EXPORT_SYMBOL(drm_dp_mst_put_port_malloc); -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) - -#define STACK_DEPTH 8 - -static noinline void -__topology_ref_save(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_topology_ref_history *history, - enum drm_dp_mst_topology_ref_type type) -{ - struct drm_dp_mst_topology_ref_entry *entry = NULL; - depot_stack_handle_t backtrace; - ulong stack_entries[STACK_DEPTH]; - uint n; - int i; - - n = stack_trace_save(stack_entries, ARRAY_SIZE(stack_entries), 1); - backtrace = stack_depot_save(stack_entries, n, GFP_KERNEL); - if (!backtrace) - return; - - /* Try to find an existing entry for this backtrace */ - for (i = 0; i < history->len; i++) { - if (history->entries[i].backtrace == backtrace) { - entry = &history->entries[i]; - break; - } - } - - /* Otherwise add one */ - if (!entry) { - struct drm_dp_mst_topology_ref_entry *new; - int new_len = history->len + 1; - - new = krealloc(history->entries, sizeof(*new) * new_len, - GFP_KERNEL); - if (!new) - return; - - entry = &new[history->len]; - history->len = new_len; - history->entries = new; - - entry->backtrace = backtrace; - entry->type = type; - entry->count = 0; - } - entry->count++; - entry->ts_nsec = ktime_get_ns(); -} - -static int -topology_ref_history_cmp(const void *a, const void *b) -{ - const struct drm_dp_mst_topology_ref_entry *entry_a = a, *entry_b = b; - - if (entry_a->ts_nsec > entry_b->ts_nsec) - return 1; - else 
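The vega20_ppt_funcs changes above follow the kernel's ops-table idiom: a const struct of function pointers is filled in per ASIC, and vega20_set_ppt_funcs() points smu->ppt_funcs at it so the SMU core always calls through the table. A minimal, self-contained sketch of that idiom; every name below is illustrative, not the amdgpu API.

/* Ops-table sketch: a per-device function-pointer table selected at
 * init time, analogous to smu->ppt_funcs = &vega20_ppt_funcs. */
#include <stdio.h>

struct ppt_ops {
        int (*run_btc)(void *ctx);
        int (*get_fan_speed_rpm)(void *ctx, unsigned int *rpm);
};

static int fake_run_btc(void *ctx) { (void)ctx; return 0; }

static int fake_fan_rpm(void *ctx, unsigned int *rpm)
{
        (void)ctx;
        *rpm = 1500;
        return 0;
}

static const struct ppt_ops fake_asic_ops = {
        .run_btc           = fake_run_btc,
        .get_fan_speed_rpm = fake_fan_rpm,
};

struct smu_ctx {
        const struct ppt_ops *ops;      /* mirrors smu->ppt_funcs */
};

int main(void)
{
        struct smu_ctx smu = { .ops = &fake_asic_ops };
        unsigned int rpm;

        if (smu.ops->run_btc(&smu) == 0 &&
            smu.ops->get_fan_speed_rpm(&smu, &rpm) == 0)
                printf("fan: %u rpm\n", rpm);
        return 0;
}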
if (entry_a->ts_nsec < entry_b->ts_nsec) - return -1; - else - return 0; -} - -static inline const char * -topology_ref_type_to_str(enum drm_dp_mst_topology_ref_type type) -{ - if (type == DRM_DP_MST_TOPOLOGY_REF_GET) - return "get"; - else - return "put"; -} - -static void -__dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history, - void *ptr, const char *type_str) -{ - struct drm_printer p = drm_debug_printer(DBG_PREFIX); - char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - int i; - - if (!buf) - return; - - if (!history->len) - goto out; - - /* First, sort the list so that it goes from oldest to newest - * reference entry - */ - sort(history->entries, history->len, sizeof(*history->entries), - topology_ref_history_cmp, NULL); - - drm_printf(&p, "%s (%p) topology count reached 0, dumping history:\n", - type_str, ptr); - - for (i = 0; i < history->len; i++) { - const struct drm_dp_mst_topology_ref_entry *entry = - &history->entries[i]; - ulong *entries; - uint nr_entries; - u64 ts_nsec = entry->ts_nsec; - u32 rem_nsec = do_div(ts_nsec, 1000000000); - - nr_entries = stack_depot_fetch(entry->backtrace, &entries); - stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4); - - drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s", - entry->count, - topology_ref_type_to_str(entry->type), - ts_nsec, rem_nsec / 1000, buf); - } - - /* Now free the history, since this is the only time we expose it */ - kfree(history->entries); -out: - kfree(buf); -} - -static __always_inline void -drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb) -{ - __dump_topology_ref_history(&mstb->topology_ref_history, mstb, - "MSTB"); -} - -static __always_inline void -drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port) -{ - __dump_topology_ref_history(&port->topology_ref_history, port, - "Port"); -} - -static __always_inline void -save_mstb_topology_ref(struct drm_dp_mst_branch *mstb, - enum drm_dp_mst_topology_ref_type type) -{ - __topology_ref_save(mstb->mgr, &mstb->topology_ref_history, type); -} - -static __always_inline void -save_port_topology_ref(struct drm_dp_mst_port *port, - enum drm_dp_mst_topology_ref_type type) -{ - __topology_ref_save(port->mgr, &port->topology_ref_history, type); -} - -static inline void -topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr) -{ - mutex_lock(&mgr->topology_ref_history_lock); -} - -static inline void -topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr) -{ - mutex_unlock(&mgr->topology_ref_history_lock); -} -#else -static inline void -topology_ref_history_lock(struct drm_dp_mst_topology_mgr *mgr) {} -static inline void -topology_ref_history_unlock(struct drm_dp_mst_topology_mgr *mgr) {} -static inline void -drm_dp_mst_dump_mstb_topology_history(struct drm_dp_mst_branch *mstb) {} -static inline void -drm_dp_mst_dump_port_topology_history(struct drm_dp_mst_port *port) {} -#define save_mstb_topology_ref(mstb, type) -#define save_port_topology_ref(port, type) -#endif - static void drm_dp_destroy_mst_branch_device(struct kref *kref) { struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, topology_kref); struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; + struct drm_dp_mst_port *port, *tmp; + bool wake_tx = false; - drm_dp_mst_dump_mstb_topology_history(mstb); + mutex_lock(&mgr->lock); + list_for_each_entry_safe(port, tmp, &mstb->ports, next) { + list_del(&port->next); + drm_dp_mst_topology_put_port(port); + } + mutex_unlock(&mgr->lock); - INIT_LIST_HEAD(&mstb->destroy_next); + /* drop any 
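The __topology_ref_save() code being removed above keeps one history entry per distinct backtrace handle and bumps a counter each time the same call site takes or drops another topology reference. A simplified userspace sketch of that dedup-by-handle bookkeeping, with a plain integer standing in for the stackdepot handle; names and values are illustrative.

/* Simplified sketch of __topology_ref_save()'s bookkeeping: one entry
 * per (backtrace, get/put) pair, with a counter instead of duplicates. */
#include <stdio.h>
#include <stdlib.h>

enum ref_type { REF_GET, REF_PUT };

struct ref_entry {
        unsigned long backtrace;        /* stand-in for depot_stack_handle_t */
        enum ref_type type;
        int count;
};

struct ref_history {
        struct ref_entry *entries;
        int len;
};

static void ref_save(struct ref_history *h, unsigned long backtrace,
                     enum ref_type type)
{
        struct ref_entry *e = NULL;

        for (int i = 0; i < h->len; i++) {
                if (h->entries[i].backtrace == backtrace &&
                    h->entries[i].type == type) {
                        e = &h->entries[i];
                        break;
                }
        }

        if (!e) {
                struct ref_entry *grown;

                grown = realloc(h->entries, sizeof(*grown) * (h->len + 1));
                if (!grown)
                        return;         /* drop the sample on OOM */
                h->entries = grown;
                e = &h->entries[h->len++];
                e->backtrace = backtrace;
                e->type = type;
                e->count = 0;
        }
        e->count++;
}

int main(void)
{
        struct ref_history hist = { NULL, 0 };

        ref_save(&hist, 0xdeadbeef, REF_GET);
        ref_save(&hist, 0xdeadbeef, REF_GET);
        ref_save(&hist, 0xcafef00d, REF_PUT);

        for (int i = 0; i < hist.len; i++)
                printf("%#lx: %d %s(s)\n", hist.entries[i].backtrace,
                       hist.entries[i].count,
                       hist.entries[i].type == REF_GET ? "get" : "put");
        free(hist.entries);
        return 0;
}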
tx slots msg */ + mutex_lock(&mstb->mgr->qlock); + if (mstb->tx_slots[0]) { + mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT; + mstb->tx_slots[0] = NULL; + wake_tx = true; + } + if (mstb->tx_slots[1]) { + mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT; + mstb->tx_slots[1] = NULL; + wake_tx = true; + } + mutex_unlock(&mstb->mgr->qlock); - /* - * This can get called under mgr->mutex, so we need to perform the - * actual destruction of the mstb in another worker - */ - mutex_lock(&mgr->delayed_destroy_lock); - list_add(&mstb->destroy_next, &mgr->destroy_branch_device_list); - mutex_unlock(&mgr->delayed_destroy_lock); - schedule_work(&mgr->delayed_destroy_work); + if (wake_tx) + wake_up_all(&mstb->mgr->tx_waitq); + + drm_dp_mst_put_mstb_malloc(mstb); } /** @@ -1621,17 +1453,11 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) static int __must_check drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb) { - int ret; + int ret = kref_get_unless_zero(&mstb->topology_kref); - topology_ref_history_lock(mstb->mgr); - ret = kref_get_unless_zero(&mstb->topology_kref); - if (ret) { - DRM_DEBUG("mstb %p (%d)\n", - mstb, kref_read(&mstb->topology_kref)); - save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET); - } - - topology_ref_history_unlock(mstb->mgr); + if (ret) + DRM_DEBUG("mstb %p (%d)\n", mstb, + kref_read(&mstb->topology_kref)); return ret; } @@ -1652,14 +1478,9 @@ drm_dp_mst_topology_try_get_mstb(struct drm_dp_mst_branch *mstb) */ static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb) { - topology_ref_history_lock(mstb->mgr); - - save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_GET); WARN_ON(kref_read(&mstb->topology_kref) == 0); kref_get(&mstb->topology_kref); DRM_DEBUG("mstb %p (%d)\n", mstb, kref_read(&mstb->topology_kref)); - - topology_ref_history_unlock(mstb->mgr); } /** @@ -1677,40 +1498,60 @@ static void drm_dp_mst_topology_get_mstb(struct drm_dp_mst_branch *mstb) static void drm_dp_mst_topology_put_mstb(struct drm_dp_mst_branch *mstb) { - topology_ref_history_lock(mstb->mgr); - DRM_DEBUG("mstb %p (%d)\n", mstb, kref_read(&mstb->topology_kref) - 1); - save_mstb_topology_ref(mstb, DRM_DP_MST_TOPOLOGY_REF_PUT); - - topology_ref_history_unlock(mstb->mgr); kref_put(&mstb->topology_kref, drm_dp_destroy_mst_branch_device); } +static void drm_dp_port_teardown_pdt(struct drm_dp_mst_port *port, int old_pdt) +{ + struct drm_dp_mst_branch *mstb; + + switch (old_pdt) { + case DP_PEER_DEVICE_DP_LEGACY_CONV: + case DP_PEER_DEVICE_SST_SINK: + /* remove i2c over sideband */ + drm_dp_mst_unregister_i2c_bus(&port->aux); + break; + case DP_PEER_DEVICE_MST_BRANCHING: + mstb = port->mstb; + port->mstb = NULL; + drm_dp_mst_topology_put_mstb(mstb); + break; + } +} + static void drm_dp_destroy_port(struct kref *kref) { struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, topology_kref); struct drm_dp_mst_topology_mgr *mgr = port->mgr; - drm_dp_mst_dump_port_topology_history(port); + if (!port->input) { + kfree(port->cached_edid); - /* There's nothing that needs locking to destroy an input port yet */ - if (port->input) { - drm_dp_mst_put_port_malloc(port); - return; + /* + * The only time we don't have a connector + * on an output port is if the connector init + * fails. 
+ */ + if (port->connector) { + /* we can't destroy the connector here, as + * we might be holding the mode_config.mutex + * from an EDID retrieval */ + + mutex_lock(&mgr->destroy_connector_lock); + list_add(&port->next, &mgr->destroy_connector_list); + mutex_unlock(&mgr->destroy_connector_lock); + schedule_work(&mgr->destroy_connector_work); + return; + } + /* no need to clean up vcpi + * as if we have no connector we never setup a vcpi */ + drm_dp_port_teardown_pdt(port, port->pdt); + port->pdt = DP_PEER_DEVICE_NONE; } - - kfree(port->cached_edid); - - /* - * we can't destroy the connector here, as we might be holding the - * mode_config.mutex from an EDID retrieval - */ - mutex_lock(&mgr->delayed_destroy_lock); - list_add(&port->next, &mgr->destroy_port_list); - mutex_unlock(&mgr->delayed_destroy_lock); - schedule_work(&mgr->delayed_destroy_work); + drm_dp_mst_put_port_malloc(port); } /** @@ -1738,17 +1579,12 @@ static void drm_dp_destroy_port(struct kref *kref) static int __must_check drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port) { - int ret; + int ret = kref_get_unless_zero(&port->topology_kref); - topology_ref_history_lock(port->mgr); - ret = kref_get_unless_zero(&port->topology_kref); - if (ret) { - DRM_DEBUG("port %p (%d)\n", - port, kref_read(&port->topology_kref)); - save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET); - } + if (ret) + DRM_DEBUG("port %p (%d)\n", port, + kref_read(&port->topology_kref)); - topology_ref_history_unlock(port->mgr); return ret; } @@ -1767,14 +1603,9 @@ drm_dp_mst_topology_try_get_port(struct drm_dp_mst_port *port) */ static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port) { - topology_ref_history_lock(port->mgr); - WARN_ON(kref_read(&port->topology_kref) == 0); kref_get(&port->topology_kref); DRM_DEBUG("port %p (%d)\n", port, kref_read(&port->topology_kref)); - save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_GET); - - topology_ref_history_unlock(port->mgr); } /** @@ -1790,13 +1621,8 @@ static void drm_dp_mst_topology_get_port(struct drm_dp_mst_port *port) */ static void drm_dp_mst_topology_put_port(struct drm_dp_mst_port *port) { - topology_ref_history_lock(port->mgr); - DRM_DEBUG("port %p (%d)\n", port, kref_read(&port->topology_kref) - 1); - save_port_topology_ref(port, DRM_DP_MST_TOPOLOGY_REF_PUT); - - topology_ref_history_unlock(port->mgr); kref_put(&port->topology_kref, drm_dp_destroy_port); } @@ -1913,79 +1739,38 @@ static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port, return parent_lct + 1; } -static int drm_dp_port_set_pdt(struct drm_dp_mst_port *port, u8 new_pdt) +/* + * return sends link address for new mstb + */ +static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port) { - struct drm_dp_mst_topology_mgr *mgr = port->mgr; - struct drm_dp_mst_branch *mstb; - u8 rad[8], lct; - int ret = 0; - - if (port->pdt == new_pdt) - return 0; - - /* Teardown the old pdt, if there is one */ - switch (port->pdt) { - case DP_PEER_DEVICE_DP_LEGACY_CONV: - case DP_PEER_DEVICE_SST_SINK: - /* - * If the new PDT would also have an i2c bus, don't bother - * with reregistering it - */ - if (new_pdt == DP_PEER_DEVICE_DP_LEGACY_CONV || - new_pdt == DP_PEER_DEVICE_SST_SINK) { - port->pdt = new_pdt; - return 0; - } - - /* remove i2c over sideband */ - drm_dp_mst_unregister_i2c_bus(&port->aux); - break; - case DP_PEER_DEVICE_MST_BRANCHING: - mutex_lock(&mgr->lock); - drm_dp_mst_topology_put_mstb(port->mstb); - port->mstb = NULL; - mutex_unlock(&mgr->lock); - break; - } - - port->pdt = new_pdt; + int ret; + u8 
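drm_dp_mst_topology_try_get_mstb() and drm_dp_mst_topology_try_get_port() above rely on kref_get_unless_zero(): a reference is taken only if the count has not already dropped to zero, so a racing destructor is never resurrected, while dropping the last reference invokes the release callback. A userspace analogue of that try-get/put pattern using C11 atomics; this is a sketch, not the kernel kref API.

/* Try-get refcounting sketch, analogous to kref_get_unless_zero() plus
 * kref_put() with a release callback. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
        atomic_int refs;
};

static bool obj_try_get(struct obj *o)
{
        int cur = atomic_load(&o->refs);

        while (cur != 0) {
                if (atomic_compare_exchange_weak(&o->refs, &cur, cur + 1))
                        return true;    /* reference taken */
        }
        return false;                   /* object already on its way out */
}

static void obj_release(struct obj *o)
{
        printf("releasing %p\n", (void *)o);
}

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                obj_release(o);         /* that was the last reference */
}

int main(void)
{
        struct obj o = { .refs = 1 };

        if (obj_try_get(&o))            /* succeeds: refs 1 -> 2 */
                obj_put(&o);
        obj_put(&o);                    /* drops the initial reference */
        return 0;
}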
rad[6], lct; + bool send_link = false; switch (port->pdt) { case DP_PEER_DEVICE_DP_LEGACY_CONV: case DP_PEER_DEVICE_SST_SINK: /* add i2c over sideband */ ret = drm_dp_mst_register_i2c_bus(&port->aux); break; - case DP_PEER_DEVICE_MST_BRANCHING: lct = drm_dp_calculate_rad(port, rad); - mstb = drm_dp_add_mst_branch_device(lct, rad); - if (!mstb) { - ret = -ENOMEM; - DRM_ERROR("Failed to create MSTB for port %p", port); - goto out; + + port->mstb = drm_dp_add_mst_branch_device(lct, rad); + if (port->mstb) { + port->mstb->mgr = port->mgr; + port->mstb->port_parent = port; + /* + * Make sure this port's memory allocation stays + * around until its child MSTB releases it + */ + drm_dp_mst_get_port_malloc(port); + + send_link = true; } - - mutex_lock(&mgr->lock); - port->mstb = mstb; - mstb->mgr = port->mgr; - mstb->port_parent = port; - - /* - * Make sure this port's memory allocation stays - * around until its child MSTB releases it - */ - drm_dp_mst_get_port_malloc(port); - mutex_unlock(&mgr->lock); - - /* And make sure we send a link address for this */ - ret = 1; break; } - -out: - if (ret < 0) - port->pdt = DP_PEER_DEVICE_NONE; - return ret; + return send_link; } /** @@ -2118,130 +1903,44 @@ void drm_dp_mst_connector_early_unregister(struct drm_connector *connector, EXPORT_SYMBOL(drm_dp_mst_connector_early_unregister); static void -drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb, - struct drm_dp_mst_port *port) -{ - struct drm_dp_mst_topology_mgr *mgr = port->mgr; - char proppath[255]; - int ret; - - build_mst_prop_path(mstb, port->port_num, proppath, sizeof(proppath)); - port->connector = mgr->cbs->add_connector(mgr, port, proppath); - if (!port->connector) { - ret = -ENOMEM; - goto error; - } - - if ((port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV || - port->pdt == DP_PEER_DEVICE_SST_SINK) && - port->port_num >= DP_MST_LOGICAL_PORT_0) { - port->cached_edid = drm_get_edid(port->connector, - &port->aux.ddc); - drm_connector_set_tile_property(port->connector); - } - - mgr->cbs->register_connector(port->connector); - return; - -error: - DRM_ERROR("Failed to create connector for port %p: %d\n", port, ret); -} - -/* - * Drop a topology reference, and unlink the port from the in-memory topology - * layout - */ -static void -drm_dp_mst_topology_unlink_port(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_port *port) -{ - mutex_lock(&mgr->lock); - list_del(&port->next); - mutex_unlock(&mgr->lock); - drm_dp_mst_topology_put_port(port); -} - -static struct drm_dp_mst_port * -drm_dp_mst_add_port(struct drm_device *dev, - struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_branch *mstb, u8 port_number) -{ - struct drm_dp_mst_port *port = kzalloc(sizeof(*port), GFP_KERNEL); - - if (!port) - return NULL; - - kref_init(&port->topology_kref); - kref_init(&port->malloc_kref); - port->parent = mstb; - port->port_num = port_number; - port->mgr = mgr; - port->aux.name = "DPMST"; - port->aux.dev = dev->dev; - port->aux.is_remote = true; - - /* - * Make sure the memory allocation for our parent branch stays - * around until our own memory allocation is released - */ - drm_dp_mst_get_mstb_malloc(mstb); - - return port; -} - -static int drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, struct drm_device *dev, struct drm_dp_link_addr_reply_port *port_msg) { - struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; struct drm_dp_mst_port *port; - int old_ddps = 0, ret; - u8 new_pdt = DP_PEER_DEVICE_NONE; - bool created = false, send_link_addr = false, changed = false; + bool ret; + 
bool created = false; + int old_pdt = 0; + int old_ddps = 0; port = drm_dp_get_port(mstb, port_msg->port_number); if (!port) { - port = drm_dp_mst_add_port(dev, mgr, mstb, - port_msg->port_number); + port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) - return -ENOMEM; - created = true; - changed = true; - } else if (!port->input && port_msg->input_port && port->connector) { - /* Since port->connector can't be changed here, we create a - * new port if input_port changes from 0 to 1 - */ - drm_dp_mst_topology_unlink_port(mgr, port); - drm_dp_mst_topology_put_port(port); - port = drm_dp_mst_add_port(dev, mgr, mstb, - port_msg->port_number); - if (!port) - return -ENOMEM; - changed = true; - created = true; - } else if (port->input && !port_msg->input_port) { - changed = true; - } else if (port->connector) { - /* We're updating a port that's exposed to userspace, so do it - * under lock - */ - drm_modeset_lock(&mgr->base.lock, NULL); + return; + kref_init(&port->topology_kref); + kref_init(&port->malloc_kref); + port->parent = mstb; + port->port_num = port_msg->port_number; + port->mgr = mstb->mgr; + port->aux.name = "DPMST"; + port->aux.dev = dev->dev; + port->aux.is_remote = true; + /* + * Make sure the memory allocation for our parent branch stays + * around until our own memory allocation is released + */ + drm_dp_mst_get_mstb_malloc(mstb); + + created = true; + } else { + old_pdt = port->pdt; old_ddps = port->ddps; - changed = port->ddps != port_msg->ddps || - (port->ddps && - (port->ldps != port_msg->legacy_device_plug_status || - port->dpcd_rev != port_msg->dpcd_revision || - port->mcs != port_msg->mcs || - port->pdt != port_msg->peer_device_type || - port->num_sdp_stream_sinks != - port_msg->num_sdp_stream_sinks)); } + port->pdt = port_msg->peer_device_type; port->input = port_msg->input_port; - if (!port->input) - new_pdt = port_msg->peer_device_type; port->mcs = port_msg->mcs; port->ddps = port_msg->ddps; port->ldps = port_msg->legacy_device_plug_status; @@ -2252,104 +1951,78 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, /* manage mstb port lists with mgr lock - take a reference for this list */ if (created) { - mutex_lock(&mgr->lock); + mutex_lock(&mstb->mgr->lock); drm_dp_mst_topology_get_port(port); list_add(&port->next, &mstb->ports); - mutex_unlock(&mgr->lock); + mutex_unlock(&mstb->mgr->lock); } if (old_ddps != port->ddps) { if (port->ddps) { if (!port->input) { - drm_dp_send_enum_path_resources(mgr, mstb, - port); + drm_dp_send_enum_path_resources(mstb->mgr, + mstb, port); } } else { port->available_pbn = 0; } } - ret = drm_dp_port_set_pdt(port, new_pdt); - if (ret == 1) { - send_link_addr = true; - } else if (ret < 0) { - DRM_ERROR("Failed to change PDT on port %p: %d\n", - port, ret); - goto fail; + if (old_pdt != port->pdt && !port->input) { + drm_dp_port_teardown_pdt(port, old_pdt); + + ret = drm_dp_port_setup_pdt(port); + if (ret == true) + drm_dp_send_link_address(mstb->mgr, port->mstb); } - /* - * If this port wasn't just created, then we're reprobing because - * we're coming out of suspend. 
In this case, always resend the link - * address if there's an MSTB on this port - */ - if (!created && port->pdt == DP_PEER_DEVICE_MST_BRANCHING) - send_link_addr = true; + if (created && !port->input) { + char proppath[255]; - if (port->connector) - drm_modeset_unlock(&mgr->base.lock); - else if (!port->input) - drm_dp_mst_port_add_connector(mstb, port); - - if (send_link_addr && port->mstb) { - ret = drm_dp_send_link_address(mgr, port->mstb); - if (ret == 1) /* MSTB below us changed */ - changed = true; - else if (ret < 0) - goto fail_put; + build_mst_prop_path(mstb, port->port_num, proppath, + sizeof(proppath)); + port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, + port, + proppath); + if (!port->connector) { + /* remove it from the port list */ + mutex_lock(&mstb->mgr->lock); + list_del(&port->next); + mutex_unlock(&mstb->mgr->lock); + /* drop port list reference */ + drm_dp_mst_topology_put_port(port); + goto out; + } + if ((port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV || + port->pdt == DP_PEER_DEVICE_SST_SINK) && + port->port_num >= DP_MST_LOGICAL_PORT_0) { + port->cached_edid = drm_get_edid(port->connector, + &port->aux.ddc); + drm_connector_set_tile_property(port->connector); + } + (*mstb->mgr->cbs->register_connector)(port->connector); } +out: /* put reference to this port */ drm_dp_mst_topology_put_port(port); - return changed; - -fail: - drm_dp_mst_topology_unlink_port(mgr, port); - if (port->connector) - drm_modeset_unlock(&mgr->base.lock); -fail_put: - drm_dp_mst_topology_put_port(port); - return ret; } static void drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, struct drm_dp_connection_status_notify *conn_stat) { - struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; struct drm_dp_mst_port *port; - int old_ddps, ret; - u8 new_pdt; - bool dowork = false, create_connector = false; - + int old_pdt; + int old_ddps; + bool dowork = false; port = drm_dp_get_port(mstb, conn_stat->port_number); if (!port) return; - if (port->connector) { - if (!port->input && conn_stat->input_port) { - /* - * We can't remove a connector from an already exposed - * port, so just throw the port out and make sure we - * reprobe the link address of it's parent MSTB - */ - drm_dp_mst_topology_unlink_port(mgr, port); - mstb->link_address_sent = false; - dowork = true; - goto out; - } - - /* Locking is only needed if the port's exposed to userspace */ - drm_modeset_lock(&mgr->base.lock, NULL); - } else if (port->input && !conn_stat->input_port) { - create_connector = true; - /* Reprobe link address so we get num_sdp_streams */ - mstb->link_address_sent = false; - dowork = true; - } - old_ddps = port->ddps; - port->input = conn_stat->input_port; + old_pdt = port->pdt; + port->pdt = conn_stat->peer_device_type; port->mcs = conn_stat->message_capability_status; port->ldps = conn_stat->legacy_device_plug_status; port->ddps = conn_stat->displayport_device_plug_status; @@ -2361,27 +2034,17 @@ drm_dp_mst_handle_conn_stat(struct drm_dp_mst_branch *mstb, port->available_pbn = 0; } } + if (old_pdt != port->pdt && !port->input) { + drm_dp_port_teardown_pdt(port, old_pdt); - new_pdt = port->input ? 
DP_PEER_DEVICE_NONE : conn_stat->peer_device_type; - - ret = drm_dp_port_set_pdt(port, new_pdt); - if (ret == 1) { - dowork = true; - } else if (ret < 0) { - DRM_ERROR("Failed to change PDT for port %p: %d\n", - port, ret); - dowork = false; + if (drm_dp_port_setup_pdt(port)) + dowork = true; } - if (port->connector) - drm_modeset_unlock(&mgr->base.lock); - else if (create_connector) - drm_dp_mst_port_add_connector(mstb, port); - -out: drm_dp_mst_topology_put_port(port); if (dowork) queue_work(system_long_wq, &mstb->mgr->work); + } static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_topology_mgr *mgr, @@ -2467,62 +2130,41 @@ drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr, return mstb; } -static int drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr, +static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb) { struct drm_dp_mst_port *port; - int ret; - bool changed = false; - - if (!mstb->link_address_sent) { - ret = drm_dp_send_link_address(mgr, mstb); - if (ret == 1) - changed = true; - else if (ret < 0) - return ret; - } + struct drm_dp_mst_branch *mstb_child; + if (!mstb->link_address_sent) + drm_dp_send_link_address(mgr, mstb); list_for_each_entry(port, &mstb->ports, next) { - struct drm_dp_mst_branch *mstb_child = NULL; - - if (port->input || !port->ddps) + if (port->input) continue; - if (!port->available_pbn) { - drm_modeset_lock(&mgr->base.lock, NULL); - drm_dp_send_enum_path_resources(mgr, mstb, port); - drm_modeset_unlock(&mgr->base.lock); - changed = true; - } + if (!port->ddps) + continue; - if (port->mstb) + if (!port->available_pbn) + drm_dp_send_enum_path_resources(mgr, mstb, port); + + if (port->mstb) { mstb_child = drm_dp_mst_topology_get_mstb_validated( mgr, port->mstb); - - if (mstb_child) { - ret = drm_dp_check_and_send_link_address(mgr, - mstb_child); - drm_dp_mst_topology_put_mstb(mstb_child); - if (ret == 1) - changed = true; - else if (ret < 0) - return ret; + if (mstb_child) { + drm_dp_check_and_send_link_address(mgr, mstb_child); + drm_dp_mst_topology_put_mstb(mstb_child); + } } } - - return changed; } static void drm_dp_mst_link_probe_work(struct work_struct *work) { - struct drm_dp_mst_topology_mgr *mgr = - container_of(work, struct drm_dp_mst_topology_mgr, work); - struct drm_device *dev = mgr->dev; + struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, work); struct drm_dp_mst_branch *mstb; int ret; - mutex_lock(&mgr->probe_lock); - mutex_lock(&mgr->lock); mstb = mgr->mst_primary; if (mstb) { @@ -2531,17 +2173,10 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) mstb = NULL; } mutex_unlock(&mgr->lock); - if (!mstb) { - mutex_unlock(&mgr->probe_lock); - return; + if (mstb) { + drm_dp_check_and_send_link_address(mgr, mstb); + drm_dp_mst_topology_put_mstb(mstb); } - - ret = drm_dp_check_and_send_link_address(mgr, mstb); - drm_dp_mst_topology_put_mstb(mstb); - - mutex_unlock(&mgr->probe_lock); - if (ret) - drm_kms_helper_hotplug_event(dev); } static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, @@ -2787,18 +2422,16 @@ drm_dp_dump_link_address(struct drm_dp_link_address_ack_reply *reply) } } -static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, +static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb) { struct drm_dp_sideband_msg_tx *txmsg; struct drm_dp_link_address_ack_reply *reply; - struct 
drm_dp_mst_port *port, *tmp; - int i, len, ret, port_mask = 0; - bool changed = false; + int i, len, ret; txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); if (!txmsg) - return -ENOMEM; + return; txmsg->dst = mstb; len = build_link_address(txmsg); @@ -2824,39 +2457,16 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, drm_dp_check_mstb_guid(mstb, reply->guid); - for (i = 0; i < reply->nports; i++) { - port_mask |= BIT(reply->ports[i].port_number); - ret = drm_dp_mst_handle_link_address_port(mstb, mgr->dev, - &reply->ports[i]); - if (ret == 1) - changed = true; - else if (ret < 0) - goto out; - } + for (i = 0; i < reply->nports; i++) + drm_dp_mst_handle_link_address_port(mstb, mgr->dev, + &reply->ports[i]); - /* Prune any ports that are currently a part of mstb in our in-memory - * topology, but were not seen in this link address. Usually this - * means that they were removed while the topology was out of sync, - * e.g. during suspend/resume - */ - mutex_lock(&mgr->lock); - list_for_each_entry_safe(port, tmp, &mstb->ports, next) { - if (port_mask & BIT(port->port_num)) - continue; - - DRM_DEBUG_KMS("port %d was not in link address, removing\n", - port->port_num); - list_del(&port->next); - drm_dp_mst_topology_put_port(port); - changed = true; - } - mutex_unlock(&mgr->lock); + drm_kms_helper_hotplug_event(mgr->dev); out: if (ret <= 0) mstb->link_address_sent = false; kfree(txmsg); - return ret < 0 ? ret : changed; } static int @@ -3461,23 +3071,6 @@ out_unlock: } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_set_mst); -static void -drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb) -{ - struct drm_dp_mst_port *port; - - /* The link address will need to be re-sent on resume */ - mstb->link_address_sent = false; - - list_for_each_entry(port, &mstb->ports, next) { - /* The PBN for each port will also need to be re-probed */ - port->available_pbn = 0; - - if (port->mstb) - drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb); - } -} - /** * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager * @mgr: manager to suspend @@ -3491,89 +3084,62 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr) drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, DP_MST_EN | DP_UPSTREAM_IS_SRC); mutex_unlock(&mgr->lock); - flush_work(&mgr->up_req_work); flush_work(&mgr->work); - flush_work(&mgr->delayed_destroy_work); - - mutex_lock(&mgr->lock); - if (mgr->mst_state && mgr->mst_primary) - drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary); - mutex_unlock(&mgr->lock); + flush_work(&mgr->destroy_connector_work); } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); /** * drm_dp_mst_topology_mgr_resume() - resume the MST manager * @mgr: manager to resume - * @sync: whether or not to perform topology reprobing synchronously * * This will fetch DPCD and see if the device is still there, * if it is, it will rewrite the MSTM control bits, and return. * - * If the device fails this returns -1, and the driver should do + * if the device fails this returns -1, and the driver should do * a full MST reprobe, in case we were undocked. - * - * During system resume (where it is assumed that the driver will be calling - * drm_atomic_helper_resume()) this function should be called beforehand with - * @sync set to true. In contexts like runtime resume where the driver is not - * expected to be calling drm_atomic_helper_resume(), this function should be - * called with @sync set to false in order to avoid deadlocking. 
- * - * Returns: -1 if the MST topology was removed while we were suspended, 0 - * otherwise. */ -int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, - bool sync) +int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr) { - int ret; - u8 guid[16]; + int ret = 0; mutex_lock(&mgr->lock); - if (!mgr->mst_primary) - goto out_fail; - ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, - DP_RECEIVER_CAP_SIZE); - if (ret != DP_RECEIVER_CAP_SIZE) { - DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); - goto out_fail; - } + if (mgr->mst_primary) { + int sret; + u8 guid[16]; - ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, - DP_MST_EN | - DP_UP_REQ_EN | - DP_UPSTREAM_IS_SRC); - if (ret < 0) { - DRM_DEBUG_KMS("mst write failed - undocked during suspend?\n"); - goto out_fail; - } + sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE); + if (sret != DP_RECEIVER_CAP_SIZE) { + DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); + ret = -1; + goto out_unlock; + } - /* Some hubs forget their guids after they resume */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); - if (ret != 16) { - DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); - goto out_fail; - } - drm_dp_check_mstb_guid(mgr->mst_primary, guid); + ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC); + if (ret < 0) { + DRM_DEBUG_KMS("mst write failed - undocked during suspend?\n"); + ret = -1; + goto out_unlock; + } - /* - * For the final step of resuming the topology, we need to bring the - * state of our in-memory topology back into sync with reality. So, - * restart the probing process as if we're probing a new hub - */ - queue_work(system_long_wq, &mgr->work); + /* Some hubs forget their guids after they resume */ + sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (sret != 16) { + DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); + ret = -1; + goto out_unlock; + } + drm_dp_check_mstb_guid(mgr->mst_primary, guid); + + ret = 0; + } else + ret = -1; + +out_unlock: mutex_unlock(&mgr->lock); - - if (sync) { - DRM_DEBUG_KMS("Waiting for link probe work to finish re-syncing topology...\n"); - flush_work(&mgr->work); - } - - return 0; - -out_fail: - mutex_unlock(&mgr->lock); - return -1; + return ret; } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume); @@ -3690,78 +3256,12 @@ clear_down_rep_recv: return 0; } -static inline bool -drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_pending_up_req *up_req) -{ - struct drm_dp_mst_branch *mstb = NULL; - struct drm_dp_sideband_msg_req_body *msg = &up_req->msg; - struct drm_dp_sideband_msg_hdr *hdr = &up_req->hdr; - bool hotplug = false; - - if (hdr->broadcast) { - const u8 *guid = NULL; - - if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) - guid = msg->u.conn_stat.guid; - else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY) - guid = msg->u.resource_stat.guid; - - mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); - } else { - mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad); - } - - if (!mstb) { - DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", - hdr->lct); - return false; - } - - /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */ - if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) { - drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat); - hotplug = true; - } - - drm_dp_mst_topology_put_mstb(mstb); - return hotplug; -} - -static void 
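drm_dp_mst_topology_mgr_resume() above re-reads the receiver caps, rewrites the MSTM control bits, and re-reads the GUID, returning -1 so the driver performs a full reprobe if the sink was undocked while suspended. A rough sketch of that resume-time check; dpcd_read() and dpcd_write_ctrl() are placeholders, not the DRM DPCD helpers, and the register value is only indicative.

/* Resume-time sanity check sketch: if any step fails, report -1 so the
 * caller reprobes the whole topology. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool dpcd_read(unsigned char *buf, size_t len)
{
        memset(buf, 0x12, len);         /* pretend the sink answered */
        return true;
}

static bool dpcd_write_ctrl(unsigned char val)
{
        (void)val;                      /* pretend the write landed */
        return true;
}

static int mgr_resume(void)
{
        unsigned char caps[15];         /* receiver cap block */
        unsigned char guid[16];

        if (!dpcd_read(caps, sizeof(caps)))
                return -1;              /* undocked during suspend */
        if (!dpcd_write_ctrl(0x07))     /* MST_EN | UP_REQ_EN | UPSTREAM_IS_SRC */
                return -1;
        if (!dpcd_read(guid, sizeof(guid)))
                return -1;
        /* a real driver re-checks/restores the GUID here */
        return 0;
}

int main(void)
{
        printf("resume: %d\n", mgr_resume());
        return 0;
}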
drm_dp_mst_up_req_work(struct work_struct *work) -{ - struct drm_dp_mst_topology_mgr *mgr = - container_of(work, struct drm_dp_mst_topology_mgr, - up_req_work); - struct drm_dp_pending_up_req *up_req; - bool send_hotplug = false; - - mutex_lock(&mgr->probe_lock); - while (true) { - mutex_lock(&mgr->up_req_lock); - up_req = list_first_entry_or_null(&mgr->up_req_list, - struct drm_dp_pending_up_req, - next); - if (up_req) - list_del(&up_req->next); - mutex_unlock(&mgr->up_req_lock); - - if (!up_req) - break; - - send_hotplug |= drm_dp_mst_process_up_req(mgr, up_req); - kfree(up_req); - } - mutex_unlock(&mgr->probe_lock); - - if (send_hotplug) - drm_kms_helper_hotplug_event(mgr->dev); -} - static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { + struct drm_dp_sideband_msg_req_body msg; struct drm_dp_sideband_msg_hdr *hdr = &mgr->up_req_recv.initial_hdr; - struct drm_dp_pending_up_req *up_req; + struct drm_dp_mst_branch *mstb = NULL; + const u8 *guid; bool seqno; if (!drm_dp_get_one_sb_msg(mgr, true)) @@ -3770,53 +3270,56 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) if (!mgr->up_req_recv.have_eomt) return 0; - up_req = kzalloc(sizeof(*up_req), GFP_KERNEL); - if (!up_req) { - DRM_ERROR("Not enough memory to process MST up req\n"); - return -ENOMEM; + if (!hdr->broadcast) { + mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad); + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", + hdr->lct); + goto out; + } } - INIT_LIST_HEAD(&up_req->next); seqno = hdr->seqno; - drm_dp_sideband_parse_req(&mgr->up_req_recv, &up_req->msg); + drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg); - if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY && - up_req->msg.req_type != DP_RESOURCE_STATUS_NOTIFY) { - DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n", - up_req->msg.req_type); - kfree(up_req); + if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) + guid = msg.u.conn_stat.guid; + else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) + guid = msg.u.resource_stat.guid; + else goto out; + + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, + false); + + if (!mstb) { + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", + hdr->lct); + goto out; + } } - drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type, - seqno, false); - - if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { - const struct drm_dp_connection_status_notify *conn_stat = - &up_req->msg.u.conn_stat; + if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { + drm_dp_mst_handle_conn_stat(mstb, &msg.u.conn_stat); DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", - conn_stat->port_number, - conn_stat->legacy_device_plug_status, - conn_stat->displayport_device_plug_status, - conn_stat->message_capability_status, - conn_stat->input_port, - conn_stat->peer_device_type); - } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { - const struct drm_dp_resource_status_notify *res_stat = - &up_req->msg.u.resource_stat; + msg.u.conn_stat.port_number, + msg.u.conn_stat.legacy_device_plug_status, + msg.u.conn_stat.displayport_device_plug_status, + msg.u.conn_stat.message_capability_status, + msg.u.conn_stat.input_port, + msg.u.conn_stat.peer_device_type); + drm_kms_helper_hotplug_event(mgr->dev); + } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", - res_stat->port_number, - 
res_stat->available_pbn); + msg.u.resource_stat.port_number, + msg.u.resource_stat.available_pbn); } - up_req->hdr = *hdr; - mutex_lock(&mgr->up_req_lock); - list_add_tail(&up_req->next, &mgr->up_req_list); - mutex_unlock(&mgr->up_req_lock); - queue_work(system_long_wq, &mgr->up_req_work); - + drm_dp_mst_topology_put_mstb(mstb); out: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; @@ -3863,31 +3366,22 @@ EXPORT_SYMBOL(drm_dp_mst_hpd_irq); /** * drm_dp_mst_detect_port() - get connection status for an MST port * @connector: DRM connector for this port - * @ctx: The acquisition context to use for grabbing locks * @mgr: manager for this port - * @port: pointer to a port + * @port: unverified pointer to a port * - * This returns the current connection state for a port. + * This returns the current connection state for a port. It validates the + * port pointer still exists so the caller doesn't require a reference */ -int -drm_dp_mst_detect_port(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, - struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_port *port) +enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector, + struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { - int ret; + enum drm_connector_status status = connector_status_disconnected; /* we need to search for the port in the mgr in case it's gone */ port = drm_dp_mst_topology_get_port_validated(mgr, port); if (!port) return connector_status_disconnected; - ret = drm_modeset_lock(&mgr->base.lock, ctx); - if (ret) - goto out; - - ret = connector_status_disconnected; - if (!port->ddps) goto out; @@ -3897,7 +3391,7 @@ drm_dp_mst_detect_port(struct drm_connector *connector, break; case DP_PEER_DEVICE_SST_SINK: - ret = connector_status_connected; + status = connector_status_connected; /* for logical ports - cache the EDID */ if (port->port_num >= 8 && !port->cached_edid) { port->cached_edid = drm_get_edid(connector, &port->aux.ddc); @@ -3905,12 +3399,12 @@ drm_dp_mst_detect_port(struct drm_connector *connector, break; case DP_PEER_DEVICE_DP_LEGACY_CONV: if (port->ldps) - ret = connector_status_connected; + status = connector_status_connected; break; } out: drm_dp_mst_topology_put_port(port); - return ret; + return status; } EXPORT_SYMBOL(drm_dp_mst_detect_port); @@ -4500,103 +3994,34 @@ static void drm_dp_tx_work(struct work_struct *work) mutex_unlock(&mgr->qlock); } -static inline void -drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port) +static void drm_dp_destroy_connector_work(struct work_struct *work) { - if (port->connector) - port->mgr->cbs->destroy_connector(port->mgr, port->connector); - - drm_dp_port_set_pdt(port, DP_PEER_DEVICE_NONE); - drm_dp_mst_put_port_malloc(port); -} - -static inline void -drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb) -{ - struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; - struct drm_dp_mst_port *port, *tmp; - bool wake_tx = false; - - mutex_lock(&mgr->lock); - list_for_each_entry_safe(port, tmp, &mstb->ports, next) { - list_del(&port->next); - drm_dp_mst_topology_put_port(port); - } - mutex_unlock(&mgr->lock); - - /* drop any tx slots msg */ - mutex_lock(&mstb->mgr->qlock); - if (mstb->tx_slots[0]) { - mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT; - mstb->tx_slots[0] = NULL; - wake_tx = true; - } - if (mstb->tx_slots[1]) { - mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT; - mstb->tx_slots[1] = NULL; - wake_tx = true; - } - mutex_unlock(&mstb->mgr->qlock); - - if (wake_tx) 
- wake_up_all(&mstb->mgr->tx_waitq); - - drm_dp_mst_put_mstb_malloc(mstb); -} - -static void drm_dp_delayed_destroy_work(struct work_struct *work) -{ - struct drm_dp_mst_topology_mgr *mgr = - container_of(work, struct drm_dp_mst_topology_mgr, - delayed_destroy_work); - bool send_hotplug = false, go_again; - + struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); + struct drm_dp_mst_port *port; + bool send_hotplug = false; /* * Not a regular list traverse as we have to drop the destroy - * connector lock before destroying the mstb/port, to avoid AB->BA + * connector lock before destroying the connector, to avoid AB->BA * ordering between this lock and the config mutex. */ - do { - go_again = false; - - for (;;) { - struct drm_dp_mst_branch *mstb; - - mutex_lock(&mgr->delayed_destroy_lock); - mstb = list_first_entry_or_null(&mgr->destroy_branch_device_list, - struct drm_dp_mst_branch, - destroy_next); - if (mstb) - list_del(&mstb->destroy_next); - mutex_unlock(&mgr->delayed_destroy_lock); - - if (!mstb) - break; - - drm_dp_delayed_destroy_mstb(mstb); - go_again = true; + for (;;) { + mutex_lock(&mgr->destroy_connector_lock); + port = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_dp_mst_port, next); + if (!port) { + mutex_unlock(&mgr->destroy_connector_lock); + break; } + list_del(&port->next); + mutex_unlock(&mgr->destroy_connector_lock); - for (;;) { - struct drm_dp_mst_port *port; + mgr->cbs->destroy_connector(mgr, port->connector); - mutex_lock(&mgr->delayed_destroy_lock); - port = list_first_entry_or_null(&mgr->destroy_port_list, - struct drm_dp_mst_port, - next); - if (port) - list_del(&port->next); - mutex_unlock(&mgr->delayed_destroy_lock); - - if (!port) - break; - - drm_dp_delayed_destroy_port(port); - send_hotplug = true; - go_again = true; - } - } while (go_again); + drm_dp_port_teardown_pdt(port, port->pdt); + port->pdt = DP_PEER_DEVICE_NONE; + drm_dp_mst_put_port_malloc(port); + send_hotplug = true; + } if (send_hotplug) drm_kms_helper_hotplug_event(mgr->dev); } @@ -4783,20 +4208,12 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->lock); mutex_init(&mgr->qlock); mutex_init(&mgr->payload_lock); - mutex_init(&mgr->delayed_destroy_lock); - mutex_init(&mgr->up_req_lock); - mutex_init(&mgr->probe_lock); -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) - mutex_init(&mgr->topology_ref_history_lock); -#endif + mutex_init(&mgr->destroy_connector_lock); INIT_LIST_HEAD(&mgr->tx_msg_downq); - INIT_LIST_HEAD(&mgr->destroy_port_list); - INIT_LIST_HEAD(&mgr->destroy_branch_device_list); - INIT_LIST_HEAD(&mgr->up_req_list); + INIT_LIST_HEAD(&mgr->destroy_connector_list); INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work); INIT_WORK(&mgr->tx_work, drm_dp_tx_work); - INIT_WORK(&mgr->delayed_destroy_work, drm_dp_delayed_destroy_work); - INIT_WORK(&mgr->up_req_work, drm_dp_mst_up_req_work); + INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work); init_waitqueue_head(&mgr->tx_waitq); mgr->dev = dev; mgr->aux = aux; @@ -4837,7 +4254,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) { drm_dp_mst_topology_mgr_set_mst(mgr, false); flush_work(&mgr->work); - cancel_work_sync(&mgr->delayed_destroy_work); + flush_work(&mgr->destroy_connector_work); mutex_lock(&mgr->payload_lock); kfree(mgr->payloads); mgr->payloads = NULL; @@ -4849,15 +4266,10 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) 
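Both drm_dp_delayed_destroy_work() and drm_dp_destroy_connector_work() above use the same deferral trick: objects are queued under a small list lock and torn down later from a worker, so the heavy teardown never runs with that lock held and the AB->BA ordering against the modeset locks is avoided. A userspace sketch of the pattern with pthreads; names are illustrative, build with -lpthread.

/* Deferred-destroy sketch: queue under a small lock, destroy from a
 * worker with the lock dropped. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct victim {
        struct victim *next;
        int id;
};

static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
static struct victim *destroy_list;

static void queue_destroy(struct victim *v)
{
        pthread_mutex_lock(&destroy_lock);
        v->next = destroy_list;
        destroy_list = v;
        pthread_mutex_unlock(&destroy_lock);
}

static void *destroy_worker(void *arg)
{
        (void)arg;

        for (;;) {
                struct victim *v;

                pthread_mutex_lock(&destroy_lock);
                v = destroy_list;
                if (v)
                        destroy_list = v->next;
                pthread_mutex_unlock(&destroy_lock);

                if (!v)
                        break;          /* queue drained */

                /* heavy teardown happens without destroy_lock held */
                printf("destroying victim %d\n", v->id);
                free(v);
        }
        return NULL;
}

int main(void)
{
        pthread_t worker;

        for (int i = 0; i < 3; i++) {
                struct victim *v = calloc(1, sizeof(*v));

                if (!v)
                        return 1;
                v->id = i;
                queue_destroy(v);
        }
        pthread_create(&worker, NULL, destroy_worker, NULL);
        pthread_join(worker, NULL);
        return 0;
}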
drm_atomic_private_obj_fini(&mgr->base); mgr->funcs = NULL; - mutex_destroy(&mgr->delayed_destroy_lock); + mutex_destroy(&mgr->destroy_connector_lock); mutex_destroy(&mgr->payload_lock); mutex_destroy(&mgr->qlock); mutex_destroy(&mgr->lock); - mutex_destroy(&mgr->up_req_lock); - mutex_destroy(&mgr->probe_lock); -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) - mutex_destroy(&mgr->topology_ref_history_lock); -#endif } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 000fa4a1899f..2f2b889096b0 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1105,33 +1105,21 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, if (obj_size < vma->vm_end - vma->vm_start) return -EINVAL; - /* Take a ref for this mapping of the object, so that the fault - * handler can dereference the mmap offset's pointer to the object. - * This reference is cleaned up by the corresponding vm_close - * (which should happen whether the vma was created by this call, or - * by a vm_open due to mremap or partial unmap or whatever). - */ - drm_gem_object_get(obj); - if (obj->funcs && obj->funcs->mmap) { /* Remove the fake offset */ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); ret = obj->funcs->mmap(obj, vma); - if (ret) { - drm_gem_object_put_unlocked(obj); + if (ret) return ret; - } WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); } else { if (obj->funcs && obj->funcs->vm_ops) vma->vm_ops = obj->funcs->vm_ops; else if (dev->driver->gem_vm_ops) vma->vm_ops = dev->driver->gem_vm_ops; - else { - drm_gem_object_put_unlocked(obj); + else return -EINVAL; - } vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); @@ -1140,6 +1128,14 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, vma->vm_private_data = obj; + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_get(obj); + return 0; } EXPORT_SYMBOL(drm_gem_mmap_obj); diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index 605a8a3da7f9..7412bfc5c05a 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,19 +64,8 @@ int drm_gem_ttm_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); - int ret; - ret = ttm_bo_mmap_obj(vma, bo); - if (ret < 0) - return ret; - - /* - * ttm has its own object refcounting, so drop gem reference - * to avoid double accounting counting. 
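The drm_gem_mmap_obj() hunk above is about reference/unwind ordering: once a reference is taken for the mapping, every failure path has to drop it again, or the reference must only be taken after the fallible steps have succeeded. A tiny sketch of that rule, not the GEM API.

/* Reference/unwind ordering sketch: take a ref for the mapping, and
 * make sure every error path gives it back. */
#include <stdio.h>

struct object {
        int refs;
};

static void obj_get(struct object *o) { o->refs++; }
static void obj_put(struct object *o) { o->refs--; }

static int do_map(struct object *o, int should_fail)
{
        obj_get(o);                     /* reference for the new mapping */
        if (should_fail) {
                obj_put(o);             /* unwind on the error path */
                return -1;
        }
        return 0;
}

int main(void)
{
        struct object o = { .refs = 1 };

        do_map(&o, 1);
        printf("after failed map: %d ref(s), expected 1\n", o.refs);
        do_map(&o, 0);
        printf("after good map:   %d ref(s), expected 2\n", o.refs);
        return 0;
}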
- */ - drm_gem_object_put_unlocked(gem); - - return 0; + return ttm_bo_mmap_obj(vma, bo); } EXPORT_SYMBOL(drm_gem_ttm_mmap); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 6cfdb95fef2f..7b24338fad3c 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1069,9 +1069,9 @@ static bool mixer_mode_fixup(struct exynos_drm_crtc *crtc, struct mixer_context *ctx = crtc->ctx; int width = mode->hdisplay, height = mode->vdisplay, i; - static const struct { + struct { int hdisplay, vdisplay, htotal, vtotal, scan_val; - } modes[] = { + } static const modes[] = { { 720, 480, 858, 525, MXR_CFG_SCAN_NTSC | MXR_CFG_SCAN_SD }, { 720, 576, 864, 625, MXR_CFG_SCAN_PAL | MXR_CFG_SCAN_SD }, { 1280, 720, 1650, 750, MXR_CFG_SCAN_HD_720 | MXR_CFG_SCAN_HD }, diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index ba9595960bbe..3c6d57df262d 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -148,9 +148,3 @@ menu "drm/i915 Profile Guided Optimisation" depends on DRM_I915 source "drivers/gpu/drm/i915/Kconfig.profile" endmenu - -menu "drm/i915 Unstable Evolution" - visible if EXPERT && STAGING && BROKEN - depends on DRM_I915 - source "drivers/gpu/drm/i915/Kconfig.unstable" -endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 1799537a3228..48df8889a88a 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -12,29 +12,6 @@ config DRM_I915_USERFAULT_AUTOSUSPEND May be 0 to disable the extra delay and solely use the device level runtime pm autosuspend delay tunable. -config DRM_I915_HEARTBEAT_INTERVAL - int "Interval between heartbeat pulses (ms)" - default 2500 # milliseconds - help - The driver sends a periodic heartbeat down all active engines to - check the health of the GPU and undertake regular house-keeping of - internal driver state. - - May be 0 to disable heartbeats and therefore disable automatic GPU - hang detection. - -config DRM_I915_PREEMPT_TIMEOUT - int "Preempt timeout (ms, jiffy granularity)" - default 100 # milliseconds - help - How long to wait (in milliseconds) for a preemption event to occur - when submitting a new context via execlists. If the current context - does not hit an arbitration point and yield to HW before the timer - expires, the HW will be reset to allow the more important context - to execute. - - May be 0 to disable the timeout. - config DRM_I915_SPIN_REQUEST int "Busywait for request completion (us)" default 5 # microseconds @@ -48,29 +25,3 @@ config DRM_I915_SPIN_REQUEST May be 0 to disable the initial spin. In practice, we estimate the cost of enabling the interrupt (if currently disabled) to be a few microseconds. - -config DRM_I915_STOP_TIMEOUT - int "How long to wait for an engine to quiesce gracefully before reset (ms)" - default 100 # milliseconds - help - By stopping submission and sleeping for a short time before resetting - the GPU, we allow the innocent contexts also on the system to quiesce. - It is then less likely for a hanging context to cause collateral - damage as the system is reset in order to recover. The corollary is - that the reset itself may take longer and so be more disruptive to - interactive or low latency workloads. 
- -config DRM_I915_TIMESLICE_DURATION - int "Scheduling quantum for userspace batches (ms, jiffy granularity)" - default 1 # milliseconds - help - When two user batches of equal priority are executing, we will - alternate execution of each batch to ensure forward progress of - all users. This is necessary in some cases where there may be - an implicit dependency between those batches that requires - concurrent execution in order for them to proceed, e.g. they - interact with each other via userspace semaphores. Each context - is scheduled for execution for the timeslice duration, before - switching to the next context. - - May be 0 to disable timeslicing. diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 90dcf09f52cc..a16a2daef977 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -78,24 +78,22 @@ gt-y += \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ gt/intel_engine_cs.o \ - gt/intel_engine_heartbeat.o \ - gt/intel_engine_pm.o \ gt/intel_engine_pool.o \ + gt/intel_engine_pm.o \ gt/intel_engine_user.o \ gt/intel_gt.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ + gt/intel_hangcheck.o \ gt/intel_llc.o \ gt/intel_lrc.o \ - gt/intel_mocs.o \ gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ - gt/intel_ring.o \ - gt/intel_ring_submission.o \ - gt/intel_rps.o \ + gt/intel_ringbuffer.o \ + gt/intel_mocs.o \ gt/intel_sseu.o \ gt/intel_timeline.o \ gt/intel_workarounds.o @@ -121,7 +119,6 @@ gem-y += \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_object_blt.o \ - gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ @@ -150,7 +147,6 @@ i915-y += \ i915_scheduler.o \ i915_trace_points.o \ i915_vma.o \ - intel_region_lmem.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support @@ -247,8 +243,7 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o \ - oa/i915_oa_tgl.o + oa/i915_oa_icl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 325df29b0447..6e398c33a524 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1584,7 +1584,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->get_hw_state = gen11_dsi_get_hw_state; encoder->type = INTEL_OUTPUT_DSI; encoder->cloneable = 0; - encoder->pipe_mask = ~0; + encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); encoder->power_domain = POWER_DOMAIN_PORT_DSI; encoder->get_power_domains = gen11_dsi_get_power_domains; diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 9cd6d2348a1e..c5a552a69752 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -429,13 +429,6 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) struct intel_atomic_state *state = to_intel_atomic_state(s); drm_atomic_state_default_clear(&state->base); state->dpll_set = state->modeset = false; - state->global_state_changed = false; - state->active_pipes = 0; - memset(&state->min_cdclk, 0, sizeof(state->min_cdclk)); - memset(&state->min_voltage_level, 0, sizeof(state->min_voltage_level)); - memset(&state->cdclk.logical, 0, sizeof(state->cdclk.logical)); - memset(&state->cdclk.actual, 0, sizeof(state->cdclk.actual)); - state->cdclk.pipe = INVALID_PIPE; } struct 
intel_crtc_state * @@ -449,40 +442,3 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } - -int intel_atomic_lock_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - int ret; - - ret = drm_modeset_lock(&crtc->base.mutex, - state->base.acquire_ctx); - if (ret) - return ret; - } - - return 0; -} - -int intel_atomic_serialize_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 49d5cb1b9e0a..58065d3161a3 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -16,7 +16,6 @@ struct drm_crtc_state; struct drm_device; struct drm_i915_private; struct drm_property; -struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; @@ -47,8 +46,4 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); -int intel_atomic_lock_global_state(struct intel_atomic_state *state); - -int intel_atomic_serialize_global_state(struct intel_atomic_state *state); - #endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 98f557a9f8ee..a6cff5a160fb 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -138,44 +138,6 @@ unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, return cpp * crtc_state->pixel_rate; } -bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct intel_plane_state *plane_state = - intel_atomic_get_new_plane_state(state, plane); - struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); - struct intel_crtc_state *crtc_state; - - if (!plane_state->base.visible || !plane->min_cdclk) - return false; - - crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - - crtc_state->min_cdclk[plane->id] = - plane->min_cdclk(crtc_state, plane_state); - - /* - * Does the cdclk need to be bumbed up? - * - * Note: we obviously need to be called before the new - * cdclk frequency is calculated so state->cdclk.logical - * hasn't been populated yet. Hence we look at the old - * cdclk state under dev_priv->cdclk.logical. This is - * safe as long we hold at least one crtc mutex (which - * must be true since we have crtc_state). 
- */ - if (crtc_state->min_cdclk[plane->id] > dev_priv->cdclk.logical.cdclk) { - DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk (%d kHz) > logical cdclk (%d kHz)\n", - plane->base.base.id, plane->base.name, - crtc_state->min_cdclk[plane->id], - dev_priv->cdclk.logical.cdclk); - return true; - } - - return false; -} - int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state, const struct intel_plane_state *old_plane_state, @@ -189,7 +151,6 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ new_crtc_state->nv12_planes &= ~BIT(plane->id); new_crtc_state->c8_planes &= ~BIT(plane->id); new_crtc_state->data_rate[plane->id] = 0; - new_crtc_state->min_cdclk[plane->id] = 0; new_plane_state->base.visible = false; if (!new_plane_state->base.crtc && !old_plane_state->base.crtc) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index e61e9a82aadf..dc85af02e9b7 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -47,7 +47,5 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *plane_state); -bool intel_plane_calc_min_cdclk(struct intel_atomic_state *state, - struct intel_plane *plane); #endif /* __INTEL_ATOMIC_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 85e6b2bbb34f..ed18511befa3 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -28,7 +28,6 @@ #include #include "i915_drv.h" -#include "intel_atomic.h" #include "intel_audio.h" #include "intel_display_types.h" #include "intel_lpe_audio.h" @@ -819,8 +818,13 @@ retry: to_intel_atomic_state(state)->cdclk.force_min_cdclk = enable ? 2 * 96000 : 0; - /* Protects dev_priv->cdclk.force_min_cdclk */ - ret = intel_atomic_lock_global_state(to_intel_atomic_state(state)); + /* + * Protects dev_priv->cdclk.force_min_cdclk + * Need to lock this here in case we have no active pipes + * and thus wouldn't lock it during the commit otherwise. 
+ */ + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, + &ctx); if (!ret) ret = drm_atomic_commit(state); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0caef2592a7e..3d867963a6d1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1918,19 +1918,6 @@ static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) return DIV_ROUND_UP(pixel_rate * 100, 90); } -static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_plane *plane; - int min_cdclk = 0; - - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) - min_cdclk = max(crtc_state->min_cdclk[plane->id], min_cdclk); - - return min_cdclk; -} - int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = @@ -1999,9 +1986,6 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) IS_GEMINILAKE(dev_priv)) min_cdclk = max(158400, min_cdclk); - /* Account for additional needs from the planes */ - min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); - if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); @@ -2023,20 +2007,11 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) sizeof(state->min_cdclk)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - int ret; - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); if (min_cdclk < 0) return min_cdclk; - if (state->min_cdclk[i] == min_cdclk) - continue; - state->min_cdclk[i] = min_cdclk; - - ret = intel_atomic_lock_global_state(state); - if (ret) - return ret; } min_cdclk = state->cdclk.force_min_cdclk; @@ -2059,7 +2034,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * future platforms this code will need to be * adjusted. 
*/ -static int bxt_compute_min_voltage_level(struct intel_atomic_state *state) +static u8 bxt_compute_min_voltage_level(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -2072,21 +2047,11 @@ static int bxt_compute_min_voltage_level(struct intel_atomic_state *state) sizeof(state->min_voltage_level)); for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { - int ret; - if (crtc_state->base.enable) - min_voltage_level = crtc_state->min_voltage_level; + state->min_voltage_level[i] = + crtc_state->min_voltage_level; else - min_voltage_level = 0; - - if (state->min_voltage_level[i] == min_voltage_level) - continue; - - state->min_voltage_level[i] = min_voltage_level; - - ret = intel_atomic_lock_global_state(state); - if (ret) - return ret; + state->min_voltage_level[i] = 0; } min_voltage_level = 0; @@ -2230,24 +2195,20 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, min_voltage_level, cdclk, vco; + int min_cdclk, cdclk, vco; min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; - min_voltage_level = bxt_compute_min_voltage_level(state); - if (min_voltage_level < 0) - return min_voltage_level; - cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = - max_t(int, min_voltage_level, - dev_priv->display.calc_voltage_level(cdclk)); + max(dev_priv->display.calc_voltage_level(cdclk), + bxt_compute_min_voltage_level(state)); if (!state->active_pipes) { cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); @@ -2264,6 +2225,23 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) return 0; } +static int intel_lock_all_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + /* Add all pipes to the state */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + return 0; +} + static int intel_modeset_all_pipes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -2330,63 +2308,48 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) return ret; /* - * Writes to dev_priv->cdclk.{actual,logical} must protected - * by holding all the crtc mutexes even if we don't end up + * Writes to dev_priv->cdclk.logical must protected by + * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_cdclk_changed(&dev_priv->cdclk.actual, - &state->cdclk.actual)) { - /* - * Also serialize commits across all crtcs - * if the actual hw needs to be poked. 
- */ - ret = intel_atomic_serialize_global_state(state); - if (ret) + if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &state->cdclk.logical)) { + ret = intel_lock_all_pipes(state); + if (ret < 0) return ret; - } else if (intel_cdclk_changed(&dev_priv->cdclk.logical, - &state->cdclk.logical)) { - ret = intel_atomic_lock_global_state(state); - if (ret) - return ret; - } else { - return 0; } - if (is_power_of_2(state->active_pipes) && - intel_cdclk_needs_cd2x_update(dev_priv, - &dev_priv->cdclk.actual, - &state->cdclk.actual)) { + if (is_power_of_2(state->active_pipes)) { struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; pipe = ilog2(state->active_pipes); crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - - if (drm_atomic_crtc_needs_modeset(&crtc_state->base)) + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (crtc_state && + drm_atomic_crtc_needs_modeset(&crtc_state->base)) pipe = INVALID_PIPE; } else { pipe = INVALID_PIPE; } - if (pipe != INVALID_PIPE) { - state->cdclk.pipe = pipe; + /* All pipes must be switched off while we change the cdclk. */ + if (pipe != INVALID_PIPE && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &state->cdclk.actual)) { + ret = intel_lock_all_pipes(state); + if (ret) + return ret; - DRM_DEBUG_KMS("Can change cdclk with pipe %c active\n", - pipe_name(pipe)); + state->cdclk.pipe = pipe; } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, &state->cdclk.actual)) { - /* All pipes must be switched off while we change the cdclk. */ ret = intel_modeset_all_pipes(state); if (ret) return ret; state->cdclk.pipe = INVALID_PIPE; - - DRM_DEBUG_KMS("Modeset required for cdclk change\n"); } DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 39cc6d79dc85..ff6126ea793c 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -844,7 +844,7 @@ load_detect: } /* for pre-945g platforms use load detect */ - ret = intel_get_load_detect_pipe(connector, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret > 0) { if (intel_crt_detect_ddc(connector)) status = connector_status_connected; @@ -864,13 +864,6 @@ load_detect: out: intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); - - /* - * Make sure the refs for power wells enabled during detect are - * dropped to avoid a new detect cycle triggered by HPD polling. 
- */ - intel_display_power_flush_work(dev_priv); - return status; } @@ -1001,9 +994,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI); if (IS_I830(dev_priv)) - crt->base.pipe_mask = BIT(PIPE_A); + crt->base.crtc_mask = BIT(PIPE_A); else - crt->base.pipe_mask = ~0; + crt->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b51f244ad7a5..9ba794cb9b4f 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1905,9 +1905,6 @@ intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); - - if (INTEL_GEN(dev_priv) >= 12) - temp |= TRANS_DDI_MST_TRANSPORT_SELECT(crtc_state->cpu_transcoder); } else { temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); @@ -2237,7 +2234,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder, /* * VDSC power is needed when DSC is enabled */ - if (crtc_state->dsc.compression_enable) + if (crtc_state->dsc_params.compression_enable) intel_display_power_get(dev_priv, intel_dsc_power_domain(crtc_state)); } @@ -2841,8 +2838,6 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, for (ln = 0; ln < 2; ln++) { I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); - I915_WRITE(DKL_TX_PMD_LANE_SUS(tc_port), 0); - /* All the registers are RMW */ val = I915_READ(DKL_TX_DPCNTL0(tc_port)); val &= ~dpcnt_mask; @@ -3875,12 +3870,12 @@ static i915_reg_t gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, enum port port) { - static const enum transcoder trans[] = { - [PORT_A] = TRANSCODER_EDP, - [PORT_B] = TRANSCODER_A, - [PORT_C] = TRANSCODER_B, - [PORT_D] = TRANSCODER_C, - [PORT_E] = TRANSCODER_A, + static const i915_reg_t regs[] = { + [PORT_A] = CHICKEN_TRANS_EDP, + [PORT_B] = CHICKEN_TRANS_A, + [PORT_C] = CHICKEN_TRANS_B, + [PORT_D] = CHICKEN_TRANS_C, + [PORT_E] = CHICKEN_TRANS_A, }; WARN_ON(INTEL_GEN(dev_priv) < 9); @@ -3888,7 +3883,7 @@ gen9_chicken_trans_reg_by_port(struct drm_i915_private *dev_priv, if (WARN_ON(port < PORT_A || port > PORT_E)) port = PORT_A; - return CHICKEN_TRANS(trans[port]); + return regs[port]; } static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, @@ -4688,6 +4683,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) struct intel_encoder *intel_encoder; struct drm_encoder *encoder; bool init_hdmi, init_dp, init_lspcon = false; + enum pipe pipe; enum phy phy = intel_port_to_phy(dev_priv, port); init_hdmi = port_info->supports_dvi || port_info->supports_hdmi; @@ -4739,7 +4735,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->cloneable = 0; - intel_encoder->pipe_mask = ~0; + for_each_pipe(dev_priv, pipe) + intel_encoder->crtc_mask |= BIT(pipe); if (INTEL_GEN(dev_priv) >= 11) intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 348ce0456696..2912abd85148 100644 --- 
a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -55,8 +55,6 @@ #include "display/intel_tv.h" #include "display/intel_vdsc.h" -#include "gt/intel_rps.h" - #include "i915_drv.h" #include "i915_trace.h" #include "intel_acpi.h" @@ -90,17 +88,7 @@ static const u32 i8xx_primary_formats[] = { DRM_FORMAT_XRGB8888, }; -/* Primary plane formats for ivb (no fp16 due to hw issue) */ -static const u32 ivb_primary_formats[] = { - DRM_FORMAT_C8, - DRM_FORMAT_RGB565, - DRM_FORMAT_XRGB8888, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_XRGB2101010, - DRM_FORMAT_XBGR2101010, -}; - -/* Primary plane formats for gen >= 4, except ivb */ +/* Primary plane formats for gen >= 4 */ static const u32 i965_primary_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_RGB565, @@ -108,7 +96,6 @@ static const u32 i965_primary_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_XBGR16161616F, }; static const u64 i9xx_format_modifiers[] = { @@ -2984,8 +2971,6 @@ static int i9xx_format_to_fourcc(int format) return DRM_FORMAT_XRGB2101010; case DISPPLANE_RGBX101010: return DRM_FORMAT_XBGR2101010; - case DISPPLANE_RGBX161616: - return DRM_FORMAT_XBGR16161616F; } } @@ -3169,7 +3154,6 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_set_plane_visible(crtc_state, plane_state, false); fixup_active_planes(crtc_state); crtc_state->data_rate[plane->id] = 0; - crtc_state->min_cdclk[plane->id] = 0; if (plane->id == PLANE_PRIMARY) intel_pre_disable_primary_noatomic(&crtc->base); @@ -3593,53 +3577,6 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int cpp = fb->format->cpp[0]; - - /* - * g4x bspec says 64bpp pixel rate can't exceed 80% - * of cdclk when the sprite plane is enabled on the - * same pipe. ilk/snb bspec says 64bpp pixel rate is - * never allowed to exceed 80% of cdclk. Let's just go - * with the ilk/snb limit always. - */ - if (cpp == 8) { - *num = 10; - *den = 8; - } else { - *num = 1; - *den = 1; - } -} - -static int i9xx_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - unsigned int pixel_rate; - unsigned int num, den; - - /* - * Note that crtc_state->pixel_rate accounts for both - * horizontal and vertical panel fitter downscaling factors. - * Pre-HSW bspec tells us to only consider the horizontal - * downscaling factor here. We ignore that and just consider - * both for simplicity. 
- */ - pixel_rate = crtc_state->pixel_rate; - - i9xx_plane_ratio(crtc_state, plane_state, &num, &den); - - /* two pixels per clock with double wide pipe */ - if (crtc_state->double_wide) - den *= 2; - - return DIV_ROUND_UP(pixel_rate * num, den); -} - unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -3722,9 +3659,6 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XBGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; - case DRM_FORMAT_XBGR16161616F: - dspcntr |= DISPPLANE_RGBX161616; - break; default: MISSING_CASE(fb->format->format); return 0; @@ -3747,8 +3681,7 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - int src_x, src_y, src_w; + int src_x, src_y; u32 offset; int ret; @@ -3759,14 +3692,9 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) if (!plane_state->base.visible) return 0; - src_w = drm_rect_width(&plane_state->base.src) >> 16; src_x = plane_state->base.src.x1 >> 16; src_y = plane_state->base.src.y1 >> 16; - /* Undocumented hardware limit on i965/g4x/vlv/chv */ - if (HAS_GMCH(dev_priv) && fb->format->cpp[0] == 8 && src_w > 2048) - return -EINVAL; - intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) @@ -5664,6 +5592,10 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -5679,13 +5611,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_XVYU12_16161616: case DRM_FORMAT_XVYU16161616: break; - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - if (INTEL_GEN(dev_priv) >= 11) - break; - /* fall through */ default: DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", intel_plane->base.base.id, intel_plane->base.name, @@ -9434,6 +9359,7 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv, static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; + bool pch_ssc_in_use = false; bool has_fdi = false; for_each_intel_encoder(&dev_priv->drm, encoder) { @@ -9461,24 +9387,22 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv) * clock hierarchy. That would also allow us to do * clock bending finally. 
*/ - dev_priv->pch_ssc_use = 0; - if (spll_uses_pch_ssc(dev_priv)) { DRM_DEBUG_KMS("SPLL using PCH SSC\n"); - dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL); + pch_ssc_in_use = true; } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) { DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n"); - dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1); + pch_ssc_in_use = true; } if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) { DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n"); - dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2); + pch_ssc_in_use = true; } - if (dev_priv->pch_ssc_use) + if (pch_ssc_in_use) return; if (has_fdi) { @@ -10947,7 +10871,7 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - unsigned int width = drm_rect_width(&plane_state->base.dst); + unsigned int width = drm_rect_width(&plane_state->base.src); unsigned int height = drm_rect_height(&plane_state->base.dst); cntl = plane_state->ctl | @@ -11328,6 +11252,7 @@ static int intel_modeset_disable_planes(struct drm_atomic_state *state, } int intel_get_load_detect_pipe(struct drm_connector *connector, + const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { @@ -11434,8 +11359,10 @@ found: crtc_state->base.active = crtc_state->base.enable = true; - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, - &load_detect_mode); + if (!mode) + mode = &load_detect_mode; + + ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; @@ -11779,7 +11706,6 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat plane_state->base.visible = visible = false; crtc_state->active_planes &= ~BIT(plane->id); crtc_state->data_rate[plane->id] = 0; - crtc_state->min_cdclk[plane->id] = 0; } if (!was_visible && !visible) @@ -12146,6 +12072,11 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, if (INTEL_GEN(dev_priv) >= 9) { if (mode_changed || crtc_state->update_pipe) ret = skl_update_scaler_crtc(crtc_state); + + if (!ret) + ret = icl_check_nv12_planes(crtc_state); + if (!ret) + ret = skl_check_pipe_max_pixel_rate(crtc, crtc_state); if (!ret) ret = intel_atomic_setup_scalers(dev_priv, crtc, crtc_state); @@ -12494,12 +12425,6 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state) unsigned int used_mst_ports = 0; bool ret = true; - /* - * We're going to peek into connector->state, - * hence connection_mutex must be held. 
- */ - drm_modeset_lock_assert_held(&dev->mode_config.connection_mutex); - /* * Walk the connector list instead of the encoder * list to detect the problem on ddi platforms @@ -13787,6 +13712,11 @@ static int intel_modeset_checks(struct intel_atomic_state *state) struct intel_crtc *crtc; int ret, i; + if (!check_digital_port_conflicts(state)) { + DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); + return -EINVAL; + } + /* keep the current setting */ if (!state->cdclk.force_min_cdclk_changed) state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk; @@ -13795,6 +13725,7 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->active_pipes = dev_priv->active_pipes; state->cdclk.logical = dev_priv->cdclk.logical; state->cdclk.actual = dev_priv->cdclk.actual; + state->cdclk.pipe = INVALID_PIPE; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { @@ -13807,12 +13738,6 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->active_pipe_changes |= BIT(crtc->pipe); } - if (state->active_pipe_changes) { - ret = intel_atomic_lock_global_state(state); - if (ret) - return ret; - } - ret = intel_modeset_calc_cdclk(state); if (ret) return ret; @@ -13865,49 +13790,12 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->has_drrs = old_crtc_state->has_drrs; } -static int intel_crtc_add_planes_to_state(struct intel_atomic_state *state, - struct intel_crtc *crtc, - u8 plane_ids_mask) +static int intel_atomic_check_planes(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_plane *plane; - - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - struct intel_plane_state *plane_state; - - if ((plane_ids_mask & BIT(plane->id)) == 0) - continue; - - plane_state = intel_atomic_get_plane_state(state, plane); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - } - - return 0; -} - -static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv) -{ - /* See {hsw,vlv,ivb}_plane_ratio() */ - return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) || - IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) || - IS_IVYBRIDGE(dev_priv); -} - -static int intel_atomic_check_planes(struct intel_atomic_state *state, - bool *need_modeset) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_plane_state *plane_state; struct intel_plane *plane; - struct intel_crtc *crtc; int i, ret; - ret = icl_add_linked_planes(state); - if (ret) - return ret; - for_each_new_intel_plane_in_state(state, plane, plane_state, i) { ret = intel_plane_atomic_check(state, plane); if (ret) { @@ -13917,41 +13805,6 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state, } } - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { - u8 old_active_planes, new_active_planes; - - ret = icl_check_nv12_planes(new_crtc_state); - if (ret) - return ret; - - /* - * On some platforms the number of active planes affects - * the planes' minimum cdclk calculation. Add such planes - * to the state before we compute the minimum cdclk. 
- */ - if (!active_planes_affects_min_cdclk(dev_priv)) - continue; - - old_active_planes = old_crtc_state->active_planes & ~BIT(PLANE_CURSOR); - new_active_planes = new_crtc_state->active_planes & ~BIT(PLANE_CURSOR); - - if (hweight8(old_active_planes) == hweight8(new_active_planes)) - continue; - - ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes); - if (ret) - return ret; - } - - /* - * active_planes bitmask has been updated, and potentially - * affected planes are part of the state. We can now - * compute the minimum cdclk for each plane. - */ - for_each_new_intel_plane_in_state(state, plane, plane_state, i) - *need_modeset |= intel_plane_calc_min_cdclk(state, plane); - return 0; } @@ -13986,7 +13839,7 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; int ret, i; - bool any_ms = false; + bool any_ms = state->cdclk.force_min_cdclk_changed; /* Catch I915_MODE_FLAG_INHERITED */ for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, @@ -14020,22 +13873,10 @@ static int intel_atomic_check(struct drm_device *dev, any_ms = true; } - if (any_ms && !check_digital_port_conflicts(state)) { - DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); - ret = EINVAL; - goto fail; - } - ret = drm_dp_mst_atomic_check(&state->base); if (ret) goto fail; - any_ms |= state->cdclk.force_min_cdclk_changed; - - ret = intel_atomic_check_planes(state, &any_ms); - if (ret) - goto fail; - if (any_ms) { ret = intel_modeset_checks(state); if (ret) @@ -14044,6 +13885,14 @@ static int intel_atomic_check(struct drm_device *dev, state->cdclk.logical = dev_priv->cdclk.logical; } + ret = icl_add_linked_planes(state); + if (ret) + goto fail; + + ret = intel_atomic_check_planes(state); + if (ret) + goto fail; + ret = intel_atomic_check_crtcs(state); if (ret) goto fail; @@ -14124,6 +13973,9 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + /* drm_atomic_helper_update_legacy_modeset_state might not be called. 
*/ + crtc->base.mode = new_crtc_state->base.mode; + /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -14385,8 +14237,8 @@ static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc, static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, struct intel_atomic_state *state) { - struct drm_connector *uninitialized_var(conn); struct drm_connector_state *conn_state; + struct drm_connector *conn; struct intel_dp *intel_dp; int i; @@ -14818,14 +14670,6 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state) plane->frontbuffer_bit); } -static void assert_global_state_locked(struct drm_i915_private *dev_priv) -{ - struct intel_crtc *crtc; - - for_each_intel_crtc(&dev_priv->drm, crtc) - drm_modeset_lock_assert_held(&crtc->base.mutex); -} - static int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, bool nonblock) @@ -14891,9 +14735,7 @@ static int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); - if (state->global_state_changed) { - assert_global_state_locked(dev_priv); - + if (state->modeset) { memcpy(dev_priv->min_cdclk, state->min_cdclk, sizeof(state->min_cdclk)); memcpy(dev_priv->min_voltage_level, state->min_voltage_level, @@ -14940,7 +14782,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait, * vblank without our intervention, so leave RPS alone. */ if (!i915_request_started(rq)) - intel_rps_boost(rq); + gen6_rps_boost(rq); i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); @@ -15021,7 +14863,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) { struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), + .priority = I915_PRIORITY_DISPLAY, }; i915_gem_object_wait_priority(obj, 0, &attr); @@ -15134,7 +14976,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, * maximum clocks following a vblank miss (see do_rps_boost()). 
*/ if (!intel_state->rps_interactive) { - intel_rps_mark_interactive(&dev_priv->gt.rps, true); + intel_rps_mark_interactive(dev_priv, true); intel_state->rps_interactive = true; } @@ -15159,7 +15001,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { - intel_rps_mark_interactive(&dev_priv->gt.rps, false); + intel_rps_mark_interactive(dev_priv, false); intel_state->rps_interactive = false; } @@ -15167,6 +15009,44 @@ intel_cleanup_plane_fb(struct drm_plane *plane, intel_plane_unpin_fb(old_plane_state); } +int +skl_max_scale(const struct intel_crtc_state *crtc_state, + const struct drm_format_info *format) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int max_scale; + int crtc_clock, max_dotclk, tmpclk1, tmpclk2; + + if (!crtc_state->base.enable) + return DRM_PLANE_HELPER_NO_SCALING; + + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; + + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) + max_dotclk *= 2; + + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) + return DRM_PLANE_HELPER_NO_SCALING; + + /* + * skl max scale is lower of: + * close to 3 but not 3, -1 is for that purpose + * or + * cdclk/crtc_clock + */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || + !drm_format_info_is_yuv_semiplanar(format)) + tmpclk1 = 0x30000 - 1; + else + tmpclk1 = 0x20000 - 1; + tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); + max_scale = min(tmpclk1, tmpclk2); + + return max_scale; +} + /** * intel_plane_destroy - destroy a plane * @plane: plane to destroy @@ -15221,7 +15101,6 @@ static bool i965_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_XBGR8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_XBGR16161616F: return modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED; default: @@ -15442,26 +15321,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) } if (INTEL_GEN(dev_priv) >= 4) { - /* - * WaFP16GammaEnabling:ivb - * "Workaround : When using the 64-bit format, the plane - * output on each color channel has one quarter amplitude. - * It can be brought up to full amplitude by using pipe - * gamma correction or pipe color space conversion to - * multiply the plane output by four." - * - * There is no dedicated plane gamma for the primary plane, - * and using the pipe gamma/csc could conflict with other - * planes, so we choose not to expose fp16 on IVB primary - * planes. HSW primary planes no longer have this problem. 
- */ - if (IS_IVYBRIDGE(dev_priv)) { - formats = ivb_primary_formats; - num_formats = ARRAY_SIZE(ivb_primary_formats); - } else { - formats = i965_primary_formats; - num_formats = ARRAY_SIZE(i965_primary_formats); - } + formats = i965_primary_formats; + num_formats = ARRAY_SIZE(i965_primary_formats); modifiers = i9xx_format_modifiers; plane->max_stride = i9xx_plane_max_stride; @@ -15470,15 +15331,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; - if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) - plane->min_cdclk = hsw_plane_min_cdclk; - else if (IS_IVYBRIDGE(dev_priv)) - plane->min_cdclk = ivb_plane_min_cdclk; - else if (IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv)) - plane->min_cdclk = vlv_plane_min_cdclk; - else - plane->min_cdclk = i9xx_plane_min_cdclk; - plane_funcs = &i965_plane_funcs; } else { formats = i8xx_primary_formats; @@ -15490,7 +15342,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) plane->disable_plane = i9xx_disable_plane; plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; - plane->min_cdclk = i9xx_plane_min_cdclk; plane_funcs = &i8xx_plane_funcs; } @@ -15842,7 +15693,7 @@ static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) u32 possible_crtcs = 0; for_each_intel_crtc(dev, crtc) { - if (encoder->pipe_mask & BIT(crtc->pipe)) + if (encoder->crtc_mask & BIT(crtc->pipe)) possible_crtcs |= drm_crtc_mask(&crtc->base); } @@ -16443,21 +16294,6 @@ intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; - if (INTEL_GEN(dev_priv) >= 5) { - if (mode->hdisplay < 64 || - mode->htotal - mode->hdisplay < 32) - return MODE_H_ILLEGAL; - - if (mode->vtotal - mode->vdisplay < 5) - return MODE_V_ILLEGAL; - } else { - if (mode->htotal - mode->hdisplay < 32) - return MODE_H_ILLEGAL; - - if (mode->vtotal - mode->vdisplay < 3) - return MODE_V_ILLEGAL; - } - return MODE_OK; } @@ -17388,16 +17224,13 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct intel_plane *plane; int min_cdclk = 0; + memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { - struct drm_display_mode mode; - - intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, - crtc_state); - - mode = crtc_state->base.adjusted_mode; - mode.hdisplay = crtc_state->pipe_src_w; - mode.vdisplay = crtc_state->pipe_src_h; - WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &mode)); + intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; + intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &crtc->base.mode)); /* * The initial mode needs to be set in order to keep @@ -17412,9 +17245,17 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_crtc_compute_pixel_rate(crtc_state); + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; + intel_crtc_update_active_timings(crtc_state); } + dev_priv->min_cdclk[crtc->pipe] = min_cdclk; + dev_priv->min_voltage_level[crtc->pipe] = + crtc_state->min_voltage_level; + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -17426,34 +17267,8 @@ static 
void intel_modeset_readout_hw_state(struct drm_device *dev) if (plane_state->base.visible) crtc_state->data_rate[plane->id] = 4 * crtc_state->pixel_rate; - /* - * FIXME don't have the fb yet, so can't - * use plane->min_cdclk() :( - */ - if (plane_state->base.visible && plane->min_cdclk) { - if (crtc_state->double_wide || - INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) - crtc_state->min_cdclk[plane->id] = - DIV_ROUND_UP(crtc_state->pixel_rate, 2); - else - crtc_state->min_cdclk[plane->id] = - crtc_state->pixel_rate; - } - DRM_DEBUG_KMS("[PLANE:%d:%s] min_cdclk %d kHz\n", - plane->base.base.id, plane->base.name, - crtc_state->min_cdclk[plane->id]); } - if (crtc_state->base.active) { - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (WARN_ON(min_cdclk < 0)) - min_cdclk = 0; - } - - dev_priv->min_cdclk[crtc->pipe] = min_cdclk; - dev_priv->min_voltage_level[crtc->pipe] = - crtc_state->min_voltage_level; - intel_bw_crtc_update(bw_state, crtc_state); intel_pipe_config_sanity_check(dev_priv, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 355c50088589..7dcb176d91b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -509,6 +509,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, struct intel_digital_port *dport, unsigned int expected_mask); int intel_get_load_detect_pipe(struct drm_connector *connector, + const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); void intel_release_load_detect_pipe(struct drm_connector *connector, @@ -562,6 +563,8 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); +int skl_max_scale(const struct intel_crtc_state *crtc_state, + const struct drm_format_info *format); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 707ac110e271..6f9e7927e248 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2682,8 +2682,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, TGL_PW_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ - BIT_ULL(POWER_DOMAIN_AUX_B) | \ - BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define TGL_DDI_IO_D_TC1_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 4341bd66a418..8358152e403e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -128,8 +128,7 @@ struct intel_encoder { enum intel_output_type type; enum port port; - u16 cloneable; - u8 pipe_mask; + unsigned int cloneable; enum intel_hotplug_state (*hotplug)(struct intel_encoder *encoder, struct intel_connector *connector, bool irq_received); @@ -188,6 +187,7 @@ struct intel_encoder { * device interrupts are disabled. 
*/ void (*suspend)(struct intel_encoder *); + int crtc_mask; enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ @@ -506,14 +506,6 @@ struct intel_atomic_state { bool rps_interactive; - /* - * active_pipes - * min_cdclk[] - * min_voltage_level[] - * cdclk.* - */ - bool global_state_changed; - /* Gen9+ only */ struct skl_ddb_values wm_results; @@ -940,8 +932,6 @@ struct intel_crtc_state { struct intel_crtc_wm_state wm; - int min_cdclk[I915_MAX_PLANES]; - u32 data_rate[I915_MAX_PLANES]; /* Gamma mode programmed on the pipe */ @@ -996,8 +986,8 @@ struct intel_crtc_state { bool dsc_split; u16 compressed_bpp; u8 slice_count; - struct drm_dsc_config config; - } dsc; + } dsc_params; + struct drm_dsc_config dp_dsc_cfg; /* Forward Error correction State */ bool fec_enable; @@ -1087,8 +1077,6 @@ struct intel_plane { bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); - int (*min_cdclk)(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); }; struct intel_watermark_params { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c61ac0c3acb5..5eeafa45831a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1179,20 +1179,18 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); - const unsigned int timeout_ms = 10; u32 status; bool done; #define C (((status = intel_uncore_read_notrace(&i915->uncore, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) done = wait_event_timeout(i915->gmbus_wait_queue, C, - msecs_to_jiffies_timeout(timeout_ms)); + msecs_to_jiffies_timeout(10)); /* just trace the final value */ trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (!done) - DRM_ERROR("%s did not complete or timeout within %ums (status 0x%08x)\n", - intel_dp->aux.name, timeout_ms, status); + DRM_ERROR("dp aux hw did not signal timeout!\n"); #undef C return status; @@ -1293,9 +1291,6 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, u32 unused) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *i915 = - to_i915(intel_dig_port->base.base.dev); - enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port); u32 ret; ret = DP_AUX_CH_CTL_SEND_BUSY | @@ -1308,8 +1303,7 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); - if (intel_phy_is_tc(i915, phy) && - intel_dig_port->tc_mode == TC_PORT_TBT_ALT) + if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) ret |= DP_AUX_CH_CTL_TBT_IO; return ret; @@ -1894,9 +1888,6 @@ static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!INTEL_INFO(dev_priv)->display.has_dsc) - return false; - /* On TGL, DSC is supported on all Pipes */ if (INTEL_GEN(dev_priv) >= 12) return true; @@ -2089,10 +2080,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->lane_count = limits->max_lane_count; if (intel_dp_is_edp(intel_dp)) { - pipe_config->dsc.compressed_bpp = + pipe_config->dsc_params.compressed_bpp = min_t(u16, drm_edp_dsc_sink_output_bpp(intel_dp->dsc_dpcd) >> 4, pipe_config->pipe_bpp); - 
pipe_config->dsc.slice_count = + pipe_config->dsc_params.slice_count = drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, true); } else { @@ -2113,10 +2104,10 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, DRM_DEBUG_KMS("Compressed BPP/Slice Count not supported\n"); return -EINVAL; } - pipe_config->dsc.compressed_bpp = min_t(u16, + pipe_config->dsc_params.compressed_bpp = min_t(u16, dsc_max_output_bpp >> 4, pipe_config->pipe_bpp); - pipe_config->dsc.slice_count = dsc_dp_slice_count; + pipe_config->dsc_params.slice_count = dsc_dp_slice_count; } /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate @@ -2124,8 +2115,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, * then we need to use 2 VDSC instances. */ if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) { - if (pipe_config->dsc.slice_count > 1) { - pipe_config->dsc.dsc_split = true; + if (pipe_config->dsc_params.slice_count > 1) { + pipe_config->dsc_params.dsc_split = true; } else { DRM_DEBUG_KMS("Cannot split stream to use 2 VDSC instances\n"); return -EINVAL; @@ -2137,16 +2128,16 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, DRM_DEBUG_KMS("Cannot compute valid DSC parameters for Input Bpp = %d " "Compressed BPP = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp); + pipe_config->dsc_params.compressed_bpp); return ret; } - pipe_config->dsc.compression_enable = true; + pipe_config->dsc_params.compression_enable = true; DRM_DEBUG_KMS("DP DSC computed with Input Bpp = %d " "Compressed Bpp = %d Slice Count = %d\n", pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp, - pipe_config->dsc.slice_count); + pipe_config->dsc_params.compressed_bpp, + pipe_config->dsc_params.slice_count); return 0; } @@ -2220,15 +2211,15 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, return ret; } - if (pipe_config->dsc.compression_enable) { + if (pipe_config->dsc_params.compression_enable) { DRM_DEBUG_KMS("DP lane count %d clock %d Input bpp %d Compressed bpp %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - pipe_config->dsc.compressed_bpp); + pipe_config->dsc_params.compressed_bpp); DRM_DEBUG_KMS("DP link rate required %i available %i\n", intel_dp_link_required(adjusted_mode->crtc_clock, - pipe_config->dsc.compressed_bpp), + pipe_config->dsc_params.compressed_bpp), intel_dp_max_data_rate(pipe_config->port_clock, pipe_config->lane_count)); } else { @@ -2386,8 +2377,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (pipe_config->dsc.compression_enable) - output_bpp = pipe_config->dsc.compressed_bpp; + if (pipe_config->dsc_params.compression_enable) + output_bpp = pipe_config->dsc_params.compressed_bpp; else output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp); @@ -3111,7 +3102,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, { int ret; - if (!crtc_state->dsc.compression_enable) + if (!crtc_state->dsc_params.compression_enable) return; ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_DSC_ENABLE, @@ -5697,12 +5688,6 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - /* - * Make sure the refs for power wells enabled during detect are - * dropped to avoid a new detect cycle triggered by HPD polling. 
- */ - intel_display_power_flush_work(dev_priv); - return status; } @@ -7575,11 +7560,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->pipe_mask = BIT(PIPE_C); + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 0; intel_encoder->port = port; @@ -7640,8 +7625,7 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) if (!intel_dp->can_mst) continue; - ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr, - true); + ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr); if (ret) { intel_dp->is_mst = false; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 9ae5b8b6bbbc..bbcab27644dc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -391,7 +391,20 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) return ret; } +static enum drm_connector_status +intel_dp_mst_detect(struct drm_connector *connector, bool force) +{ + struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_dp *intel_dp = intel_connector->mst_port; + + if (drm_connector_is_unregistered(connector)) + return connector_status_disconnected; + return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, + intel_connector->port); +} + static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { + .detect = intel_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, @@ -452,26 +465,11 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c return &intel_dp->mst_encoders[crtc->pipe]->base.base; } -static int -intel_dp_mst_detect(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, bool force) -{ - struct intel_connector *intel_connector = to_intel_connector(connector); - struct intel_dp *intel_dp = intel_connector->mst_port; - - if (drm_connector_is_unregistered(connector)) - return connector_status_disconnected; - - return drm_dp_mst_detect_port(connector, ctx, &intel_dp->mst_mgr, - intel_connector->port); -} - static const struct drm_connector_helper_funcs intel_dp_mst_connector_helper_funcs = { .get_modes = intel_dp_mst_get_modes, .mode_valid = intel_dp_mst_mode_valid, .atomic_best_encoder = intel_mst_atomic_best_encoder, .atomic_check = intel_dp_mst_atomic_check, - .detect_ctx = intel_dp_mst_detect, }; static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder) @@ -600,6 +598,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum struct intel_dp_mst_encoder *intel_mst; struct intel_encoder *intel_encoder; struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe_iter; intel_mst = kzalloc(sizeof(*intel_mst), GFP_KERNEL); @@ -617,15 +617,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port 
= intel_dig_port->base.port; intel_encoder->cloneable = 0; - /* - * This is wrong, but broken userspace uses the intersection - * of possible_crtcs of all the encoders of a given connector - * to figure out which crtcs can drive said connector. What - * should be used instead is the union of possible_crtcs. - * To keep such userspace functioning we must misconfigure - * this to make sure the intersection is not empty :( - */ - intel_encoder->pipe_mask = ~0; + for_each_pipe(dev_priv, pipe_iter) + intel_encoder->crtc_mask |= BIT(pipe_iter); intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 3ce0a023eee0..ec10fa7d3c69 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -526,31 +526,16 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, val = I915_READ(WRPLL_CTL(id)); I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); POSTING_READ(WRPLL_CTL(id)); - - /* - * Try to set up the PCH reference clock once all DPLLs - * that depend on it have been shut down. - */ - if (dev_priv->pch_ssc_use & BIT(id)) - intel_init_pch_refclk(dev_priv); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - enum intel_dpll_id id = pll->info->id; u32 val; val = I915_READ(SPLL_CTL); I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); POSTING_READ(SPLL_CTL); - - /* - * Try to set up the PCH reference clock once all DPLLs - * that depend on it have been shut down. - */ - if (dev_priv->pch_ssc_use & BIT(id)) - intel_init_pch_refclk(dev_priv); } static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index 2a104c64291d..e7588799fce5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -147,11 +147,11 @@ enum intel_dpll_id { */ DPLL_ID_ICL_MGPLL4 = 6, /** - * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5) + * @DPLL_ID_TGL_TCPLL5: TGL TC PLL port 5 (TC5) */ DPLL_ID_TGL_MGPLL5 = 7, /** - * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6) + * @DPLL_ID_TGL_TCPLL6: TGL TC PLL port 6 (TC6) */ DPLL_ID_TGL_MGPLL6 = 8, }; @@ -337,11 +337,6 @@ struct intel_shared_dpll { * @info: platform specific info */ const struct dpll_info *info; - - /** - * @wakeref: In some platforms a device-level runtime pm reference may - * need to be grabbed to disable DC states while this DPLL is enabled - */ intel_wakeref_t wakeref; }; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index bcfbcb743e7d..9827f99491d1 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index f1f41ca8402b..e69fa34528df 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -922,7 +922,7 @@ static void intel_hdcp_prop_work(struct 
work_struct *work) bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port) { /* PORT E doesn't have HDCP, and PORT F is disabled */ - return INTEL_INFO(dev_priv)->display.has_hdcp && port < PORT_E; + return INTEL_GEN(dev_priv) >= 9 && port < PORT_E; } static int diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f6f5312205c4..b54ccbb5aad5 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2626,12 +2626,6 @@ out: if (status != connector_status_connected) cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier); - /* - * Make sure the refs for power wells enabled during detect are - * dropped to avoid a new detect cycle triggered by HPD polling. - */ - intel_display_power_flush_work(dev_priv); - return status; } @@ -3283,11 +3277,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->pipe_mask = BIT(PIPE_C); + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->pipe_mask = BIT(PIPE_A) | BIT(PIPE_B); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index b1bc78623647..13841d7c455b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -899,10 +899,12 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; - if (INTEL_GEN(dev_priv) < 4) - intel_encoder->pipe_mask = BIT(PIPE_B); + if (HAS_PCH_SPLIT(dev_priv)) + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); + else if (IS_GEN(dev_priv, 4)) + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); else - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_B); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 848ce07a8ec2..2360f19f9694 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -30,7 +30,6 @@ #include #include "gem/i915_gem_pm.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 6a9f322d3fca..50f22abcd30e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -76,7 +76,7 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { /* Cannot enable DSC and PSR2 simultaneously */ - WARN_ON(crtc_state->dsc.compression_enable && + WARN_ON(crtc_state->dsc_params.compression_enable && crtc_state->has_psr2); switch (dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK) { @@ -623,7 +623,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * resolution requires DSC to be enabled, priority is given to DSC * over PSR2. 
*/ - if (crtc_state->dsc.compression_enable) { + if (crtc_state->dsc_params.compression_enable) { DRM_DEBUG_KMS("PSR2 cannot be enabled since DSC is enabled\n"); return false; } @@ -740,6 +740,25 @@ static void intel_psr_activate(struct intel_dp *intel_dp) dev_priv->psr.active = true; } +static i915_reg_t gen9_chicken_trans_reg(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) +{ + static const i915_reg_t regs[] = { + [TRANSCODER_A] = CHICKEN_TRANS_A, + [TRANSCODER_B] = CHICKEN_TRANS_B, + [TRANSCODER_C] = CHICKEN_TRANS_C, + [TRANSCODER_EDP] = CHICKEN_TRANS_EDP, + }; + + WARN_ON(INTEL_GEN(dev_priv) < 9); + + if (WARN_ON(cpu_transcoder >= ARRAY_SIZE(regs) || + !regs[cpu_transcoder].reg)) + cpu_transcoder = TRANSCODER_A; + + return regs[cpu_transcoder]; +} + static void intel_psr_enable_source(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -755,7 +774,8 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (dev_priv->psr.psr2_enabled && (IS_GEN(dev_priv, 9) && !IS_GEMINILAKE(dev_priv))) { - i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder); + i915_reg_t reg = gen9_chicken_trans_reg(dev_priv, + cpu_transcoder); u32 chicken = I915_READ(reg); chicken |= PSR2_VSC_ENABLE_PROG_HEADER | @@ -1417,7 +1437,7 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp) if (val & DP_PSR_VSC_SDP_UNCORRECTABLE_ERROR) DRM_DEBUG_KMS("PSR VSC SDP uncorrectable error, disabling PSR\n"); if (val & DP_PSR_LINK_CRC_ERROR) - DRM_DEBUG_KMS("PSR Link CRC error, disabling PSR\n"); + DRM_ERROR("PSR Link CRC error, disabling PSR\n"); if (val & ~errors) DRM_ERROR("PSR_ERROR_STATUS unhandled errors %x\n", diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 5b7f4baf7348..47f5d87a938a 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2921,7 +2921,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.pipe_mask = ~0; + intel_sdvo->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index edc41fc40726..5ae12ab3c5b7 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -322,55 +322,6 @@ bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) icl_hdr_plane_mask() & BIT(plane_id); } -static void -skl_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - - if (fb->format->cpp[0] == 8) { - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - *num = 10; - *den = 8; - } else { - *num = 9; - *den = 8; - } - } else { - *num = 1; - *den = 1; - } -} - -static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - unsigned int pixel_rate = crtc_state->pixel_rate; - unsigned int src_w, src_h, dst_w, dst_h; - unsigned int num, den; - - skl_plane_ratio(crtc_state, plane_state, &num, &den); - - /* two pixels per clock on glk+ */ - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) - den *= 2; - - src_w = 
drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; - dst_w = drm_rect_width(&plane_state->base.dst); - dst_h = drm_rect_height(&plane_state->base.dst); - - /* Downscaling limits the maximum pixel rate */ - dst_w = min(src_w, dst_w); - dst_h = min(src_h, dst_h); - - return DIV64_U64_ROUND_UP(mul_u32_u32(pixel_rate * num, src_w * src_h), - mul_u32_u32(den, dst_w * dst_h)); -} - static unsigned int skl_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -860,85 +811,6 @@ vlv_update_clrc(const struct intel_plane_state *plane_state) SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } -static void -vlv_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int cpp = fb->format->cpp[0]; - - /* - * VLV bspec only considers cases where all three planes are - * enabled, and cases where the primary and one sprite is enabled. - * Let's assume the case with just two sprites enabled also - * maps to the latter case. - */ - if (hweight8(active_planes) == 3) { - switch (cpp) { - case 8: - *num = 11; - *den = 8; - break; - case 4: - *num = 18; - *den = 16; - break; - default: - *num = 1; - *den = 1; - break; - } - } else if (hweight8(active_planes) == 2) { - switch (cpp) { - case 8: - *num = 10; - *den = 8; - break; - case 4: - *num = 17; - *den = 16; - break; - default: - *num = 1; - *den = 1; - break; - } - } else { - switch (cpp) { - case 8: - *num = 10; - *den = 8; - break; - default: - *num = 1; - *den = 1; - break; - } - } -} - -int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - unsigned int pixel_rate; - unsigned int num, den; - - /* - * Note that crtc_state->pixel_rate accounts for both - * horizontal and vertical panel fitter downscaling factors. - * Pre-HSW bspec tells us to only consider the horizontal - * downscaling factor here. We ignore that and just consider - * both for simplicity. 
- */ - pixel_rate = crtc_state->pixel_rate; - - vlv_plane_ratio(crtc_state, plane_state, &num, &den); - - return DIV_ROUND_UP(pixel_rate * num, den); -} - static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -1145,164 +1017,6 @@ vlv_plane_get_hw_state(struct intel_plane *plane, return ret; } -static void ivb_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int cpp = fb->format->cpp[0]; - - if (hweight8(active_planes) == 2) { - switch (cpp) { - case 8: - *num = 10; - *den = 8; - break; - case 4: - *num = 17; - *den = 16; - break; - default: - *num = 1; - *den = 1; - break; - } - } else { - switch (cpp) { - case 8: - *num = 9; - *den = 8; - break; - default: - *num = 1; - *den = 1; - break; - } - } -} - -static void ivb_plane_ratio_scaling(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int cpp = fb->format->cpp[0]; - - switch (cpp) { - case 8: - *num = 12; - *den = 8; - break; - case 4: - *num = 19; - *den = 16; - break; - case 2: - *num = 33; - *den = 32; - break; - default: - *num = 1; - *den = 1; - break; - } -} - -int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - unsigned int pixel_rate; - unsigned int num, den; - - /* - * Note that crtc_state->pixel_rate accounts for both - * horizontal and vertical panel fitter downscaling factors. - * Pre-HSW bspec tells us to only consider the horizontal - * downscaling factor here. We ignore that and just consider - * both for simplicity. - */ - pixel_rate = crtc_state->pixel_rate; - - ivb_plane_ratio(crtc_state, plane_state, &num, &den); - - return DIV_ROUND_UP(pixel_rate * num, den); -} - -static int ivb_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - unsigned int src_w, dst_w, pixel_rate; - unsigned int num, den; - - /* - * Note that crtc_state->pixel_rate accounts for both - * horizontal and vertical panel fitter downscaling factors. - * Pre-HSW bspec tells us to only consider the horizontal - * downscaling factor here. We ignore that and just consider - * both for simplicity. 
- */ - pixel_rate = crtc_state->pixel_rate; - - src_w = drm_rect_width(&plane_state->base.src) >> 16; - dst_w = drm_rect_width(&plane_state->base.dst); - - if (src_w != dst_w) - ivb_plane_ratio_scaling(crtc_state, plane_state, &num, &den); - else - ivb_plane_ratio(crtc_state, plane_state, &num, &den); - - /* Horizontal downscaling limits the maximum pixel rate */ - dst_w = min(src_w, dst_w); - - return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, num * src_w), - den * dst_w); -} - -static void hsw_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - unsigned int *num, unsigned int *den) -{ - u8 active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int cpp = fb->format->cpp[0]; - - if (hweight8(active_planes) == 2) { - switch (cpp) { - case 8: - *num = 10; - *den = 8; - break; - default: - *num = 1; - *den = 1; - break; - } - } else { - switch (cpp) { - case 8: - *num = 9; - *den = 8; - break; - default: - *num = 1; - *den = 1; - break; - } - } -} - -int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - unsigned int pixel_rate = crtc_state->pixel_rate; - unsigned int num, den; - - hsw_plane_ratio(crtc_state, plane_state, &num, &den); - - return DIV_ROUND_UP(pixel_rate * num, den); -} - static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) { u32 sprctl = 0; @@ -1316,16 +1030,6 @@ static u32 ivb_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return sprctl; } -static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - - return fb->format->cpp[0] == 8 && - (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)); -} - static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1348,12 +1052,6 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: sprctl |= SPRITE_FORMAT_RGBX888; break; - case DRM_FORMAT_XBGR16161616F: - sprctl |= SPRITE_FORMAT_RGBX161616 | SPRITE_RGB_ORDER_RGBX; - break; - case DRM_FORMAT_XRGB16161616F: - sprctl |= SPRITE_FORMAT_RGBX161616; - break; case DRM_FORMAT_YUYV: sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_YUYV; break; @@ -1371,8 +1069,7 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return 0; } - if (!ivb_need_sprite_gamma(plane_state)) - sprctl |= SPRITE_INT_GAMMA_DISABLE; + sprctl |= SPRITE_INT_GAMMA_DISABLE; if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) sprctl |= SPRITE_YUV_TO_RGB_CSC_FORMAT_BT709; @@ -1394,26 +1091,12 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, return sprctl; } -static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state, - u16 gamma[18]) +static void ivb_sprite_linear_gamma(u16 gamma[18]) { - int scale, i; + int i; - /* - * WaFP16GammaEnabling:ivb,hsw - * "Workaround : When using the 64-bit format, the sprite output - * on each color channel has one quarter amplitude. It can be - * brought up to full amplitude by using sprite internal gamma - * correction, pipe gamma correction, or pipe color space - * conversion to multiply the sprite output by four." 
- */ - scale = 4; - - for (i = 0; i < 16; i++) - gamma[i] = min((scale * i << 10) / 16, (1 << 10) - 1); - - gamma[i] = min((scale * i << 10) / 16, 1 << 10); - i++; + for (i = 0; i < 17; i++) + gamma[i] = (i << 10) / 16; gamma[i] = 3 << 10; i++; @@ -1427,10 +1110,7 @@ static void ivb_update_gamma(const struct intel_plane_state *plane_state) u16 gamma[18]; int i; - if (!ivb_need_sprite_gamma(plane_state)) - return; - - ivb_sprite_linear_gamma(plane_state, gamma); + ivb_sprite_linear_gamma(gamma); /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) @@ -1563,53 +1243,6 @@ ivb_plane_get_hw_state(struct intel_plane *plane, return ret; } -static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int hscale, pixel_rate; - unsigned int limit, decimate; - - /* - * Note that crtc_state->pixel_rate accounts for both - * horizontal and vertical panel fitter downscaling factors. - * Pre-HSW bspec tells us to only consider the horizontal - * downscaling factor here. We ignore that and just consider - * both for simplicity. - */ - pixel_rate = crtc_state->pixel_rate; - - /* Horizontal downscaling limits the maximum pixel rate */ - hscale = drm_rect_calc_hscale(&plane_state->base.src, - &plane_state->base.dst, - 0, INT_MAX); - if (hscale < 0x10000) - return pixel_rate; - - /* Decimation steps at 2x,4x,8x,16x */ - decimate = ilog2(hscale >> 16); - hscale >>= decimate; - - /* Starting limit is 90% of cdclk */ - limit = 9; - - /* -10% per decimation step */ - limit -= decimate; - - /* -10% for RGB */ - if (fb->format->cpp[0] >= 4) - limit--; /* -10% for RGB */ - - /* - * We should also do -10% if sprite scaling is enabled - * on the other pipe, but we can't really check for that, - * so we ignore it. - */ - - return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, 10 * hscale), - limit << 16); -} - static unsigned int g4x_sprite_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, @@ -1653,12 +1286,6 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_XRGB8888: dvscntr |= DVS_FORMAT_RGBX888; break; - case DRM_FORMAT_XBGR16161616F: - dvscntr |= DVS_FORMAT_RGBX161616 | DVS_RGB_ORDER_XBGR; - break; - case DRM_FORMAT_XRGB16161616F: - dvscntr |= DVS_FORMAT_RGBX161616; - break; case DRM_FORMAT_YUYV: dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_YUYV; break; @@ -1872,11 +1499,6 @@ static bool intel_fb_scalable(const struct drm_framebuffer *fb) switch (fb->format->format) { case DRM_FORMAT_C8: return false; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - return INTEL_GEN(to_i915(fb->dev)) >= 11; default: return true; } @@ -2165,22 +1787,6 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s return 0; } -static int skl_plane_max_scale(struct drm_i915_private *dev_priv, - const struct drm_framebuffer *fb) -{ - /* - * We don't yet know the final source width nor - * whether we can use the HQ scaler mode. Assume - * the best case. - * FIXME need to properly check this later. 
- */ - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || - !drm_format_info_is_yuv_semiplanar(fb->format)) - return 0x30000 - 1; - else - return 0x20000 - 1; -} - static int skl_plane_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { @@ -2198,7 +1804,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, /* use scaler when colorkey is not required */ if (!plane_state->ckey.flags && intel_fb_scalable(fb)) { min_scale = 1; - max_scale = skl_plane_max_scale(dev_priv, fb); + max_scale = skl_max_scale(crtc_state, fb->format); } ret = drm_atomic_helper_check_plane_state(&plane_state->base, @@ -2373,10 +1979,8 @@ static const u64 i9xx_plane_format_modifiers[] = { }; static const u32 snb_plane_formats[] = { - DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888, - DRM_FORMAT_XRGB16161616F, - DRM_FORMAT_XBGR16161616F, + DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2406,8 +2010,6 @@ static const u32 skl_plane_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_XRGB16161616F, - DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2423,8 +2025,6 @@ static const u32 skl_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_XRGB16161616F, - DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2441,8 +2041,6 @@ static const u32 glk_planar_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_XRGB16161616F, - DRM_FORMAT_XBGR16161616F, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, DRM_FORMAT_UYVY, @@ -2593,8 +2191,6 @@ static bool snb_sprite_format_mod_supported(struct drm_plane *_plane, switch (format) { case DRM_FORMAT_XRGB8888: case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_XBGR16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2915,7 +2511,6 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = skl_disable_plane; plane->get_hw_state = skl_plane_get_hw_state; plane->check_plane = skl_plane_check; - plane->min_cdclk = skl_plane_min_cdclk; if (icl_is_nv12_y_plane(plane_id)) plane->update_slave = icl_update_slave; @@ -3023,7 +2618,6 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = vlv_disable_plane; plane->get_hw_state = vlv_plane_get_hw_state; plane->check_plane = vlv_sprite_check; - plane->min_cdclk = vlv_plane_min_cdclk; formats = vlv_plane_formats; num_formats = ARRAY_SIZE(vlv_plane_formats); @@ -3037,11 +2631,6 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->get_hw_state = ivb_plane_get_hw_state; plane->check_plane = g4x_sprite_check; - if (IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) - plane->min_cdclk = hsw_plane_min_cdclk; - else - plane->min_cdclk = ivb_sprite_min_cdclk; - formats = snb_plane_formats; num_formats = ARRAY_SIZE(snb_plane_formats); modifiers = i9xx_plane_format_modifiers; @@ -3053,7 +2642,6 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = g4x_disable_plane; plane->get_hw_state = g4x_plane_get_hw_state; plane->check_plane = g4x_sprite_check; - plane->min_cdclk = g4x_sprite_min_cdclk; modifiers = i9xx_plane_format_modifiers; if (IS_GEN(dev_priv, 6)) { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 5eeaa92420d1..229336214f68 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h 
+++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -49,11 +49,4 @@ static inline u8 icl_hdr_plane_mask(void) bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id); -int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); -int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); -int vlv_plane_min_cdclk(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); - #endif /* __INTEL_SPRITE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 9983fadf6c28..70726b481244 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1701,7 +1701,7 @@ intel_tv_detect(struct drm_connector *connector, struct intel_load_detect_pipe tmp; int ret; - ret = intel_get_load_detect_pipe(connector, &tmp, ctx); + ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx); if (ret < 0) return ret; @@ -1947,7 +1947,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); intel_encoder->cloneable = 0; intel_tv->type = DRM_MODE_CONNECTOR_Unknown; diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 69a7cb1fa121..e3045ced4bfe 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -114,7 +114,6 @@ enum bdb_block_id { BDB_LVDS_POWER = 44, BDB_MIPI_CONFIG = 52, BDB_MIPI_SEQUENCE = 53, - BDB_COMPRESSION_PARAMETERS = 56, BDB_SKIP = 254, /* VBIOS private block, ignore */ }; @@ -812,55 +811,4 @@ struct bdb_mipi_sequence { u8 data[0]; /* up to 6 variable length blocks */ } __packed; -/* - * Block 56 - Compression Parameters - */ - -#define VBT_RC_BUFFER_BLOCK_SIZE_1KB 0 -#define VBT_RC_BUFFER_BLOCK_SIZE_4KB 1 -#define VBT_RC_BUFFER_BLOCK_SIZE_16KB 2 -#define VBT_RC_BUFFER_BLOCK_SIZE_64KB 3 - -#define VBT_DSC_LINE_BUFFER_DEPTH(vbt_value) ((vbt_value) + 8) /* bits */ -#define VBT_DSC_MAX_BPP(vbt_value) (6 + (vbt_value) * 2) - -struct dsc_compression_parameters_entry { - u8 version_major:4; - u8 version_minor:4; - - u8 rc_buffer_block_size:2; - u8 reserved1:6; - - /* - * Buffer size in bytes: - * - * 4 ^ rc_buffer_block_size * 1024 * (rc_buffer_size + 1) bytes - */ - u8 rc_buffer_size; - u32 slices_per_line; - - u8 line_buffer_depth:4; - u8 reserved2:4; - - /* Flag Bits 1 */ - u8 block_prediction_enable:1; - u8 reserved3:7; - - u8 max_bpp; /* mapping */ - - /* Color depth capabilities */ - u8 reserved4:1; - u8 support_8bpc:1; - u8 support_10bpc:1; - u8 support_12bpc:1; - u8 reserved5:4; - - u16 slice_height; -} __packed; - -struct bdb_compression_parameters { - u16 entry_size; - struct dsc_compression_parameters_entry data[16]; -} __packed; - #endif /* _INTEL_VBT_DEFS_H_ */ diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 896b0c334f5e..d4fb7f16f9f6 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -322,8 +322,8 @@ static int get_column_index_for_rc_params(u8 bits_per_component) int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { - struct drm_dsc_config *vdsc_cfg = 
&pipe_config->dsc.config; - u16 compressed_bpp = pipe_config->dsc.compressed_bpp; + struct drm_dsc_config *vdsc_cfg = &pipe_config->dp_dsc_cfg; + u16 compressed_bpp = pipe_config->dsc_params.compressed_bpp; u8 i = 0; int row_index = 0; int column_index = 0; @@ -332,7 +332,7 @@ int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, vdsc_cfg->pic_width = pipe_config->base.adjusted_mode.crtc_hdisplay; vdsc_cfg->pic_height = pipe_config->base.adjusted_mode.crtc_vdisplay; vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width, - pipe_config->dsc.slice_count); + pipe_config->dsc_params.slice_count); /* * Slice Height of 8 works for all currently available panels. So start * with that if pic_height is an integral multiple of 8. @@ -485,13 +485,13 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 pps_val = 0; u32 rc_buf_thresh_dword[4]; u32 rc_range_params_dword[8]; - u8 num_vdsc_instances = (crtc_state->dsc.dsc_split) ? 2 : 1; + u8 num_vdsc_instances = (crtc_state->dsc_params.dsc_split) ? 2 : 1; int i = 0; /* Populate PICTURE_PARAMETER_SET_0 registers */ @@ -514,11 +514,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_0, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_0(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_0(pipe), pps_val); } @@ -533,11 +533,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_1, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_1(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_1(pipe), pps_val); } @@ -553,11 +553,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_2, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_2(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_2(pipe), pps_val); } @@ -573,11 +573,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_3, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_3(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_3(pipe), pps_val); } @@ -593,11 +593,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder 
*encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_4, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_4(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe), pps_val); } @@ -613,11 +613,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_5, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_5(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe), pps_val); } @@ -635,11 +635,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_6, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_6(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_6(pipe), pps_val); } @@ -655,11 +655,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_7, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_7(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_7(pipe), pps_val); } @@ -675,11 +675,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_8, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_8(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_8(pipe), pps_val); } @@ -695,11 +695,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_9, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_9(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_9(pipe), pps_val); } @@ -717,11 +717,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_10, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_10(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_10(pipe), pps_val); } @@ -740,11 +740,11 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, * If 2 VDSC instances are needed, 
configure PPS for second * VDSC */ - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(DSCC_PICTURE_PARAMETER_SET_16, pps_val); } else { I915_WRITE(ICL_DSC0_PICTURE_PARAMETER_SET_16(pipe), pps_val); - if (crtc_state->dsc.dsc_split) + if (crtc_state->dsc_params.dsc_split) I915_WRITE(ICL_DSC1_PICTURE_PARAMETER_SET_16(pipe), pps_val); } @@ -763,7 +763,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, I915_WRITE(DSCA_RC_BUF_THRESH_0_UDW, rc_buf_thresh_dword[1]); I915_WRITE(DSCA_RC_BUF_THRESH_1, rc_buf_thresh_dword[2]); I915_WRITE(DSCA_RC_BUF_THRESH_1_UDW, rc_buf_thresh_dword[3]); - if (crtc_state->dsc.dsc_split) { + if (crtc_state->dsc_params.dsc_split) { I915_WRITE(DSCC_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); I915_WRITE(DSCC_RC_BUF_THRESH_0_UDW, @@ -782,7 +782,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_buf_thresh_dword[2]); I915_WRITE(ICL_DSC0_RC_BUF_THRESH_1_UDW(pipe), rc_buf_thresh_dword[3]); - if (crtc_state->dsc.dsc_split) { + if (crtc_state->dsc_params.dsc_split) { I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0(pipe), rc_buf_thresh_dword[0]); I915_WRITE(ICL_DSC1_RC_BUF_THRESH_0_UDW(pipe), @@ -824,7 +824,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(DSCA_RC_RANGE_PARAMETERS_3_UDW, rc_range_params_dword[7]); - if (crtc_state->dsc.dsc_split) { + if (crtc_state->dsc_params.dsc_split) { I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); I915_WRITE(DSCC_RC_RANGE_PARAMETERS_0_UDW, @@ -859,7 +859,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, rc_range_params_dword[6]); I915_WRITE(ICL_DSC0_RC_RANGE_PARAMETERS_3_UDW(pipe), rc_range_params_dword[7]); - if (crtc_state->dsc.dsc_split) { + if (crtc_state->dsc_params.dsc_split) { I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0(pipe), rc_range_params_dword[0]); I915_WRITE(ICL_DSC1_RC_RANGE_PARAMETERS_0_UDW(pipe), @@ -885,7 +885,7 @@ static void intel_dp_write_dsc_pps_sdp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dp_dsc_cfg; struct drm_dsc_pps_infoframe dp_dsc_pps_sdp; /* Prepare DP SDP PPS header as per DP 1.4 spec, Table 2-123 */ @@ -909,7 +909,7 @@ void intel_dsc_enable(struct intel_encoder *encoder, u32 dss_ctl1_val = 0; u32 dss_ctl2_val = 0; - if (!crtc_state->dsc.compression_enable) + if (!crtc_state->dsc_params.compression_enable) return; /* Enable Power wells for VDSC/joining */ @@ -928,7 +928,7 @@ void intel_dsc_enable(struct intel_encoder *encoder, dss_ctl2_reg = ICL_PIPE_DSS_CTL2(pipe); } dss_ctl2_val |= LEFT_BRANCH_VDSC_ENABLE; - if (crtc_state->dsc.dsc_split) { + if (crtc_state->dsc_params.dsc_split) { dss_ctl2_val |= RIGHT_BRANCH_VDSC_ENABLE; dss_ctl1_val |= JOINER_ENABLE; } @@ -944,7 +944,7 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) i915_reg_t dss_ctl1_reg, dss_ctl2_reg; u32 dss_ctl1_val = 0, dss_ctl2_val = 0; - if (!old_crtc_state->dsc.compression_enable) + if (!old_crtc_state->dsc_params.compression_enable) return; if (old_crtc_state->cpu_transcoder == TRANSCODER_EDP) { diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 0ca49b1604c6..50064cde0724 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ 
b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,11 +1870,11 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * port C. BXT isn't limited like this. */ if (IS_GEN9_LP(dev_priv)) - intel_encoder->pipe_mask = ~0; + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); else if (port == PORT_A) - intel_encoder->pipe_mask = BIT(PIPE_A); + intel_encoder->crtc_mask = BIT(PIPE_A); else - intel_encoder->pipe_mask = BIT(PIPE_B); + intel_encoder->crtc_mask = BIT(PIPE_B); if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index de6e55af82cf..7b01f4605f21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,10 +69,8 @@ #include -#include "gt/intel_engine_heartbeat.h" -#include "gt/intel_engine_user.h" #include "gt/intel_lrc_reg.h" -#include "gt/intel_ring.h" +#include "gt/intel_engine_user.h" #include "i915_gem_context.h" #include "i915_globals.h" @@ -278,153 +276,6 @@ void i915_gem_context_release(struct kref *ref) schedule_work(&gc->free_work); } -static inline struct i915_gem_engines * -__context_engines_static(const struct i915_gem_context *ctx) -{ - return rcu_dereference_protected(ctx->engines, true); -} - -static bool __reset_engine(struct intel_engine_cs *engine) -{ - struct intel_gt *gt = engine->gt; - bool success = false; - - if (!intel_has_reset_engine(gt)) - return false; - - if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags)) { - success = intel_engine_reset(engine, NULL) == 0; - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags); - } - - return success; -} - -static void __reset_context(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - intel_gt_handle_error(engine->gt, engine->mask, 0, - "context closure in %s", ctx->name); -} - -static bool __cancel_engine(struct intel_engine_cs *engine) -{ - /* - * Send a "high priority pulse" down the engine to cause the - * current request to be momentarily preempted. (If it fails to - * be preempted, it will be reset). As we have marked our context - * as banned, any incomplete request, including any running, will - * be skipped following the preemption. - * - * If there is no hangchecking (one of the reasons why we try to - * cancel the context) and no forced preemption, there may be no - * means by which we reset the GPU and evict the persistent hog. - * Ergo if we are unable to inject a preemptive pulse that can - * kill the banned context, we fallback to doing a local reset - * instead. - */ - if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && - !intel_engine_pulse(engine)) - return true; - - /* If we are unable to send a pulse, try resetting this engine. */ - return __reset_engine(engine); -} - -static struct intel_engine_cs *__active_engine(struct i915_request *rq) -{ - struct intel_engine_cs *engine, *locked; - - /* - * Serialise with __i915_request_submit() so that it sees - * is-banned?, or we know the request is already inflight. 
- */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); - locked = engine; - } - - engine = NULL; - if (i915_request_is_active(rq) && !rq->fence.error) - engine = rq->engine; - - spin_unlock_irq(&locked->active.lock); - - return engine; -} - -static struct intel_engine_cs *active_engine(struct intel_context *ce) -{ - struct intel_engine_cs *engine = NULL; - struct i915_request *rq; - - if (!ce->timeline) - return NULL; - - rcu_read_lock(); - list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { - if (i915_request_completed(rq)) - break; - - /* Check with the backend if the request is inflight */ - engine = __active_engine(rq); - if (engine) - break; - } - rcu_read_unlock(); - - return engine; -} - -static void kill_context(struct i915_gem_context *ctx) -{ - struct i915_gem_engines_iter it; - struct intel_context *ce; - - /* - * If we are already banned, it was due to a guilty request causing - * a reset and the entire context being evicted from the GPU. - */ - if (i915_gem_context_is_banned(ctx)) - return; - - i915_gem_context_set_banned(ctx); - - /* - * Map the user's engine back to the actual engines; one virtual - * engine will be mapped to multiple engines, and using ctx->engine[] - * the same engine may be have multiple instances in the user's map. - * However, we only care about pending requests, so only include - * engines on which there are incomplete requests. - */ - for_each_gem_engine(ce, __context_engines_static(ctx), it) { - struct intel_engine_cs *engine; - - /* - * Check the current active state of this context; if we - * are currently executing on the GPU we need to evict - * ourselves. On the other hand, if we haven't yet been - * submitted to the GPU or if everything is complete, - * we have nothing to do. - */ - engine = active_engine(ce); - - /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine)) - /* - * If we are unable to send a preemptive pulse to bump - * the context from the GPU, we have to resort to a full - * reset. We hope the collateral damage is worth it. - */ - __reset_context(ctx, engine); - } -} - static void context_close(struct i915_gem_context *ctx) { struct i915_address_space *vm; @@ -447,47 +298,9 @@ static void context_close(struct i915_gem_context *ctx) lut_close(ctx); mutex_unlock(&ctx->mutex); - - /* - * If the user has disabled hangchecking, we can not be sure that - * the batches will ever complete after the context is closed, - * keeping the context and all resources pinned forever. So in this - * case we opt to forcibly kill off all remaining requests on - * context close. - */ - if (!i915_gem_context_is_persistent(ctx) || - !i915_modparams.enable_hangcheck) - kill_context(ctx); - i915_gem_context_put(ctx); } -static int __context_set_persistence(struct i915_gem_context *ctx, bool state) -{ - if (i915_gem_context_is_persistent(ctx) == state) - return 0; - - if (state) { - /* - * Only contexts that are short-lived [that will expire or be - * reset] are allowed to survive past termination. We require - * hangcheck to ensure that the persistent requests are healthy. 
- */ - if (!i915_modparams.enable_hangcheck) - return -EINVAL; - - i915_gem_context_set_persistence(ctx); - } else { - /* To cancel a context we use "preempt-to-idle" */ - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) - return -ENODEV; - - i915_gem_context_clear_persistence(ctx); - } - - return 0; -} - static struct i915_gem_context * __create_context(struct drm_i915_private *i915) { @@ -522,7 +335,6 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? */); for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -679,7 +491,6 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_persistence(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -1790,16 +1601,6 @@ err_free: return err; } -static int -set_persistence(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - if (args->size) - return -EINVAL; - - return __context_set_persistence(ctx, args->value); -} - static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1877,10 +1678,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_engines(ctx, args); break; - case I915_CONTEXT_PARAM_PERSISTENCE: - ret = set_persistence(ctx, args); - break; - case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -2333,11 +2130,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_engines(ctx, args); break; - case I915_CONTEXT_PARAM_PERSISTENCE: - args->size = 0; - args->value = i915_gem_context_is_persistent(ctx); - break; - case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18e50a769a6e..cfe80590f0ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -76,21 +76,6 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); } -static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); -} - static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) { return test_bit(CONTEXT_BANNED, &ctx->flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 861d7d92fe9f..fe97b8ba4fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -137,7 +137,6 @@ struct i915_gem_context { #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 -#define UCONTEXT_PERSISTENCE 4 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index eaea49d08eb5..96ce95c8ac5a 100644 
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -256,7 +256,6 @@ static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { - static struct lock_class_key lock_class; struct dma_buf_attachment *attach; struct drm_i915_gem_object *obj; int ret; @@ -288,7 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e4f5c269150a..e96901888323 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -19,7 +19,6 @@ #include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_gem_clflush.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 9cfb0e41ff06..5ae694c24df4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -164,7 +164,6 @@ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -179,7 +178,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_internal_ops); /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index e3002849844b..fd4122d8c0a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -312,7 +312,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); mutex_unlock(&i915->ggtt.vm.mutex); - if (IS_ACTIVE(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)) + if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index a50296cce0d8..dbf9be9a79f4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -47,10 +47,9 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) } void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key) + const struct drm_i915_gem_object_ops *ops) { - __mutex_init(&obj->mm.lock, "obj->mm.lock", key); + mutex_init(&obj->mm.lock); spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 458cd51331f1..85921796851f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -23,8 
+23,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key); + const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object * i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size); @@ -462,5 +461,6 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); +#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX) #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 70809d8897cd..5bd8de124d74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -8,7 +8,6 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" -#include "gt/intel_ring.h" #include "i915_gem_clflush.h" #include "i915_gem_object_blt.h" @@ -17,7 +16,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, u32 value) { struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + const u32 block_size = S16_MAX * PAGE_SIZE; struct intel_engine_pool_node *pool; struct i915_vma *batch; u64 offset; @@ -30,7 +29,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); intel_engine_pm_get(ce->engine); - count = div_u64(round_up(vma->size, block_size), block_size); + count = div_u64(vma->size, block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); pool = intel_engine_get_pool(ce->engine, size); @@ -201,7 +200,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, struct i915_vma *dst) { struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ + const u32 block_size = S16_MAX * PAGE_SIZE; struct intel_engine_pool_node *pool; struct i915_vma *batch; u64 src_offset, dst_offset; @@ -214,7 +213,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); intel_engine_pm_get(ce->engine); - count = div_u64(round_up(dst->size, block_size), block_size); + count = div_u64(dst->size, block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); pool = intel_engine_get_pool(ce->engine, size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 96008374a412..a387e3ee728b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -31,11 +31,10 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_GGTT BIT(4) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_GGTT BIT(3) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 29f4c2850745..b0ec0959c13f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,7 +7,6 @@ #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" -#include "i915_gem_lmem.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -155,16 +154,6 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } -static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) -{ - if (i915_gem_object_is_lmem(obj)) - io_mapping_unmap((void __force __iomem *)ptr); - else if (is_vmalloc_addr(ptr)) - vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); -} - struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { @@ -180,7 +169,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { - unmap_object(obj, page_mask_bits(obj->mm.mapping)); + void *ptr; + + ptr = page_mask_bits(obj->mm.mapping); + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); + obj->mm.mapping = NULL; } @@ -235,7 +231,7 @@ unlock: } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(struct drm_i915_gem_object *obj, +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; @@ -248,16 +244,6 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj, pgprot_t pgprot; void *addr; - if (i915_gem_object_is_lmem(obj)) { - void __iomem *io; - - if (type != I915_MAP_WC) - return NULL; - - io = i915_gem_object_lmem_io_map(obj, 0, obj->base.size); - return (void __force *)io; - } - /* A single page can always be kmapped */ if (n_pages == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); @@ -299,13 +285,11 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; - unsigned int flags; bool pinned; void *ptr; int err; - flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; - if (!i915_gem_object_type_has(obj, flags)) + if (unlikely(!i915_gem_object_has_struct_page(obj))) return ERR_PTR(-ENXIO); err = mutex_lock_interruptible(&obj->mm.lock); @@ -337,7 +321,10 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - unmap_object(obj, ptr); + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); ptr = obj->mm.mapping = NULL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..7987b54fb1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,6 +11,74 @@ #include "i915_drv.h" +static int pm_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct drm_i915_private *i915 = + container_of(nb, typeof(*i915), gem.pm_notifier); + + switch (action) { + case INTEL_GT_UNPARK: + break; + + case INTEL_GT_PARK: + i915_vma_parked(i915); + break; + } + + return NOTIFY_OK; +} + +static bool switch_to_kernel_context_sync(struct intel_gt *gt) +{ + bool result = !intel_gt_is_wedged(gt); + + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ 
+ if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + result = false; + } + + if (intel_gt_pm_wait_for_idle(gt)) + result = false; + + return result; +} + +bool i915_gem_load_power_context(struct drm_i915_private *i915) +{ + return switch_to_kernel_context_sync(&i915->gt); +} + +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -18,6 +86,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); + user_forcewake(&i915->gt, true); + /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last @@ -27,7 +97,10 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend_prepare(&i915->gt); + intel_gt_suspend(&i915->gt); + intel_uc_suspend(&i915->gt.uc); + + cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); } @@ -69,8 +142,6 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - intel_gt_suspend_late(&i915->gt); - spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { LIST_HEAD(keep); @@ -95,6 +166,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + + i915_gem_sanitize(i915); } void i915_gem_resume(struct drm_i915_private *i915) @@ -114,6 +187,14 @@ void i915_gem_resume(struct drm_i915_private *i915) if (intel_gt_resume(&i915->gt)) goto err_wedged; + intel_uc_resume(&i915->gt.uc); + + /* Always reload a context for powersaving. 
*/ + if (!i915_gem_load_power_context(i915)) + goto err_wedged; + + user_forcewake(&i915->gt, false); + out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); return; @@ -126,3 +207,10 @@ err_wedged: } goto out_unlock; } + +void i915_gem_init__pm(struct drm_i915_private *i915) +{ + i915->gem.pm_notifier.notifier_call = pm_notifier; + blocking_notifier_chain_register(&i915->gt.pm_notifications, + &i915->gem.pm_notifier); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index 26b78dbdc225..6f7d5d11ac3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -12,6 +12,9 @@ struct drm_i915_private; struct work_struct; +void i915_gem_init__pm(struct drm_i915_private *i915); + +bool i915_gem_load_power_context(struct drm_i915_private *i915); void i915_gem_resume(struct drm_i915_private *i915); void i915_gem_idle_work_handler(struct work_struct *work); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4d69c3fc3439..be68b76e13b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -465,7 +465,6 @@ create_shmem(struct intel_memory_region *mem, resource_size_t size, unsigned int flags) { - static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; @@ -492,7 +491,7 @@ create_shmem(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_shmem_ops); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index a2d49c04e6a4..57cd8bc2657c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -556,7 +556,6 @@ __i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, struct drm_mm_node *stolen, struct intel_memory_region *mem) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; int err = -ENOMEM; @@ -566,7 +565,7 @@ __i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, goto err; drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1e045c337044..4f970474013f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -725,7 +725,6 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; @@ -770,7 +769,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_userptr_ops); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = 
I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 892d12db6c49..3c5d17b2b670 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -96,7 +96,6 @@ huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, dma_addr_t dma_size) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -112,7 +111,7 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class); + i915_gem_object_init(obj, &huge_ops); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 688c49a24f32..f27772f6779a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -9,7 +9,6 @@ #include "i915_selftest.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -150,7 +149,6 @@ huge_pages_object(struct drm_i915_private *i915, u64 size, unsigned int page_mask) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -167,7 +165,7 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class); + i915_gem_object_init(obj, &huge_page_ops); i915_gem_object_set_volatile(obj); @@ -297,7 +295,6 @@ static const struct drm_i915_gem_object_ops fake_ops_single = { static struct drm_i915_gem_object * fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -316,9 +313,9 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single, &lock_class); + i915_gem_object_init(obj, &fake_ops_single); else - i915_gem_object_init(obj, &fake_ops, &lock_class); + i915_gem_object_init(obj, &fake_ops); i915_gem_object_set_volatile(obj); @@ -984,8 +981,7 @@ static int gpu_write(struct intel_context *ce, vma->size >> PAGE_SHIFT, val); } -static int -__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned int needs_flush; unsigned long n; @@ -1017,51 +1013,6 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __cpu_check_lmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) -{ - unsigned long n; - int err; - - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 __iomem *base; - u32 read_val; - - base = i915_gem_object_lmem_io_map_page_atomic(obj, n); - - read_val = ioread32(base + dword); - io_mapping_unmap_atomic(base); - if (read_val != val) { - pr_err("n=%lu base[%u]=%u, val=%u\n", - n, dword, 
read_val, val); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_pages(obj); - return err; -} - -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) -{ - if (i915_gem_object_has_struct_page(obj)) - return __cpu_check_shmem(obj, dword, val); - else if (i915_gem_object_is_lmem(obj)) - return __cpu_check_lmem(obj, dword, val); - - return -ENODEV; -} - static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, @@ -1317,235 +1268,131 @@ out_device: return err; } -typedef struct drm_i915_gem_object * -(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); +static int igt_ppgtt_internal_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_128K, + SZ_256K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through internal + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) { return i915->mm.gemfs && has_transparent_hugepage(); } -static struct drm_i915_gem_object * -igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) -{ - if (!igt_can_allocate_thp(i915)) { - pr_info("%s missing THP support, skipping\n", __func__); - return ERR_PTR(-ENODEV); - } - - return i915_gem_object_create_shmem(i915, size); -} - -static struct drm_i915_gem_object * -igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) -{ - return i915_gem_object_create_internal(i915, size); -} - -static struct drm_i915_gem_object * -igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) -{ - return huge_pages_object(i915, size, size); -} - -static struct drm_i915_gem_object * -igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) -{ - return i915_gem_object_create_lmem(i915, size, flags); -} - -static u32 igt_random_size(struct rnd_state *prng, - u32 min_page_size, - u32 max_page_size) -{ - u64 mask; - u32 size; - - GEM_BUG_ON(!is_power_of_2(min_page_size)); - GEM_BUG_ON(!is_power_of_2(max_page_size)); - GEM_BUG_ON(min_page_size < PAGE_SIZE); - GEM_BUG_ON(min_page_size > max_page_size); - - mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; - size = prandom_u32_state(prng) & mask; - if (size < min_page_size) - size |= min_page_size; - - return size; -} - -static int igt_ppgtt_smoke_huge(void *arg) +static int igt_ppgtt_gemfs_huge(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - struct { - igt_create_fn fn; - u32 min; - u32 max; - } 
backends[] = { - { igt_create_internal, SZ_64K, SZ_2M, }, - { igt_create_shmem, SZ_64K, SZ_32M, }, - { igt_create_local, SZ_64K, SZ_1G, }, + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, }; - int err; int i; + int err; /* - * Sanity check that the HW uses huge pages correctly through our - * various backends -- ensure that our writes land in the right place. + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. */ - for (i = 0; i < ARRAY_SIZE(backends); ++i) { - u32 min = backends[i].min; - u32 max = backends[i].max; - u32 size = max; -try_again: - size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } - obj = backends[i].fn(i915, size, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - if (err == -E2BIG) { - size >>= 1; - goto try_again; - } else if (err == -ENODEV) { - err = 0; - continue; - } + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; - return err; - } + obj = i915_gem_object_create_shmem(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); err = i915_gem_object_pin_pages(obj); - if (err) { - if (err == -ENXIO) { - i915_gem_object_put(obj); - size >>= 1; - goto try_again; - } + if (err) goto out_put; - } - if (obj->mm.page_sizes.phys < min) { - pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", - __func__, size, i); - err = -ENOMEM; + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); goto out_unpin; } err = igt_write_huge(ctx, obj); if (err) { - pr_err("%s write-huge failed with size=%u, i=%d\n", - __func__, size, i); + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; } -out_unpin: + i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); -out_put: i915_gem_object_put(obj); - - if (err == -ENOMEM || err == -ENXIO) - err = 0; - - if (err) - break; - - cond_resched(); } - return err; -} + return 0; -static int igt_ppgtt_sanity_check(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - unsigned int supported = INTEL_INFO(i915)->page_sizes; - struct { - igt_create_fn fn; - unsigned int flags; - } backends[] = { - { igt_create_system, 0, }, - { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, - }; - struct { - u32 size; - u32 pages; - } combos[] = { - { SZ_64K, SZ_64K }, - { SZ_2M, SZ_2M }, - { SZ_2M, SZ_64K }, - { SZ_2M - SZ_64K, SZ_64K }, - { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, - { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, - { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, - { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, - }; - int i, j; - int err; - - if (supported == I915_GTT_PAGE_SIZE_4K) - return 0; - - /* - * Sanity check that the HW behaves with a limited set of combinations. - * We already have a bunch of randomised testing, which should give us - * a decent amount of variation between runs, however we should keep - * this to limit the chances of introducing a temporary regression, by - * testing the most obvious cases that might make something blow up. 
- */ - - for (i = 0; i < ARRAY_SIZE(backends); ++i) { - for (j = 0; j < ARRAY_SIZE(combos); ++j) { - struct drm_i915_gem_object *obj; - u32 size = combos[j].size; - u32 pages = combos[j].pages; - - obj = backends[i].fn(i915, size, backends[i].flags); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - if (err == -ENODEV) { - pr_info("Device lacks local memory, skipping\n"); - err = 0; - break; - } - - return err; - } - - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - goto out; - } - - GEM_BUG_ON(pages > obj->base.size); - pages = pages & supported; - - if (pages) - obj->mm.page_sizes.sg = pages; - - err = igt_write_huge(ctx, obj); - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - - if (err) { - pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", - __func__, size, pages, i, j); - goto out; - } - } - - cond_resched(); - } - -out: - if (err == -ENOMEM) - err = 0; +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); return err; } @@ -1909,8 +1756,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_smoke_huge), - SUBTEST(igt_ppgtt_sanity_check), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), }; struct drm_file *file; struct i915_gem_context *ctx; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index da8edee4fe0a..d8804a847945 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,7 +5,6 @@ #include "i915_selftest.h" -#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "selftests/igt_flush_test.h" @@ -13,9 +12,10 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int __igt_client_fill(struct intel_engine_cs *engine) +static int igt_client_fill(void *arg) { - struct intel_context *ce = engine->kernel_context; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -37,7 +37,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); - obj = huge_gem_object(engine->i915, phys_sz, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -103,28 +103,6 @@ err_flush: return err; } -static int igt_client_fill(void *arg) -{ - int inst = 0; - - do { - struct intel_engine_cs *engine; - int err; - - engine = intel_engine_lookup_user(arg, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - err = __igt_client_fill(engine); - if (err == -ENOMEM) - err = 0; - if (err) - return err; - } while (1); -} - int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 2b29f6b4e1dd..549810f70aeb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -8,17 +8,13 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_ring.h" #include "i915_selftest.h" #include 
"selftests/i915_random.h" -struct context { - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; -}; - -static int cpu_set(struct context *ctx, unsigned long offset, u32 v) +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) { unsigned int needs_clflush; struct page *page; @@ -26,11 +22,11 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cpu; int err; - err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); + err = i915_gem_object_prepare_write(obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -43,12 +39,14 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap_atomic(map); - i915_gem_object_finish_access(ctx->obj); + i915_gem_object_finish_access(obj); return 0; } -static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) { unsigned int needs_clflush; struct page *page; @@ -56,11 +54,11 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) u32 *cpu; int err; - err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); + err = i915_gem_object_prepare_read(obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -70,24 +68,26 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) *v = *cpu; kunmap_atomic(map); - i915_gem_object_finish_access(ctx->obj); + i915_gem_object_finish_access(obj); return 0; } -static int gtt_set(struct context *ctx, unsigned long offset, u32 v) +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) { struct i915_vma *vma; u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - i915_gem_object_unlock(ctx->obj); + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -108,19 +108,21 @@ out_rpm: return err; } -static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) { struct i915_vma *vma; u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); - i915_gem_object_unlock(ctx->obj); + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -141,66 +143,73 @@ out_rpm: return err; } -static int wc_set(struct context *ctx, unsigned long offset, u32 v) +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) { u32 *map; int err; - i915_gem_object_lock(ctx->obj); - err = i915_gem_object_set_to_wc_domain(ctx->obj, true); - 
i915_gem_object_unlock(ctx->obj); + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(ctx->obj); + i915_gem_object_unpin_map(obj); return 0; } -static int wc_get(struct context *ctx, unsigned long offset, u32 *v) +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) { u32 *map; int err; - i915_gem_object_lock(ctx->obj); - err = i915_gem_object_set_to_wc_domain(ctx->obj, false); - i915_gem_object_unlock(ctx->obj); + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(ctx->obj); + i915_gem_object_unpin_map(obj); return 0; } -static int gpu_set(struct context *ctx, unsigned long offset, u32 v) +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; - i915_gem_object_lock(ctx->obj); - err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - i915_gem_object_unlock(ctx->obj); + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(ctx->engine->kernel_context); + rq = i915_request_create(i915->engine[RCS0]->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -213,12 +222,12 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(cs); } - if (INTEL_GEN(ctx->engine->i915) >= 8) { + if (INTEL_GEN(i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(ctx->engine->i915) >= 4) { + } else if (INTEL_GEN(i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; @@ -243,34 +252,32 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) return err; } -static bool always_valid(struct context *ctx) +static bool always_valid(struct drm_i915_private *i915) { return true; } -static bool needs_fence_registers(struct context *ctx) +static bool needs_fence_registers(struct drm_i915_private *i915) { - struct intel_gt *gt = ctx->engine->gt; - - if (intel_gt_is_wedged(gt)) - return false; - - return gt->ggtt->num_fences; + return !intel_gt_is_wedged(&i915->gt); } -static bool needs_mi_store_dword(struct context *ctx) +static bool needs_mi_store_dword(struct drm_i915_private *i915) { - if (intel_gt_is_wedged(ctx->engine->gt)) + if (intel_gt_is_wedged(&i915->gt)) return false; - return intel_engine_can_store_dword(ctx->engine); + if (!HAS_ENGINE(i915, RCS0)) + return false; + + return intel_engine_can_store_dword(i915->engine[RCS0]); } static const struct igt_coherency_mode { const char *name; - int (*set)(struct context *ctx, unsigned long offset, 
u32 v); - int (*get)(struct context *ctx, unsigned long offset, u32 *v); - bool (*valid)(struct context *ctx); + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); } igt_coherency_mode[] = { { "cpu", cpu_set, cpu_get, always_valid }, { "gtt", gtt_set, gtt_get, needs_fence_registers }, @@ -279,37 +286,18 @@ static const struct igt_coherency_mode { { }, }; -static struct intel_engine_cs * -random_engine(struct drm_i915_private *i915, struct rnd_state *prng) -{ - struct intel_engine_cs *engine; - unsigned int count; - - count = 0; - for_each_uabi_engine(engine, i915) - count++; - - count = i915_prandom_u32_max_state(count, prng); - for_each_uabi_engine(engine, i915) - if (count-- == 0) - return engine; - - return NULL; -} - static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; unsigned long count, n; u32 *offsets, *values; - I915_RND_STATE(prng); - struct context ctx; int err = 0; - /* - * We repeatedly write, overwrite and read from a sequence of + /* We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes * from either the CPU or GPU). Each setter/getter uses our cache * domain API which should prevent incoherency. @@ -323,35 +311,31 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - ctx.engine = random_engine(i915, &prng); - GEM_BUG_ON(!ctx.engine); - pr_info("%s: using %s\n", __func__, ctx.engine->name); - for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; - if (!over->valid(&ctx)) + if (!over->valid(i915)) continue; for (write = igt_coherency_mode; write->name; write++) { if (!write->set) continue; - if (!write->valid(&ctx)) + if (!write->valid(i915)) continue; for (read = igt_coherency_mode; read->name; read++) { if (!read->get) continue; - if (!read->valid(&ctx)) + if (!read->valid(i915)) continue; for_each_prime_number_from(count, 1, ncachelines) { - ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(ctx.obj)) { - err = PTR_ERR(ctx.obj); + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); goto free; } @@ -360,7 +344,7 @@ static int igt_gem_coherency(void *arg) values[n] = prandom_u32_state(&prng); for (n = 0; n < count; n++) { - err = over->set(&ctx, offsets[n], ~values[n]); + err = over->set(obj, offsets[n], ~values[n]); if (err) { pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", n, count, over->name, err); @@ -369,7 +353,7 @@ static int igt_gem_coherency(void *arg) } for (n = 0; n < count; n++) { - err = write->set(&ctx, offsets[n], values[n]); + err = write->set(obj, offsets[n], values[n]); if (err) { pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", n, count, write->name, err); @@ -380,7 +364,7 @@ static int igt_gem_coherency(void *arg) for (n = 0; n < count; n++) { u32 found; - err = read->get(&ctx, offsets[n], &found); + err = read->get(obj, offsets[n], &found); if (err) { pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", n, count, read->name, err); @@ -398,7 +382,7 @@ static int igt_gem_coherency(void *arg) } } - i915_gem_object_put(ctx.obj); + i915_gem_object_put(obj); } } } @@ -408,7 +392,7 @@ free: 
return err; put_object: - i915_gem_object_put(ctx.obj); + i915_gem_object_put(obj); goto free; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 62fabc023a83..e5c235051ae5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -32,6 +32,7 @@ static int live_nop_switch(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_gem_context **ctx; + enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; unsigned long n; @@ -66,7 +67,7 @@ static int live_nop_switch(void *arg) } } - for_each_uabi_engine(engine, i915) { + for_each_engine(engine, i915, id) { struct i915_request *rq; unsigned long end_time, prime; ktime_t times[2] = {}; @@ -169,24 +170,18 @@ static int __live_parallel_switch1(void *data) struct i915_request *rq = NULL; int err, n; - err = 0; - for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { - struct i915_request *prev = rq; + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + i915_request_put(rq); rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) { - i915_request_put(prev); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_get(rq); - if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); - i915_request_put(prev); - } - i915_request_add(rq); } + + err = 0; if (i915_request_wait(rq, 0, HZ / 5) < 0) err = -ETIME; i915_request_put(rq); @@ -203,7 +198,6 @@ static int __live_parallel_switch1(void *data) static int __live_parallel_switchN(void *data) { struct parallel_switch *arg = data; - struct i915_request *rq = NULL; IGT_TIMEOUT(end_time); unsigned long count; int n; @@ -211,31 +205,17 @@ static int __live_parallel_switchN(void *data) count = 0; do { for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { - struct i915_request *prev = rq; - int err = 0; + struct i915_request *rq; rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) { - i915_request_put(prev); + if (IS_ERR(rq)) return PTR_ERR(rq); - } - - i915_request_get(rq); - if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); - i915_request_put(prev); - } i915_request_add(rq); - if (err) { - i915_request_put(rq); - return err; - } } count++; } while (!__igt_timeout(end_time, NULL)); - i915_request_put(rq); pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); return 0; @@ -345,8 +325,6 @@ static int live_parallel_switch(void *arg) get_task_struct(data[n].tsk); } - yield(); /* start all threads before we kthread_stop() */ - for (n = 0; n < count; n++) { int status; @@ -605,6 +583,7 @@ static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; + enum intel_engine_id id; int err = -ENODEV; /* @@ -616,7 +595,7 @@ static int igt_ctx_exec(void *arg) if (!DRIVER_CAPS(i915)->has_logical_contexts) return 0; - for_each_uabi_engine(engine, i915) { + for_each_engine(engine, i915, id) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; struct i915_request *tq[5] = {}; @@ -732,6 +711,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; + enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; int err = 0; @@ -763,7 +743,7 @@ static int igt_shared_ctx_exec(void *arg) if (err) goto out_file; - for_each_uabi_engine(engine, i915) { + for_each_engine(engine, i915, id) { unsigned long ncontexts, ndwords, dw; struct 
drm_i915_gem_object *obj = NULL; IGT_TIMEOUT(end_time); @@ -1188,90 +1168,93 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { + struct intel_engine_cs *engine = i915->engine[RCS0]; struct drm_i915_gem_object *obj; - int inst = 0; - int ret = 0; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct intel_sseu pg_sseu; + struct drm_file *file; + int ret; - if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) + if (INTEL_GEN(i915) < 9 || !engine) return 0; + if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) + return 0; + + if (hweight32(engine->sseu.slice_mask) < 2) + return 0; + + /* + * Gen11 VME friendly power-gated configuration with half enabled + * sub-slices. + */ + pg_sseu = engine->sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); + + pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + name, flags, hweight32(engine->sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto out_unlock; + } + i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto out_unlock; } - do { - struct intel_engine_cs *engine; - struct intel_context *ce; - struct intel_sseu pg_sseu; + ce = i915_gem_context_get_engine(ctx, RCS0); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_put; + } - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_RENDER, - inst++); - if (!engine) - break; + ret = intel_context_pin(ce); + if (ret) + goto out_context; - if (hweight32(engine->sseu.slice_mask) < 2) - continue; + /* First set the default mask. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_fail; - /* - * Gen11 VME friendly power-gated configuration with - * half enabled sub-slices. - */ - pg_sseu = engine->sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); + /* Then set a power-gated configuration. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; - pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - engine->name, name, flags, - hweight32(engine->sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); + /* Back to defaults. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_fail; - ce = intel_context_create(engine->kernel_context->gem_context, - engine); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_put; - } - - ret = intel_context_pin(ce); - if (ret) - goto out_ce; - - /* First set the default mask. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_unpin; - - /* Then set a power-gated configuration. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_unpin; - - /* Back to defaults. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_unpin; - - /* One last power-gated configuration for the road. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_unpin; - -out_unpin: - intel_context_unpin(ce); -out_ce: - intel_context_put(ce); - } while (!ret); + /* One last power-gated configuration for the road. 
*/ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; +out_fail: if (igt_flush_test(i915)) ret = -EIO; + intel_context_unpin(ce); +out_context: + intel_context_put(ce); out_put: i915_gem_object_put(obj); @@ -1279,6 +1262,8 @@ out_unlock: if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); + mock_file_free(i915, file); + if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1666,6 +1651,7 @@ static int igt_vm_isolation(void *arg) struct drm_file *file; I915_RND_STATE(prng); unsigned long count; + unsigned int id; u64 vm_total; int err; @@ -1706,7 +1692,7 @@ static int igt_vm_isolation(void *arg) vm_total -= I915_GTT_PAGE_SIZE; count = 0; - for_each_uabi_engine(engine, i915) { + for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); unsigned long this = 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 29b2077b73d2..65d4dbf91999 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -301,9 +301,6 @@ static int igt_partial_tiling(void *arg) int tiling; int err; - if (!i915_ggtt_has_aperture(&i915->ggtt)) - return 0; - /* We want to check the page mapping and fencing of a large object * mmapped through the GTT. The object we create is larger than can * possibly be mmaped as a whole, and so we must use partial GGTT vma. @@ -434,9 +431,6 @@ static int igt_smoke_tiling(void *arg) IGT_TIMEOUT(end); int err; - if (!i915_ggtt_has_aperture(&i915->ggtt)) - return 0; - /* * igt_partial_tiling() does an exhastive check of partial tiling * chunking, but will undoubtably run out of time. Here, we do a @@ -521,19 +515,20 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + int err; - for_each_uabi_engine(engine, i915) { + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + for_each_engine(engine, i915, id) { struct i915_request *rq; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -549,13 +544,12 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) i915_vma_unlock(vma); i915_request_add(rq); - i915_vma_unpin(vma); - if (err) - return err; } + i915_vma_unpin(vma); i915_gem_object_put(obj); /* leave it only alive via its active ref */ - return 0; + + return err; } static bool assert_mmap_offset(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index e8132aca0bb6..9ec55b3a3815 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,241 +3,40 @@ * Copyright © 2019 Intel Corporation */ -#include - #include "gt/intel_gt.h" -#include "gt/intel_engine_user.h" #include "i915_selftest.h" -#include "gem/i915_gem_context.h" #include "selftests/igt_flush_test.h" -#include "selftests/i915_random.h" #include "selftests/mock_drm.h" #include "huge_gem_object.h" #include "mock_context.h" -static int wrap_ktime_compare(const void *A, const void *B) -{ - 
const ktime_t *a = A, *b = B; - - return ktime_compare(*a, *b); -} - -static int __perf_fill_blt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_fill_blt(obj, ce, 0); - if (err) - return err; - - err = i915_gem_object_wait(obj, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", - engine->name, - obj->base.size >> 10, - div64_u64(mul_u32_u32(4 * obj->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_fill_blt(void *arg) +static int igt_fill_blt(void *arg) { struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = __perf_fill_blt(obj); - i915_gem_object_put(obj); - if (err) - return err; - } - - return 0; -} - -static int __perf_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst) -{ - struct drm_i915_private *i915 = to_i915(src->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - int err; - - t0 = ktime_get(); - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - return err; - - err = i915_gem_object_wait(dst, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", - engine->name, - src->base.size >> 10, - div64_u64(mul_u32_u32(4 * src->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_copy_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *src, *dst; - int err; - - src = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_src; - } - - err = __perf_copy_blt(src, dst); - - i915_gem_object_put(dst); -err_src: - i915_gem_object_put(src); - if (err) - return err; - } - - return 0; -} - -struct igt_thread_arg { - struct drm_i915_private *i915; - struct rnd_state prng; - unsigned int n_cpus; -}; - -static int igt_fill_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct drm_i915_private *i915 = thread->i915; - struct rnd_state *prng = &thread->prng; + struct intel_context *ce = 
i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct drm_file *file; - unsigned int prio; + struct rnd_state prng; IGT_TIMEOUT(end); - int err; + u32 *vaddr; + int err = 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); + prandom_seed_state(&prng, i915_selftest.random_seed); - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); - - ce = i915_gem_context_get_engine(ctx, BCS0); - GEM_BUG_ON(IS_ERR(ce)); + /* + * XXX: needs some threads to scale all these tests, also maybe throw + * in submission from higher priority context to see if we are + * preempted for very large objects... + */ do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 val = prandom_u32_state(prng); - u64 total = ce->vm->total; - u32 phys_sz; - u32 sz; - u32 *vaddr; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); + u32 val = prandom_u32_state(&prng); u32 i; - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - if (i915_is_ggtt(ce->vm)) - total = div64_u64(total, thread->n_cpus); - - sz = min_t(u64, total >> 4, prandom_u32_state(prng)); - phys_sz = sz % (max_block_size + 1); - sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -299,56 +98,28 @@ err_flush: if (err == -ENOMEM) err = 0; - intel_context_put(ce); -out_file: - mock_file_free(i915, file); return err; } -static int igt_copy_blt_thread(void *arg) +static int igt_copy_blt(void *arg) { - struct igt_thread_arg *thread = arg; - struct drm_i915_private *i915 = thread->i915; - struct rnd_state *prng = &thread->prng; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *src, *dst; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct drm_file *file; - unsigned int prio; + struct rnd_state prng; IGT_TIMEOUT(end); - int err; + u32 *vaddr; + int err = 0; - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); - - ce = i915_gem_context_get_engine(ctx, BCS0); - GEM_BUG_ON(IS_ERR(ce)); + prandom_seed_state(&prng, i915_selftest.random_seed); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 val = prandom_u32_state(prng); - u64 total = ce->vm->total; - u32 phys_sz; - u32 sz; - u32 *vaddr; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); + u32 val = prandom_u32_state(&prng); u32 i; - if (i915_is_ggtt(ce->vm)) - total = div64_u64(total, thread->n_cpus); - - sz = min_t(u64, total >> 4, prandom_u32_state(prng)); - phys_sz = sz % (max_block_size + 1); - sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -430,85 +201,12 @@ err_flush: if (err == -ENOMEM) err = 0; - intel_context_put(ce); -out_file: - mock_file_free(i915, file); return err; } -static int igt_threaded_blt(struct drm_i915_private *i915, - int (*blt_fn)(void *arg)) -{ - struct igt_thread_arg *thread; - struct task_struct **tsk; - I915_RND_STATE(prng); - unsigned int n_cpus; - unsigned int i; - int err = 0; - - 
n_cpus = num_online_cpus() + 1; - - tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); - if (!tsk) - return 0; - - thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); - if (!thread) { - kfree(tsk); - return 0; - } - - for (i = 0; i < n_cpus; ++i) { - thread[i].i915 = i915; - thread[i].n_cpus = n_cpus; - thread[i].prng = - I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); - - tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); - if (IS_ERR(tsk[i])) { - err = PTR_ERR(tsk[i]); - break; - } - - get_task_struct(tsk[i]); - } - - yield(); /* start all threads before we kthread_stop() */ - - for (i = 0; i < n_cpus; ++i) { - int status; - - if (IS_ERR_OR_NULL(tsk[i])) - continue; - - status = kthread_stop(tsk[i]); - if (status && !err) - err = status; - - put_task_struct(tsk[i]); - } - - kfree(tsk); - kfree(thread); - - return err; -} - -static int igt_fill_blt(void *arg) -{ - return igt_threaded_blt(arg, igt_fill_blt_thread); -} - -static int igt_copy_blt(void *arg) -{ - return igt_threaded_blt(arg, igt_copy_blt_thread); -} - int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(perf_fill_blt), - SUBTEST(perf_copy_blt), SUBTEST(igt_fill_blt), SUBTEST(igt_copy_blt), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 29b8984f0e47..74ddd682c9cd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -22,8 +22,6 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; - i915_gem_context_set_persistence(ctx); - mutex_init(&ctx->engines_mutex); e = default_engines(ctx); if (IS_ERR(e)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ee9d2bcd2c13..59c3083c1ec1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -13,7 +13,6 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" -#include "intel_ring.h" static struct i915_global_context { struct i915_global base; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 68b3d317d959..dd742ac2fbdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,7 +12,6 @@ #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" -#include "intel_ring_types.h" #include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..93ea367fe624 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,7 +19,6 @@ #include "intel_workarounds.h" struct drm_printer; -struct intel_gt; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -90,6 +89,38 @@ struct intel_gt; /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 
*/ +enum intel_engine_hangcheck_action { + ENGINE_IDLE = 0, + ENGINE_WAIT, + ENGINE_ACTIVE_SEQNO, + ENGINE_ACTIVE_HEAD, + ENGINE_ACTIVE_SUBUNITS, + ENGINE_WAIT_KICK, + ENGINE_DEAD, +}; + +static inline const char * +hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) +{ + switch (a) { + case ENGINE_IDLE: + return "idle"; + case ENGINE_WAIT: + return "wait"; + case ENGINE_ACTIVE_SEQNO: + return "active seqno"; + case ENGINE_ACTIVE_HEAD: + return "active head"; + case ENGINE_ACTIVE_SUBUNITS: + return "active subunits"; + case ENGINE_WAIT_KICK: + return "wait kick"; + case ENGINE_DEAD: + return "dead"; + } + + return "unknown"; +} static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) @@ -175,13 +206,126 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_HWS_CSB_WRITE_INDEX 0x1f #define CNL_HWS_CSB_WRITE_INDEX 0x2f +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +unsigned int intel_ring_update_space(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); +void intel_ring_free(struct kref *ref); + +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) +{ + kref_get(&ring->ref); + return ring; +} + +static inline void intel_ring_put(struct intel_ring *ring) +{ + kref_put(&ring->ref, intel_ring_free); +} + void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); -int intel_engines_init_mmio(struct intel_gt *gt); -int intel_engines_setup(struct intel_gt *gt); -int intel_engines_init(struct intel_gt *gt); -void intel_engines_cleanup(struct intel_gt *gt); +int __must_check intel_ring_cacheline_align(struct i915_request *rq); + +u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); + +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) +{ + /* Dummy function. + * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). + */ + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); +} + +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline bool +intel_ring_offset_valid(const struct intel_ring *ring, + unsigned int pos) +{ + if (pos & -ring->size) /* must be strictly within the ring */ + return false; + + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ + return false; + + return true; +} + +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); +} + +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. 
Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline +} + +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} + +static inline unsigned int +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) +{ + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); +} + +int intel_engines_init_mmio(struct drm_i915_private *i915); +int intel_engines_setup(struct drm_i915_private *i915); +int intel_engines_init(struct drm_i915_private *i915); +void intel_engines_cleanup(struct drm_i915_private *i915); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -290,6 +434,61 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + int intel_enable_engine_stats(struct intel_engine_cs *engine); void intel_disable_engine_stats(struct intel_engine_cs *engine); @@ -326,22 +525,4 @@ void intel_engine_init_active(struct intel_engine_cs *engine, #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 -static inline bool -intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) - return false; - - return intel_engine_has_preemption(engine); -} - -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f8113bc756c6..051734c9b733 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -37,7 +37,6 @@ #include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" -#include "intel_ring.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -309,15 +308,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); - engine->props.heartbeat_interval_ms = - CONFIG_DRM_I915_HEARTBEAT_INTERVAL; - engine->props.preempt_timeout_ms = - CONFIG_DRM_I915_PREEMPT_TIMEOUT; - engine->props.stop_timeout_ms = - CONFIG_DRM_I915_STOP_TIMEOUT; - engine->props.timeslice_duration_ms = - CONFIG_DRM_I915_TIMESLICE_DURATION; - /* * To be overridden by the backend on setup. However to facilitate * cleanup on error during setup, we always provide the destroy vfunc. @@ -380,40 +370,38 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) } } -static void intel_setup_engine_capabilities(struct intel_gt *gt) +static void intel_setup_engine_capabilities(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt, id) + for_each_engine(engine, i915, id) __setup_engine_capabilities(engine); } /** * intel_engines_cleanup() - free the resources allocated for Command Streamers - * @gt: pointer to struct intel_gt + * @i915: the i915 devic */ -void intel_engines_cleanup(struct intel_gt *gt) +void intel_engines_cleanup(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { engine->destroy(engine); - gt->engine[id] = NULL; - gt->i915->engine[id] = NULL; + i915->engine[id] = NULL; } } /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @gt: pointer to struct intel_gt + * @i915: the i915 device * * Return: non-zero if the initialization failed. 
*/ -int intel_engines_init_mmio(struct intel_gt *gt) +int intel_engines_init_mmio(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = gt->i915; struct intel_device_info *device_info = mkwrite_device_info(i915); const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; @@ -431,7 +419,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(gt, i); + err = intel_engine_setup(&i915->gt, i); if (err) goto cleanup; @@ -448,36 +436,36 @@ int intel_engines_init_mmio(struct intel_gt *gt) RUNTIME_INFO(i915)->num_engines = hweight32(mask); - intel_gt_check_and_clear_faults(gt); + intel_gt_check_and_clear_faults(&i915->gt); - intel_setup_engine_capabilities(gt); + intel_setup_engine_capabilities(i915); return 0; cleanup: - intel_engines_cleanup(gt); + intel_engines_cleanup(i915); return err; } /** * intel_engines_init() - init the Engine Command Streamers - * @gt: pointer to struct intel_gt + * @i915: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct intel_gt *gt) +int intel_engines_init(struct drm_i915_private *i915) { int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(gt->i915)) + if (HAS_EXECLISTS(i915)) init = intel_execlists_submission_init; else init = intel_ring_submission_init; - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { err = init(engine); if (err) goto cleanup; @@ -486,7 +474,7 @@ int intel_engines_init(struct intel_gt *gt) return 0; cleanup: - intel_engines_cleanup(gt); + intel_engines_cleanup(i915); return err; } @@ -530,7 +518,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, unsigned int flags; flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) + if (!HAS_LLC(engine->i915)) /* * On g33, we cannot place HWS above 256MiB, so * restrict its pinning to the low mappable arena. @@ -614,6 +602,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); + intel_engine_init_hangcheck(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -632,26 +621,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) /** * intel_engines_setup- setup engine state not requiring hw access - * @gt: pointer to struct intel_gt + * @i915: Device to setup. * * Initializes engine structure members shared between legacy and execlists * submission modes which do not require hardware access. * * Typically done early in the submission mode specific engine setup stage. 
*/ -int intel_engines_setup(struct intel_gt *gt) +int intel_engines_setup(struct drm_i915_private *i915) { int (*setup)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (HAS_EXECLISTS(gt->i915)) + if (HAS_EXECLISTS(i915)) setup = intel_execlists_submission_setup; else setup = intel_ring_submission_setup; - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { err = intel_engine_setup_common(engine); if (err) goto cleanup; @@ -669,7 +658,7 @@ int intel_engines_setup(struct intel_gt *gt) return 0; cleanup: - intel_engines_cleanup(gt); + intel_engines_cleanup(i915); return err; } @@ -884,21 +873,6 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } -static unsigned long stop_timeout(const struct intel_engine_cs *engine) -{ - if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ - return 0; - - /* - * If we are doing a normal GPU reset, we can take our time and allow - * the engine to quiesce. We've stopped submission to the engine, and - * if we wait long enough an innocent context should complete and - * leave the engine idle. So they should not be caught unaware by - * the forthcoming GPU reset (which usually follows the stop_cs)! - */ - return READ_ONCE(engine->props.stop_timeout_ms); -} - int intel_engine_stop_cs(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -916,7 +890,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) err = 0; if (__intel_wait_for_register_fw(uncore, mode, MODE_IDLE, MODE_IDLE, - 1000, stop_timeout(engine), + 1000, 0, NULL)) { GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); err = -ETIMEDOUT; @@ -1344,11 +1318,10 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", + drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n", yesno(test_bit(TASKLET_STATE_SCHED, &engine->execlists.tasklet.state)), enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), - repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); read = execlists->csb_head; @@ -1474,13 +1447,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - drm_printf(m, "\tHeartbeat: %d ms ago\n", - jiffies_to_msecs(jiffies - rq->emitted_jiffies)); - rcu_read_unlock(); + drm_printf(m, "\tHangcheck: %d ms ago\n", + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3c0f490ff2c7..67eb6183648a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -7,13 +7,11 @@ #include "i915_drv.h" #include "intel_engine.h" -#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" -#include "intel_ring.h" static int __engine_unpark(struct intel_wakeref *wf) { @@ -36,7 +34,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (engine->unpark) engine->unpark(engine); - intel_engine_unpark_heartbeat(engine); + intel_engine_init_hangcheck(engine); return 0; } @@ -113,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) i915_request_add_active_barriers(rq); /* Install ourselves as a preemption barrier */ - rq->sched.attr.priority = I915_PRIORITY_BARRIER; + rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; __i915_request_commit(rq); /* Release our exclusive hold on the engine */ @@ -160,7 +158,6 @@ static int __engine_park(struct intel_wakeref *wf) call_idle_barriers(engine); /* cleanup after wedging */ - intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); @@ -191,7 +188,6 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); - intel_engine_init_heartbeat(engine); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c5d1047a4bc5..3451be034caf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -15,7 +15,6 @@ #include #include #include -#include #include "i915_gem.h" #include "i915_pmu.h" @@ -59,7 +58,6 @@ struct i915_gem_context; struct i915_request; struct i915_sched_attr; struct intel_gt; -struct intel_ring; struct intel_uncore; typedef u8 intel_engine_mask_t; @@ -78,6 +76,40 @@ struct intel_instdone { u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; +struct intel_engine_hangcheck { + u64 acthd; + u32 last_ring; + u32 last_head; + unsigned long action_timestamp; + struct intel_instdone instdone; +}; + +struct intel_ring { + struct kref ref; + struct i915_vma *vma; + void *vaddr; + + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. 
+ */ + atomic_t pin_count; + + u32 head; + u32 tail; + u32 emit; + + u32 space; + u32 size; + u32 effective_size; +}; + /* * we use a single page to load ctx workarounds so all of these * values are referred in terms of dwords @@ -142,11 +174,6 @@ struct intel_engine_execlists { */ struct timer_list timer; - /** - * @preempt: reset the current context if it fails to give way - */ - struct timer_list preempt; - /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ @@ -299,11 +326,6 @@ struct intel_engine_cs { intel_engine_mask_t saturated; /* submitting semaphores too late? */ - struct { - struct delayed_work work; - struct i915_request *systole; - } heartbeat; - unsigned long serial; unsigned long wakeref_serial; @@ -454,6 +476,8 @@ struct intel_engine_cs { /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; + struct intel_engine_hangcheck hangcheck; + #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) @@ -518,13 +542,6 @@ struct intel_engine_cs { */ ktime_t total; } stats; - - struct { - unsigned long heartbeat_interval_ms; - unsigned long preempt_timeout_ms; - unsigned long stop_timeout_ms; - unsigned long timeslice_duration_ms; - } props; }; static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 4c26daf7ee46..1c4b6c9642ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -9,7 +9,6 @@ #include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" -#include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -23,19 +22,19 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(&gt->closed_vma); spin_lock_init(&gt->closed_lock); + intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); - - intel_rps_init_early(&gt->rps); intel_uc_init_early(&gt->uc); } -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) +void intel_gt_init_hw_early(struct drm_i915_private *i915) { - gt->ggtt = ggtt; + i915->gt.ggtt = &i915->ggtt; - intel_gt_sanitize(gt, false); + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + intel_gt_pm_disable(&i915->gt); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -322,7 +321,8 @@ void intel_gt_chipset_flush(struct intel_gt *gt) void intel_gt_driver_register(struct intel_gt *gt) { - intel_rps_driver_register(&gt->rps); + if (IS_GEN(gt->i915, 5)) + intel_gpu_ips_init(gt->i915); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -380,16 +380,20 @@ int intel_gt_init(struct intel_gt *gt) void intel_gt_driver_remove(struct intel_gt *gt) { GEM_BUG_ON(gt->awake); + intel_gt_pm_disable(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) { - intel_rps_driver_unregister(&gt->rps); + intel_gpu_ips_teardown(); } void intel_gt_driver_release(struct intel_gt *gt) { + /* Paranoia: make sure we have disabled everything before we exit. 
*/ + intel_gt_pm_disable(gt); intel_gt_pm_fini(gt); + intel_gt_fini_scratch(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 5436f8c30708..e6ab0bff0efb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -28,7 +28,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +void intel_gt_init_hw_early(struct drm_i915_private *i915); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); void intel_gt_driver_register(struct intel_gt *gt); @@ -46,6 +46,8 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_flush_ggtt_writes(struct intel_gt *gt); void intel_gt_chipset_flush(struct intel_gt *gt); +void intel_gt_init_hangcheck(struct intel_gt *gt); + static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { @@ -57,4 +59,6 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt) return __intel_reset_failed(&gt->reset); } +void intel_gt_queue_hangcheck(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 973ee7eded64..34a4fb624bf7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,7 +11,6 @@ #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" -#include "intel_rps.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -78,7 +77,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return guc_irq_handler(&gt->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(&gt->rps, iir); + return gen11_rps_irq_handler(gt, iir); WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); @@ -337,7 +336,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(&gt->rps, gt_iir[2]); + gen6_rps_irq_handler(gt->i915, gt_iir[2]); guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 060a27d9af34..b866d5b1eee0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -4,8 +4,6 @@ * Copyright © 2019 Intel Corporation */ -#include - #include "i915_drv.h" #include "i915_globals.h" #include "i915_params.h" @@ -14,28 +12,13 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" -#include "intel_llc.h" #include "intel_pm.h" #include "intel_rc6.h" -#include "intel_rps.h" #include "intel_wakeref.h" -static void user_forcewake(struct intel_gt *gt, bool suspend) +static void pm_notify(struct intel_gt *gt, int state) { - int count = atomic_read(&gt->user_wakeref); - - /* Inside suspend/resume so single threaded, no races to worry about. 
*/ - if (likely(!count)) - return; - - intel_gt_pm_get(gt); - if (suspend) { - GEM_BUG_ON(count > atomic_read(&gt->wakeref.count)); - atomic_sub(count, &gt->wakeref.count); - } else { - atomic_add(count, &gt->wakeref.count); - } - intel_gt_pm_put(gt); + blocking_notifier_call_chain(&gt->pm_notifications, state, gt->i915); } static int __gt_unpark(struct intel_wakeref *wf) @@ -61,11 +44,19 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); - intel_rps_unpark(&gt->rps); + intel_enable_gt_powersave(i915); + + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + intel_gt_queue_hangcheck(gt); intel_gt_unpark_requests(gt); + pm_notify(gt, INTEL_GT_UNPARK); + return 0; } @@ -77,11 +68,12 @@ static int __gt_park(struct intel_wakeref *wf) GEM_TRACE("\n"); + pm_notify(gt, INTEL_GT_PARK); intel_gt_park_requests(gt); - i915_vma_parked(gt); i915_pmu_gt_parked(i915); - intel_rps_park(&gt->rps); + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); @@ -103,6 +95,8 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops); + + BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications); } void intel_gt_pm_init(struct intel_gt *gt) @@ -113,7 +107,6 @@ void intel_gt_pm_init(struct intel_gt *gt) * user. */ intel_rc6_init(&gt->rc6); - intel_rps_init(&gt->rps); } static bool reset_engines(struct intel_gt *gt) @@ -138,22 +131,8 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) { struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; - GEM_TRACE("force:%s\n", yesno(force)); - - /* Use a raw wakeref to avoid calling intel_display_power_get early */ - wakeref = intel_runtime_pm_get(gt->uncore->rpm); - intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); - - /* - * As we have just resumed the machine and woken the device up from - * deep PCI sleep (presumably D3_cold), assume the HW has been reset - * back to defaults, recovering from whatever wedged state we left it - * in and so worth trying to use the device once more. - */ - if (intel_gt_is_wedged(gt)) - intel_gt_unset_wedged(gt); + GEM_TRACE("\n"); intel_uc_sanitize(&gt->uc); @@ -161,8 +140,6 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.prepare) engine->reset.prepare(engine); - intel_uc_reset_prepare(&gt->uc); - if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); @@ -171,9 +148,12 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); +} - intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(gt->uncore->rpm, wakeref); +void intel_gt_pm_disable(struct intel_gt *gt) +{ + if (!is_mock_gt(gt)) + intel_sanitize_gt_powersave(gt->i915); } void intel_gt_pm_fini(struct intel_gt *gt) @@ -187,8 +167,6 @@ int intel_gt_resume(struct intel_gt *gt) enum intel_engine_id id; int err = 0; - GEM_TRACE("\n"); - /* * After resume, we may need to poke into the pinned kernel * contexts to paper over any damage caused by the sudden suspend. @@ -196,13 +174,9 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. 
*/ intel_gt_pm_get(gt); - intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(&gt->rc6); - intel_rps_enable(&gt->rps); - intel_llc_enable(&gt->llc); - for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -211,7 +185,9 @@ int intel_gt_resume(struct intel_gt *gt) ce = engine->kernel_context; if (ce) { GEM_BUG_ON(!intel_context_is_pinned(ce)); + mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_); ce->ops->reset(ce); + mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); } engine->serial++; /* kernel context lost */ @@ -227,22 +203,14 @@ int intel_gt_resume(struct intel_gt *gt) } intel_rc6_enable(&gt->rc6); - - intel_uc_resume(&gt->uc); - - user_forcewake(gt, false); - intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } -static void wait_for_suspend(struct intel_gt *gt) +static void wait_for_idle(struct intel_gt *gt) { - if (!intel_gt_pm_is_awake(gt)) - return; - if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave @@ -254,65 +222,24 @@ static void wait_for_idle(struct intel_gt *gt) intel_gt_pm_wait_for_idle(gt); } -void intel_gt_suspend_prepare(struct intel_gt *gt) -{ - user_forcewake(gt, true); - wait_for_suspend(gt); - - intel_uc_suspend(&gt->uc); -} - -static suspend_state_t pm_suspend_target(void) -{ -#if IS_ENABLED(CONFIG_PM_SLEEP) - return pm_suspend_target_state; -#else - return PM_SUSPEND_TO_IDLE; -#endif -} - -void intel_gt_suspend_late(struct intel_gt *gt) +void intel_gt_suspend(struct intel_gt *gt) { intel_wakeref_t wakeref; /* We expect to be idle already; but also want to be independent */ - wait_for_suspend(gt); + wait_for_idle(gt); - /* - * On disabling the device, we want to turn off HW access to memory - * that we no longer own. - * - * However, not all suspend-states disable the device. S0 (s2idle) - * is effectively runtime-suspend, the device is left powered on - * but needs to be put into a low power state. We need to keep - * powermanagement enabled, but we also retain system state and so - * it remains safe to keep on using our allocated memory. 
- */ - if (pm_suspend_target() == PM_SUSPEND_TO_IDLE) - return; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - intel_rps_disable(&gt->rps); + with_intel_runtime_pm(gt->uncore->rpm, wakeref) intel_rc6_disable(&gt->rc6); - intel_llc_disable(&gt->llc); - } - - intel_gt_sanitize(gt, false); - - GEM_TRACE("\n"); } void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(&gt->uc); - - GEM_TRACE("\n"); } int intel_gt_runtime_resume(struct intel_gt *gt) { - GEM_TRACE("\n"); - intel_gt_init_swizzling(gt); return intel_uc_runtime_resume(&gt->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..997770d3a968 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -12,6 +12,11 @@ #include "intel_gt_types.h" #include "intel_wakeref.h" +enum { + INTEL_GT_UNPARK, + INTEL_GT_PARK, +}; + static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) { return intel_wakeref_is_active(&gt->wakeref); } @@ -39,13 +44,13 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); +void intel_gt_pm_disable(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); -void intel_gt_suspend_prepare(struct intel_gt *gt); -void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); +void intel_gt_suspend(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index d4e14dbd172e..ae4aaf75ac78 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,7 +20,6 @@ #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" -#include "intel_rps_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -28,6 +27,14 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +struct intel_hangcheck { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + + struct delayed_work work; +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -61,6 +68,7 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ + struct intel_hangcheck hangcheck; struct intel_reset reset; /** @@ -74,7 +82,8 @@ struct intel_gt { struct intel_llc llc; struct intel_rc6 rc6; - struct intel_rps rps; + + struct blocking_notifier_head pm_notifications; ktime_t last_init_time; diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 05d042cdefe2..0fdef00af9e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -53,6 +53,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) static bool subunits_stuck(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct intel_instdone instdone; struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; bool stuck; @@ -71,7 +72,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) stuck &= 
instdone_unchanged(instdone.slice_common, &accu_instdone->slice_common); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) { stuck &= instdone_unchanged(instdone.sampler[slice][subslice], &accu_instdone->sampler[slice][subslice]); stuck &= instdone_unchanged(instdone.row[slice][subslice], @@ -236,7 +237,7 @@ static void hangcheck_declare_hang(struct intel_gt *gt, hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, gt->i915, hung, tmp) + for_each_engine_masked(engine, gt, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; @@ -270,7 +271,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); if (!wakeref) return; @@ -280,10 +281,10 @@ static void hangcheck_elapsed(struct work_struct *work) */ intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct hangcheck hc; - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); @@ -302,7 +303,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (GEM_SHOW_DEBUG() && (hung | stuck)) { struct drm_printer p = drm_debug_printer("hangcheck"); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_is_idle(engine)) continue; @@ -321,7 +322,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); /* Reset timer in case GPU hangs without another request being added */ intel_gt_queue_hangcheck(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index ceb785b75c25..35093eb5f24e 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -48,7 +48,7 @@ static bool get_ia_constants(struct intel_llc *llc, struct ia_constants *consts) { struct drm_i915_private *i915 = llc_to_gt(llc)->i915; - struct intel_rps *rps = &llc_to_gt(llc)->rps; + struct intel_rps *rps = &i915->gt_pm.rps; if (rps->max_freq <= rps->min_freq) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51aef2a233cb..d0088d020220 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -145,7 +145,6 @@ #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" -#include "intel_ring.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -235,9 +234,16 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_engine_cs *engine, const struct intel_ring *ring, bool close); -static void -__execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine); + +static void __context_pin_acquire(struct intel_context *ce) +{ + mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_); +} + +static void __context_pin_release(struct intel_context *ce) +{ + mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_); +} static void mark_eio(struct i915_request *rq) { @@ -250,23 +256,6 @@ static void mark_eio(struct 
i915_request *rq) i915_request_mark_complete(rq); } -static struct i915_request * -active_request(const struct intel_timeline * const tl, struct i915_request *rq) -{ - struct i915_request *active = rq; - - rcu_read_lock(); - list_for_each_entry_continue_reverse(rq, &tl->requests, link) { - if (i915_request_completed(rq)) - break; - - active = rq; - } - rcu_read_unlock(); - - return active; -} - static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -471,7 +460,8 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; + /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing @@ -935,61 +925,6 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -static void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - if (engine->stats.active++ == 0) - engine->stats.start = ktime_get(); - GEM_BUG_ON(engine->stats.active == 0); - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (READ_ONCE(engine->stats.enabled) == 0) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - - if (engine->stats.enabled > 0) { - ktime_t last; - - if (engine->stats.active && --engine->stats.active == 0) { - /* - * Decrement the active context count and in case GPU - * is now idle add up to the running total. - */ - last = ktime_sub(ktime_get(), engine->stats.start); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } else if (engine->stats.active == 0) { - /* - * After turning on engine stats, context out might be - * the first event in which case we account from the - * time stats gathering was turned on. - */ - last = ktime_sub(ktime_get(), engine->stats.enabled_at); - - engine->stats.total = ktime_add(engine->stats.total, - last); - } - } - - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -1047,59 +982,6 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - - execlists_init_reg_state(regs, ce, engine, ce->ring, false); -} - -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) -{ - struct intel_context * const ce = rq->hw_context; - u32 head; - - /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. 
But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. - */ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); - - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; - else - head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) @@ -1110,9 +992,6 @@ __execlists_schedule_out(struct i915_request *rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) - reset_active(rq, engine); - /* * If this is part of a virtual engine, its next request may * have been blocked waiting for access to the active context. @@ -1466,7 +1345,7 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) { int hint; - if (!intel_engine_has_timeslices(engine)) + if (!intel_engine_has_semaphores(engine)) return false; if (list_is_last(&rq->sched.link, &engine->active.requests)) @@ -1487,32 +1366,15 @@ switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) return rq_prio(list_next_entry(rq, sched.link)); } -static inline unsigned long -timeslice(const struct intel_engine_cs *engine) +static bool +enable_timeslice(const struct intel_engine_execlists *execlists) { - return READ_ONCE(engine->props.timeslice_duration_ms); -} - -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) -{ - const struct i915_request *rq = *engine->execlists.active; + const struct i915_request *rq = *execlists->active; if (i915_request_completed(rq)) - return 0; + return false; - if (engine->execlists.switch_priority_hint < effective_prio(rq)) - return 0; - - return timeslice(engine); -} - -static void set_timeslice(struct intel_engine_cs *engine) -{ - if (!intel_engine_has_timeslices(engine)) - return; - - set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); + return execlists->switch_priority_hint >= effective_prio(rq); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1520,30 +1382,6 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } -static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = last_active(&engine->execlists); - if (!rq) - return 0; - - /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ - if (unlikely(i915_gem_context_is_banned(rq->gem_context))) - return 1; - - return READ_ONCE(engine->props.preempt_timeout_ms); -} - -static void set_preempt_timeout(struct intel_engine_cs *engine) -{ - if (!intel_engine_has_preempt_reset(engine)) - return; - - set_timer_ms(&engine->execlists.preempt, - active_preempt_timeout(engine)); -} - static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1683,9 +1521,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (!execlists->timer.expires && need_timeslice(engine, last)) - set_timer_ms(&execlists->timer, - timeslice(engine)); - + mod_timer(&execlists->timer, + jiffies + 1); return; } @@ -1920,8 +1757,6 @@ done: memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); - - set_preempt_timeout(engine); } else { skip_submit: ring_set_paused(engine, 0); @@ -2032,7 +1867,7 @@ static void process_csb(struct intel_engine_cs *engine) */ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && !reset_in_progress(execlists)); - GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); + GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. @@ -2109,7 +1944,10 @@ static void process_csb(struct intel_engine_cs *engine) execlists_num_ports(execlists) * sizeof(*execlists->pending)); - set_timeslice(engine); + if (enable_timeslice(execlists)) + mod_timer(&execlists->timer, jiffies + 1); + else + cancel_timer(&execlists->timer); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -2159,43 +1997,6 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } -static noinline void preempt_reset(struct intel_engine_cs *engine) -{ - const unsigned int bit = I915_RESET_ENGINE + engine->id; - unsigned long *lock = &engine->gt->reset.flags; - - if (i915_modparams.reset < 3) - return; - - if (test_and_set_bit(bit, lock)) - return; - - /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); - - GEM_TRACE("%s: preempt timeout %lu+%ums\n", - engine->name, - READ_ONCE(engine->props.preempt_timeout_ms), - jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); - intel_engine_reset(engine, "preemption time out"); - - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(bit, lock); -} - -static bool preempt_timeout(const struct intel_engine_cs *const engine) -{ - const struct timer_list *t = &engine->execlists.preempt; - - if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) - return false; - - if (!timer_expired(t)) - return false; - - return READ_ONCE(engine->execlists.pending[0]); -} - /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. 
@@ -2203,39 +2004,23 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - bool timeout = preempt_timeout(engine); + unsigned long flags; process_csb(engine); - if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { - unsigned long flags; - + if (!READ_ONCE(engine->execlists.pending[0])) { spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); - - /* Recheck after serialising with direct-submission */ - if (timeout && preempt_timeout(engine)) - preempt_reset(engine); } } -static void __execlists_kick(struct intel_engine_execlists *execlists) +static void execlists_submission_timer(struct timer_list *timer) { + struct intel_engine_cs *engine = + from_timer(engine, timer, execlists.timer); + /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); -} - -#define execlists_kick(t, member) \ - __execlists_kick(container_of(t, struct intel_engine_execlists, member)) - -static void execlists_timeslice(struct timer_list *timer) -{ - execlists_kick(timer, timer); -} - -static void execlists_preempt(struct timer_list *timer) -{ - execlists_kick(timer, preempt); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void queue_request(struct intel_engine_cs *engine, @@ -2315,6 +2100,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); @@ -2326,6 +2112,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; vaddr += engine->context_size; if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) @@ -2940,28 +2727,37 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) &execlists->csb_status[reset_value]); } -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +static struct i915_request *active_request(struct i915_request *rq) { - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; + const struct intel_context * const ce = rq->hw_context; + struct i915_request *active = NULL; + struct list_head *list; + + if (!i915_request_is_active(rq)) /* unwound, but incomplete! 
*/ + return rq; + + list = &i915_request_active_timeline(rq)->requests; + list_for_each_entry_from_reverse(rq, list, link) { + if (i915_request_completed(rq)) + break; + + if (rq->hw_context != ce) + break; + + active = rq; + } + + return active; } static void __execlists_reset_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine) { u32 *regs = ce->lrc_reg_state; - int x; - x = lrc_ring_mi_mode(engine); - if (x != -1) { - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; + if (INTEL_GEN(engine->i915) >= 9) { + regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING; + regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16; } } @@ -2970,6 +2766,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; + u32 *regs; mb(); /* paranoia: read the CSB pointers from after the reset */ clflush(execlists->csb_write); @@ -2995,17 +2792,19 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) ce = rq->hw_context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - if (i915_request_completed(rq)) { + /* Proclaim we have exclusive access to the context image! */ + __context_pin_acquire(ce); + + rq = active_request(rq); + if (!rq) { /* Idle context; tidy up the ring so we can restart afresh */ - ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); + ce->ring->head = ce->ring->tail; goto out_replay; } /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); - rq = active_request(ce->timeline, rq); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); - GEM_BUG_ON(ce->ring->head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -3046,15 +2845,22 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * to recreate its own state. */ GEM_BUG_ON(!intel_context_is_pinned(ce)); - restore_default_state(ce, engine); + regs = ce->lrc_reg_state; + if (engine->pinned_default_state) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + } + execlists_init_reg_state(regs, ce, engine, ce->ring, false); out_replay: - GEM_TRACE("%s replay {head:%04x, tail:%04x}\n", + GEM_TRACE("%s replay {head:%04x, tail:%04x\n", engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + __context_pin_release(ce); unwind: /* Push back any incomplete requests for replay after the reset. 
*/ @@ -3663,7 +3469,6 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); - cancel_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -3781,8 +3586,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) { tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_timeslice, 0); - timer_setup(&engine->execlists.preempt, execlists_preempt, 0); + timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); @@ -3992,6 +3796,12 @@ populate_lr_context(struct intel_context *ce, set_redzone(vaddr, engine); if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; void *defaults; defaults = i915_gem_object_pin_map(engine->default_state, @@ -4001,7 +3811,7 @@ populate_lr_context(struct intel_context *ce, goto err_unpin_ctx; } - memcpy(vaddr, defaults, engine->context_size); + memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); inhibit = false; } @@ -4016,7 +3826,9 @@ populate_lr_context(struct intel_context *ce, ret = 0; err_unpin_ctx: - __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); + __i915_gem_object_flush_map(ctx_obj, + LRC_HEADER_PAGES * PAGE_SIZE, + engine->context_size); i915_gem_object_unpin_map(ctx_obj); return ret; } @@ -4033,6 +3845,11 @@ static int __execlists_context_alloc(struct intel_context *ce, GEM_BUG_ON(ce->state); context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); + /* + * Before the actual start of the context image, we insert a few pages + * for our own use and for sharing with the GuC. + */ + context_size += LRC_HEADER_PAGES * PAGE_SIZE; if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ @@ -4685,6 +4502,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, bool scrub) { GEM_BUG_ON(!intel_context_is_pinned(ce)); + __context_pin_acquire(ce); /* * We want a simple context + ring to execute the breadcrumb update. @@ -4694,21 +4512,23 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - if (scrub) - restore_default_state(ce, engine); + if (scrub) { + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + } + execlists_init_reg_state(regs, ce, engine, ce->ring, false); + } /* Rerun the request; its payload has been neutered (if guilty). 
*/ ce->ring->head = head; intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); -} - -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) -{ - return engine->set_default_submission == - intel_execlists_set_default_submission; + __context_pin_release(ce); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 04511d8ebdc1..99dc576a4e25 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,7 +43,6 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) -#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -86,12 +85,31 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ + +/* + * We allocate a header at the start of the context image for our own + * use, therefore the actual location of the logical state is offset + * from the start of the VMA. The layout is + * + * | [guc] | [hwsp] [logical state] | + * |<- our header ->|<- context image ->| + * + */ +/* The first page is used for sharing data with the GuC */ +#define LRC_GUCSHR_PN (0) +#define LRC_GUCSHR_SZ (1) /* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (0) +#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) #define LRC_PPHWSP_SZ (1) -/* After the PPHWSP we have the logical state for the context */ +/* Finally we have the logical state for the context */ #define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) +/* + * Currently we include the PPHWSP in __intel_engine_context_size() so + * the size of the header is synonymous with the start of the PPHWSP. 
+ */ +#define LRC_HEADER_PAGES LRC_PPHWSP_PN + /* Space within PPHWSP reserved to be used as scratch */ #define LRC_PPHWSP_SCRATCH 0x34 #define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) @@ -127,7 +145,4 @@ struct intel_engine_cs * intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling); -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 6e881c735b20..5bac3966906b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -26,7 +26,6 @@ #include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" -#include "intel_ring.h" /* structures required */ struct drm_i915_mocs_entry { @@ -462,12 +461,6 @@ static void intel_mocs_init_global(struct intel_gt *gt) struct drm_i915_mocs_table table; unsigned int index; - /* - * LLC and eDRAM control values are not applicable to dgfx - */ - if (IS_DGFX(gt->i915)) - return; - GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); if (!get_mocs_settings(gt->i915, &table)) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 5ad4a92a9582..70f0e01a38b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -525,11 +525,6 @@ void intel_rc6_init(struct intel_rc6 *rc6) void intel_rc6_sanitize(struct intel_rc6 *rc6) { - if (rc6->enabled) { /* unbalanced suspend/resume */ - rpm_get(rc6); - rc6->enabled = false; - } - if (rc6->supported) __intel_rc6_disable(rc6); } diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index c4edc35e7d89..6d05f9c64178 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,7 +27,6 @@ #include "i915_drv.h" #include "intel_renderstate.h" -#include "intel_ring.h" struct intel_renderstate { const struct intel_renderstate_rodata *rodata; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..bf8d1ed4b1d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1024,6 +1024,8 @@ void intel_gt_reset(struct intel_gt *gt, if (ret) goto taint; + intel_gt_queue_hangcheck(gt); + finish: reset_finish(gt, awake); unlock: @@ -1351,5 +1353,4 @@ void __intel_fini_wedge(struct intel_wedge_me *w) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" -#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index bacaa7bb8c9a..bf631f15aa78 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -439,8 +440,8 @@ 
static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -930,6 +931,7 @@ static void cancel_requests(struct intel_engine_cs *engine) static void i9xx_submit_request(struct i915_request *request) { i915_request_submit(request); + wmb(); /* paranoid flush writes out of the WCB before mmio */ ENGINE_WRITE(request->engine, RING_TAIL, intel_ring_set_tail(request->ring, request->tail)); @@ -937,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -960,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -1272,7 +1274,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct i915_vma *vma; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1336,15 +1338,13 @@ void intel_ring_free(struct kref *ref) { struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - i915_vma_close(ring->vma); i915_vma_put(ring->vma); - kfree(ring); } static void __ring_context_fini(struct intel_context *ce) { - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) @@ -1609,7 +1609,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1663,7 +1663,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) 
i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1676,7 +1676,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } @@ -1741,46 +1741,22 @@ static int remap_l3(struct i915_request *rq) static int switch_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - struct i915_address_space *vm = vm_alias(rq->hw_context); - unsigned int unwind_mm = 0; - u32 hw_flags = 0; + struct intel_context *ce = rq->hw_context; + struct i915_address_space *vm = vm_alias(ce); int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int loops; - - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. - */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); + if (ret) + return ret; } - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); + if (ce->state) { + u32 hw_flags; + + GEM_BUG_ON(rq->engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1789,22 +1765,25 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. 
*/ + hw_flags = 0; if (i915_gem_context_is_kernel(rq->gem_context)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) - goto err_mm; + return ret; } if (vm) { + struct intel_engine_cs *engine = rq->engine; + ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = flush_pd_dir(rq); if (ret) - goto err_mm; + return ret; /* * Not only do we need a full barrier (post-sync write) after @@ -1816,24 +1795,18 @@ static int switch_context(struct i915_request *rq) */ ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = engine->emit_flush(rq, EMIT_FLUSH); if (ret) - goto err_mm; + return ret; } ret = remap_l3(rq); if (ret) - goto err_mm; + return ret; return 0; - -err_mm: - if (unwind_mm) - i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; -err: - return ret; } static int ring_request_alloc(struct i915_request *request) @@ -1841,7 +1814,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -1952,7 +1925,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, rq->timeline, total_bytes); + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..0f959694303c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -4,13 +4,13 @@ * Copyright © 2016-2018 Intel Corporation */ +#include "gt/intel_gt_types.h" + #include "i915_drv.h" #include "i915_active.h" #include "i915_syncmap.h" -#include "intel_gt.h" -#include "intel_ring.h" -#include "intel_timeline.h" +#include "gt/intel_timeline.h" #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e4bccc14602f..af8a8183154a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -7,7 +7,6 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" -#include "intel_ring.h" #include "intel_workarounds.h" /** @@ -1216,26 +1215,6 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) static void tgl_whitelist_build(struct intel_engine_cs *engine) { - struct i915_wa_list *w = &engine->whitelist; - - switch (engine->class) { - case RENDER_CLASS: - /* - * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - break; - default: - break; - } } void intel_engine_init_whitelist(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 83f549d203a0..123db2c3f956 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -23,7 +23,6 @@ */ 
#include "gem/i915_gem_context.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index bc720defc6b8..f63a26a3e620 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -103,6 +103,9 @@ static int __live_context_size(struct intel_engine_cs *engine, * * TLDR; this overlaps with the execlists redzone. */ + if (HAS_EXECLISTS(engine->i915)) + vaddr += LRC_HEADER_PAGES * PAGE_SIZE; + vaddr += engine->context_size - I915_GTT_PAGE_SIZE; memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index d1752f15702a..5d429037cdad 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -15,8 +15,7 @@ static int live_gt_resume(void *arg) /* Do several suspend/resume cycles to check we don't explode! */ do { - intel_gt_suspend_prepare(gt); - intel_gt_suspend_late(gt); + intel_gt_suspend(gt); if (gt->rc6.enabled) { pr_err("rc6 still enabled after suspend!\n"); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 85e9ccf5c304..8e0016464325 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -826,8 +826,6 @@ static int __igt_reset_engines(struct intel_gt *gt, get_task_struct(tsk); } - yield(); /* start all threads before we begin */ - intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { @@ -1018,7 +1016,7 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; @@ -1145,18 +1143,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; struct evict_vma arg; struct hang h; - unsigned int pin_flags; int err; - if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) - return 0; - if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1192,12 +1186,10 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, goto out_obj; } - pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; - - if (flags & EXEC_OBJECT_NEEDS_FENCE) - pin_flags |= PIN_MAPPABLE; - - err = i915_vma_pin(arg.vma, 0, 0, pin_flags); + err = i915_vma_pin(arg.vma, 0, 0, + i915_vma_is_ggtt(arg.vma) ? 
+ PIN_GLOBAL | PIN_MAPPABLE : + PIN_USER); if (err) { i915_request_add(rq); goto out_obj; @@ -1501,7 +1493,7 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1571,7 +1563,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", engine->name, mode, p->name); - tasklet_disable(t); + tasklet_disable_nosync(t); p->critical_section_begin(); err = intel_engine_reset(engine, NULL); @@ -1694,6 +1686,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; + bool saved_hangcheck; int err; if (!intel_has_gpu_reset(gt)) @@ -1703,9 +1696,12 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) return -EIO; /* we're long past hope of a successful reset */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); + saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index fd3770e48ac7..a7057785e420 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -6,7 +6,6 @@ #include "intel_pm.h" /* intel_gpu_freq() */ #include "selftest_llc.h" -#include "intel_rps.h" static int gen6_verify_ring_freq(struct intel_llc *llc) { @@ -26,8 +25,6 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) for (gpu_freq = consts.min_gpu_freq; gpu_freq <= consts.max_gpu_freq; gpu_freq++) { - struct intel_rps *rps = &llc_to_gt(llc)->rps; - unsigned int ia_freq, ring_freq, found; u32 val; @@ -47,7 +44,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ia_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ia_freq); err = -EINVAL; break; @@ -57,7 +54,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ring_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? 
GEN9_FREQ_SCALER : 1)), found, ring_freq); err = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index eb71ac2f992c..5dc679781a08 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,7 +7,6 @@ #include #include "gem/i915_gem_pm.h" -#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -169,7 +168,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) } GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + + local_irq_disable(); /* appease lockdep */ + __context_pin_acquire(ce[1]); __execlists_update_reg_state(ce[1], engine); + __context_pin_release(ce[1]); + local_irq_enable(); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { @@ -440,8 +444,6 @@ static int live_timeslice_preempt(void *arg) * need to preempt the current task and replace it with another * ready task. */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -516,11 +518,6 @@ static void wait_for_submit(struct intel_engine_cs *engine, } while (!i915_request_is_active(rq)); } -static long timeslice_threshold(const struct intel_engine_cs *engine) -{ - return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; -} - static int live_timeslice_queue(void *arg) { struct intel_gt *gt = arg; @@ -538,8 +535,6 @@ static int live_timeslice_queue(void *arg) * ELSP[1] is already occupied, so must rely on timeslicing to * eject ELSP[0] in favour of the queue.) */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) @@ -617,8 +612,8 @@ static int live_timeslice_queue(void *arg) err = -EINVAL; } - /* Timeslice every jiffy, so within 2 we should signal */ - if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { + /* Timeslice every jiffie, so within 2 we should signal */ + if (i915_request_wait(rq, 0, 3) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1170,325 +1165,6 @@ err_wedged: goto err_client_b; } -struct live_preempt_cancel { - struct intel_engine_cs *engine; - struct preempt_client a, b; -}; - -static int __cancel_active0(struct live_preempt_cancel *arg) -{ - struct i915_request *rq; - struct igt_live_test t; - int err; - - /* Preempt cancel of ELSP0 */ - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); - rq = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_get(rq); - i915_request_add(rq); - if (!igt_wait_for_spinner(&arg->a.spin, rq)) { - err = -EIO; - goto out; - } - - i915_gem_context_set_banned(arg->a.ctx); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_active1(struct live_preempt_cancel *arg) -{ - struct i915_request *rq[2] = {}; - struct igt_live_test t; - int err; - - /* Preempt cancel of ELSP1 */ - 
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); - rq[0] = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_NOOP); /* no preemption */ - if (IS_ERR(rq[0])) - return PTR_ERR(rq[0]); - - i915_request_get(rq[0]); - i915_request_add(rq[0]); - if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { - err = -EIO; - goto out; - } - - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); - rq[1] = spinner_create_request(&arg->b.spin, - arg->b.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[1])) { - err = PTR_ERR(rq[1]); - goto out; - } - - i915_request_get(rq[1]); - err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); - i915_request_add(rq[1]); - if (err) - goto out; - - i915_gem_context_set_banned(arg->b.ctx); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - igt_spinner_end(&arg->a.spin); - if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq[0]->fence.error != 0) { - pr_err("Normal inflight0 request did not complete\n"); - err = -EINVAL; - goto out; - } - - if (rq[1]->fence.error != -EIO) { - pr_err("Cancelled inflight1 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq[1]); - i915_request_put(rq[0]); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_queued(struct live_preempt_cancel *arg) -{ - struct i915_request *rq[3] = {}; - struct igt_live_test t; - int err; - - /* Full ELSP and one in the wings */ - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); - rq[0] = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[0])) - return PTR_ERR(rq[0]); - - i915_request_get(rq[0]); - i915_request_add(rq[0]); - if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { - err = -EIO; - goto out; - } - - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); - rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); - if (IS_ERR(rq[1])) { - err = PTR_ERR(rq[1]); - goto out; - } - - i915_request_get(rq[1]); - err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); - i915_request_add(rq[1]); - if (err) - goto out; - - rq[2] = spinner_create_request(&arg->b.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[2])) { - err = PTR_ERR(rq[2]); - goto out; - } - - i915_request_get(rq[2]); - err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); - i915_request_add(rq[2]); - if (err) - goto out; - - i915_gem_context_set_banned(arg->a.ctx); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq[0]->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - - if (rq[1]->fence.error != 0) { - pr_err("Normal inflight1 request did not complete\n"); - err = -EINVAL; - goto out; - } - - if (rq[2]->fence.error != -EIO) { - pr_err("Cancelled queued request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq[2]); - i915_request_put(rq[1]); - i915_request_put(rq[0]); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_hostile(struct live_preempt_cancel *arg) -{ - struct i915_request *rq; - int err; - - /* Preempt cancel 
non-preemptible spinner in ELSP0 */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) - return 0; - - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); - rq = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_NOOP); /* preemption disabled */ - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_get(rq); - i915_request_add(rq); - if (!igt_wait_for_spinner(&arg->a.spin, rq)) { - err = -EIO; - goto out; - } - - i915_gem_context_set_banned(arg->a.ctx); - err = intel_engine_pulse(arg->engine); /* force reset */ - if (err) - goto out; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq); - if (igt_flush_test(arg->engine->i915)) - err = -EIO; - return err; -} - -static int live_preempt_cancel(void *arg) -{ - struct intel_gt *gt = arg; - struct live_preempt_cancel data; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * To cancel an inflight context, we need to first remove it from the - * GPU. That sounds like preemption! Plus a little bit of bookkeeping. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &data.a)) - return -ENOMEM; - if (preempt_client_init(gt, &data.b)) - goto err_client_a; - - for_each_engine(data.engine, gt, id) { - if (!intel_engine_has_preemption(data.engine)) - continue; - - err = __cancel_active0(&data); - if (err) - goto err_wedged; - - err = __cancel_active1(&data); - if (err) - goto err_wedged; - - err = __cancel_queued(&data); - if (err) - goto err_wedged; - - err = __cancel_hostile(&data); - if (err) - goto err_wedged; - } - - err = 0; -err_client_b: - preempt_client_fini(&data.b); -err_client_a: - preempt_client_fini(&data.a); - return err; - -err_wedged: - GEM_TRACE_DUMP(); - igt_spinner_end(&data.b.spin); - igt_spinner_end(&data.a.spin); - intel_gt_set_wedged(gt); - goto err_client_b; -} - static int live_suppress_self_preempt(void *arg) { struct intel_gt *gt = arg; @@ -2026,105 +1702,6 @@ err_spin_hi: return err; } -static int live_preempt_timeout(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * Check that we force preemption to occur by cancelling the previous - * context if it refuses to yield the GPU. 
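All of the removed __cancel_*() cases above reduce to the same ban-and-pulse sequence. A condensed sketch of that shared shape, using only helpers that appear elsewhere in this hunk (error paths trimmed, so an illustration rather than a drop-in test):

static int cancel_pattern_sketch(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err = 0;

	/* Start a spinning batch so the context is resident on the engine */
	clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
	rq = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	/* Ban the context, then send a pulse so the ban is acted upon */
	i915_gem_context_set_banned(arg->a.ctx);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	/* The cancelled request must retire promptly and carry -EIO */
	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -EIO;
	else if (rq->fence.error != -EIO)
		err = -EINVAL;
out:
	i915_request_put(rq);
	return err;
}
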
- */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) - return 0; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (!intel_has_reset_engine(gt)) - return 0; - - if (igt_spinner_init(&spin_lo, gt)) - return -ENOMEM; - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - for_each_engine(engine, gt, id) { - unsigned long saved_timeout; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = spinner_create_request(&spin_lo, ctx_lo, engine, - MI_NOOP); /* preemption disabled */ - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_request_alloc(ctx_hi, engine); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - /* Flush the previous CS ack before changing timeouts */ - while (READ_ONCE(engine->execlists.pending[0])) - cpu_relax(); - - saved_timeout = engine->props.preempt_timeout_ms; - engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ - - i915_request_get(rq); - i915_request_add(rq); - - intel_engine_flush_submission(engine); - engine->props.preempt_timeout_ms = saved_timeout; - - if (i915_request_wait(rq, 0, HZ / 10) < 0) { - intel_gt_set_wedged(gt); - i915_request_put(rq); - err = -ETIME; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_lo); - i915_request_put(rq); - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); - return err; -} - static int random_range(struct rnd_state *rnd, int min, int max) { return i915_prandom_u32_max_state(max - min, rnd) + min; @@ -2252,8 +1829,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) get_task_struct(tsk[id]); } - yield(); /* start all threads before we kthread_stop() */ - count = 0; for_each_engine(engine, smoke->gt, id) { int status; @@ -3024,12 +2599,10 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_nopreempt), - SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), - SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), @@ -3176,100 +2749,6 @@ static int live_lrc_layout(void *arg) return err; } -static int find_offset(const u32 *lri, u32 offset) -{ - int i; - - for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) - if (lri[i] == offset) - return i; - - return -1; -} - -static int live_lrc_fixed(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* - * Check the assumed register offsets match the actual locations in - * the context image. 
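A note on the "- 1" entries in the register table that follows: the context image holds MI_LOAD_REGISTER_IMM (offset, value) pairs, the CTX_* constants name the value dword, and find_offset() above returns the index of the offset dword, which sits one dword earlier. A toy fragment of such a pair (values illustrative only):

static const u32 lri_pair_example[] = {
	0x11000001,	/* MI_LOAD_REGISTER_IMM(1) header (illustrative) */
	0x00002034,	/* dword N:     mmio offset, e.g. RING_HEAD on RCS0 */
	0x00000000,	/* dword N + 1: the value slot a CTX_* constant indexes */
};
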
- */ - - for_each_engine(engine, gt, id) { - const struct { - u32 reg; - u32 offset; - const char *name; - } tbl[] = { - { - i915_mmio_reg_offset(RING_START(engine->mmio_base)), - CTX_RING_BUFFER_START - 1, - "RING_START" - }, - { - i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), - CTX_RING_BUFFER_CONTROL - 1, - "RING_CTL" - }, - { - i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), - CTX_RING_HEAD - 1, - "RING_HEAD" - }, - { - i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), - CTX_RING_TAIL - 1, - "RING_TAIL" - }, - { - i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), - lrc_ring_mi_mode(engine), - "RING_MI_MODE" - }, - { - engine->mmio_base + 0x110, - CTX_BB_STATE - 1, - "BB_STATE" - }, - { }, - }, *t; - u32 *hw; - - if (!engine->default_state) - continue; - - hw = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); - break; - } - hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); - - for (t = tbl; t->name; t++) { - int dw = find_offset(hw, t->reg); - - if (dw != t->offset) { - pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", - engine->name, - t->name, - t->reg, - dw, - t->offset); - err = -EINVAL; - } - } - - i915_gem_object_unpin_map(engine->default_state); - } - - return err; -} - static int __live_lrc_state(struct i915_gem_context *fixme, struct intel_engine_cs *engine, struct i915_vma *scratch) @@ -3542,7 +3021,6 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), - SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), SUBTEST(live_gpr_clear), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 6ad6aca315f6..6efb9221b7fa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -126,7 +126,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable_nosync(&engine->execlists.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index f04a59fe5d2c..dac86f699a4c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -9,7 +9,6 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_requests.h" -#include "intel_ring.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index abce6e4ec9c0..ef02920cec29 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -513,9 +513,6 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, ro_reg = ro_register(reg); - /* Clear non priv flags */ - reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; - srm = MI_STORE_REGISTER_MEM; lrm = MI_LOAD_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) @@ -813,8 +810,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, u64 offset = results->node.start + sizeof(u32) * i; u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); - /* Clear non priv flags */ - reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; + /* Clear access permission field */ + reg &= ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; *cs++ = srm; *cs++ = reg; @@ -852,9 +849,6 @@ static int 
scrub_whitelisted_registers(struct i915_gem_context *ctx, if (ro_register(reg)) continue; - /* Clear non priv flags */ - reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; - *cs++ = reg; *cs++ = 0xffffffff; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 019ae6486e8d..37f7bcbf7dac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -4,8 +4,6 @@ */ #include "gt/intel_gt.h" -#include "gt/intel_gt_irq.h" -#include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" @@ -79,93 +77,6 @@ void intel_guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } -static void gen9_reset_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(>->i915->runtime_pm); - - spin_lock_irq(>->irq_lock); - gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); - spin_unlock_irq(>->irq_lock); -} - -static void gen9_enable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(>->i915->runtime_pm); - - spin_lock_irq(>->irq_lock); - if (!guc->interrupts.enabled) { - WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & - gt->pm_guc_events); - guc->interrupts.enabled = true; - gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); - } - spin_unlock_irq(>->irq_lock); -} - -static void gen9_disable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - assert_rpm_wakelock_held(>->i915->runtime_pm); - - spin_lock_irq(>->irq_lock); - guc->interrupts.enabled = false; - - gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(gt->i915); - - gen9_reset_guc_interrupts(guc); -} - -static void gen11_reset_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); - spin_unlock_irq(>->irq_lock); -} - -static void gen11_enable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - if (!guc->interrupts.enabled) { - u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_ENABLE, events); - intel_uncore_write(gt->uncore, - GEN11_GUC_SG_INTR_MASK, ~events); - guc->interrupts.enabled = true; - } - spin_unlock_irq(>->irq_lock); -} - -static void gen11_disable_guc_interrupts(struct intel_guc *guc) -{ - struct intel_gt *gt = guc_to_gt(guc); - - spin_lock_irq(>->irq_lock); - guc->interrupts.enabled = false; - - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); - intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(gt->i915); - - gen11_reset_guc_interrupts(guc); -} - void intel_guc_init_early(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; @@ -192,6 +103,32 @@ void intel_guc_init_early(struct intel_guc *guc) } } +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma, 0); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; + guc->shared_data_vaddr = vaddr; + + 
return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->shared_data, I915_VMA_RELEASE_MAP); +} + static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -338,9 +275,14 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fetch; - ret = intel_guc_log_create(&guc->log); + ret = guc_shared_data_create(guc); if (ret) goto err_fw; + GEM_BUG_ON(!guc->shared_data); + + ret = intel_guc_log_create(&guc->log); + if (ret) + goto err_shared; ret = intel_guc_ads_create(guc); if (ret) @@ -375,6 +317,8 @@ err_ads: intel_guc_ads_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); +err_shared: + guc_shared_data_destroy(guc); err_fw: intel_uc_fw_fini(&guc->fw); err_fetch: @@ -399,6 +343,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); + guc_shared_data_destroy(guc); intel_uc_fw_fini(&guc->fw); intel_uc_fw_cleanup_fetch(&guc->fw); } @@ -594,9 +539,19 @@ int intel_guc_suspend(struct intel_guc *guc) int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine) { - /* XXX: to be implemented with submission interface rework */ + u32 data[7]; - return -ENODEV; + GEM_BUG_ON(!guc->execbuf_client); + + data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; + data[1] = engine->guc_id; + data[2] = 0; + data[3] = 0; + data[4] = 0; + data[5] = guc->execbuf_client->stage_id; + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e6400204a2bd..2b2f046d3cc3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -47,6 +47,8 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; struct ida stage_ids; + struct i915_vma *shared_data; + void *shared_data_vaddr; struct intel_guc_client *execbuf_client; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index a26a85d50209..1d3cdd67ca2f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -548,7 +548,6 @@ enum intel_guc_action { INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, - INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, @@ -557,6 +556,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index caed0d57e704..2cf2d3314f62 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -226,7 +226,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (WARN_ON(!intel_guc_log_relay_created(log))) + if (WARN_ON(!intel_guc_log_relay_enabled(log))) goto out_unlock; /* Get the pointer to shared GuC log buffer */ @@ -361,7 +361,6 @@ void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); 
INIT_WORK(&log->relay.flush_work, capture_logs_work); - log->relay.started = false; } static int guc_log_relay_create(struct intel_guc_log *log) @@ -547,7 +546,7 @@ out_unlock: return ret; } -bool intel_guc_log_relay_created(const struct intel_guc_log *log) +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) { return log->relay.buf_addr; } @@ -561,7 +560,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_lock(&log->relay.lock); - if (intel_guc_log_relay_created(log)) { + if (intel_guc_log_relay_enabled(log)) { ret = -EEXIST; goto out_unlock; } @@ -586,6 +585,15 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) mutex_unlock(&log->relay.lock); + guc_log_enable_flush_events(log); + + /* + * When GuC is logging without us relaying to userspace, we're ignoring + * the flush notification. This means that we need to unconditionally + * flush on relay enabling, since GuC only notifies us once. + */ + queue_work(system_highpri_wq, &log->relay.flush_work); + return 0; out_relay: @@ -596,33 +604,11 @@ out_unlock: return ret; } -int intel_guc_log_relay_start(struct intel_guc_log *log) -{ - if (log->relay.started) - return -EEXIST; - - guc_log_enable_flush_events(log); - - /* - * When GuC is logging without us relaying to userspace, we're ignoring - * the flush notification. This means that we need to unconditionally - * flush on relay enabling, since GuC only notifies us once. - */ - queue_work(system_highpri_wq, &log->relay.flush_work); - - log->relay.started = true; - - return 0; -} - void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); intel_wakeref_t wakeref; - if (!log->relay.started) - return; - /* * Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. @@ -636,33 +622,18 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) guc_log_capture_logs(log); } -/* - * Stops the relay log. Called from intel_guc_log_relay_close(), so no - * possibility of race with start/flush since relay_write cannot race - * relay_close. 
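For reference, the created/started split being removed in this file implies the following call order on the debugfs path (relay_open at file open, relay_start on writing 1, flush on other writes, close at release, as also visible in the i915_debugfs.c hunk later in this patch); a short sketch using the functions named in this hunk:

	err = intel_guc_log_relay_open(log);	/* allocate and map the relay */
	if (err)
		return err;

	err = intel_guc_log_relay_start(log);	/* enable flush events, kick the worker */
	if (err)
		goto out_close;

	/* ... userspace reads the relay; an explicit write forces a capture ... */
	intel_guc_log_relay_flush(log);

out_close:
	intel_guc_log_relay_close(log);		/* stop, unmap and destroy the relay */
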
- */ -static void guc_log_relay_stop(struct intel_guc_log *log) +void intel_guc_log_relay_close(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - if (!log->relay.started) - return; - guc_log_disable_flush_events(log); intel_synchronize_irq(i915); flush_work(&log->relay.flush_work); - log->relay.started = false; -} - -void intel_guc_log_relay_close(struct intel_guc_log *log) -{ - guc_log_relay_stop(log); - mutex_lock(&log->relay.lock); - GEM_BUG_ON(!intel_guc_log_relay_created(log)); + GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); guc_log_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index c252c022c5fc..6f764879acb1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -47,7 +47,6 @@ struct intel_guc_log { struct i915_vma *vma; struct { void *buf_addr; - bool started; struct work_struct flush_work; struct rchan *channel; struct mutex lock; @@ -66,9 +65,8 @@ int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); int intel_guc_log_set_level(struct intel_guc_log *log, u32 level); -bool intel_guc_log_relay_created(const struct intel_guc_log *log); +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); int intel_guc_log_relay_open(struct intel_guc_log *log); -int intel_guc_log_relay_start(struct intel_guc_log *log); void intel_guc_log_relay_flush(struct intel_guc_log *log); void intel_guc_log_relay_close(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2498c55e0ea5..009e54a3764f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,13 +6,12 @@ #include #include "gem/i915_gem_context.h" + #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_lrc_reg.h" -#include "gt/intel_ring.h" - #include "intel_guc_submission.h" #include "i915_drv.h" @@ -1011,7 +1010,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = >->rps; + struct intel_rps *rps = >->i915->gt_pm.rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1057,7 +1056,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = >->rps; + struct intel_rps *rps = >->i915->gt_pm.rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1126,7 +1125,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) enum intel_engine_id id; int err; - err = i915_inject_probe_error(gt->i915, -ENXIO); + err = i915_inject_load_error(gt->i915, -ENXIO); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 32a069841c14..8be515c8d0f0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -63,7 +63,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) void *vaddr; int err; - err = i915_inject_probe_error(gt->i915, -ENXIO); + err = i915_inject_load_error(gt->i915, -ENXIO); if (err) return err; @@ -161,7 
+161,7 @@ int intel_huc_auth(struct intel_huc *huc) if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; - ret = i915_inject_probe_error(gt->i915, -ENXIO); + ret = i915_inject_load_error(gt->i915, -ENXIO); if (ret) goto fail; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 629b19377a29..3fdbc935d155 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -20,7 +20,7 @@ static int __intel_uc_reset_hw(struct intel_uc *uc) int ret; u32 guc_status; - ret = i915_inject_probe_error(gt->i915, -ENXIO); + ret = i915_inject_load_error(gt->i915, -ENXIO); if (ret) return ret; @@ -197,7 +197,7 @@ static int guc_enable_communication(struct intel_guc *guc) GEM_BUG_ON(guc_communication_enabled(guc)); - ret = i915_inject_probe_error(i915, -ENXIO); + ret = i915_inject_load_error(i915, -ENXIO); if (ret) return ret; @@ -372,7 +372,7 @@ static int uc_init_wopcm(struct intel_uc *uc) GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); - err = i915_inject_probe_error(gt->i915, -ENXIO); + err = i915_inject_load_error(gt->i915, -ENXIO); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 66a30ab7044a..bb4889d2346d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -37,13 +37,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, /* * List of required GuC and HuC binaries per-platform. * Must be ordered based on platform + revid, from newer to older. - * - * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas - * between 33.0 and 35.2 are only related to new additions to support new Gen12 - * features. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ @@ -225,31 +220,29 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, { bool user = e == -EINVAL; - if (i915_inject_probe_error(i915, e)) { + if (i915_inject_load_error(i915, e)) { /* non-existing blob */ uc_fw->path = ""; uc_fw->user_overridden = user; - } else if (i915_inject_probe_error(i915, e)) { + } else if (i915_inject_load_error(i915, e)) { /* require next major version */ uc_fw->major_ver_wanted += 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (i915_inject_probe_error(i915, e)) { + } else if (i915_inject_load_error(i915, e)) { /* require next minor version */ uc_fw->minor_ver_wanted += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && - i915_inject_probe_error(i915, e)) { + } else if (uc_fw->major_ver_wanted && i915_inject_load_error(i915, e)) { /* require prev major version */ uc_fw->major_ver_wanted -= 1; uc_fw->minor_ver_wanted = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && - i915_inject_probe_error(i915, e)) { + } else if (uc_fw->minor_ver_wanted && i915_inject_load_error(i915, e)) { /* require prev minor version - hey, this should work! 
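Each i915_inject_*_error() call in this chain is one potential failure point: the helper fails the Nth call, chosen by a module parameter, and returns 0 otherwise, so successive injected runs walk through the different failure modes. A minimal stand-alone model of that counting pattern (names here are illustrative, not the driver's internals):

static unsigned int fail_on_nth;	/* set via a module parameter; 0 disables injection */
static unsigned int call_count;

static int inject_error(int err)
{
	if (!fail_on_nth)
		return 0;

	if (++call_count == fail_on_nth)
		return err;	/* this call site reports the injected error */

	return 0;
}
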
*/ uc_fw->minor_ver_wanted -= 1; uc_fw->user_overridden = user; - } else if (user && i915_inject_probe_error(i915, e)) { + } else if (user && i915_inject_load_error(i915, e)) { /* officially unsupported platform */ uc_fw->major_ver_wanted = 0; uc_fw->minor_ver_wanted = 0; @@ -278,7 +271,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) GEM_BUG_ON(!i915->wopcm.size); GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); - err = i915_inject_probe_error(i915, -ENXIO); + err = i915_inject_load_error(i915, -ENXIO); if (err) return err; @@ -439,7 +432,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, u64 offset; int ret; - ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT); + ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); if (ret) return ret; @@ -500,7 +493,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /* make sure the status was cleared the last time we reset the uc */ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); - err = i915_inject_probe_error(gt->i915, -ENOEXEC); + err = i915_inject_load_error(gt->i915, -ENOEXEC); if (err) return err; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6a3ac8cde95d..e753b1e706e2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -35,9 +35,7 @@ */ #include - #include "i915_drv.h" -#include "gt/intel_ring.h" #include "gvt.h" #include "i915_pvinfo.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index a816aef6142b..13044c027f27 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -152,7 +152,6 @@ static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, struct intel_vgpu_fb_info *info) { - static struct lock_class_key lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; @@ -162,7 +161,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); - i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); + i915_gem_object_init(obj, &intel_vgpu_gem_ops); obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index afd7f66bdc2d..45a9124e53b6 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -819,16 +819,13 @@ static int trigger_aux_channel_interrupt(struct intel_vgpu *vgpu, struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; enum intel_gvt_event_type event; - if (reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_A))) + if (reg == _DPA_AUX_CH_CTL) event = AUX_CHANNEL_A; - else if (reg == _PCH_DPB_AUX_CH_CTL || - reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_B))) + else if (reg == _PCH_DPB_AUX_CH_CTL || reg == _DPB_AUX_CH_CTL) event = AUX_CHANNEL_B; - else if (reg == _PCH_DPC_AUX_CH_CTL || - reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_C))) + else if (reg == _PCH_DPC_AUX_CH_CTL || reg == _DPC_AUX_CH_CTL) event = AUX_CHANNEL_C; - else if (reg == _PCH_DPD_AUX_CH_CTL || - reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_D))) + else if (reg == _PCH_DPD_AUX_CH_CTL || reg == _DPD_AUX_CH_CTL) event = AUX_CHANNEL_D; else { WARN_ON(true); @@ -2875,11 +2872,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, 
D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(DP_AUX_CH_CTL(AUX_CH_B), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(DP_AUX_CH_CTL(AUX_CH_C), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(DP_AUX_CH_CTL(AUX_CH_D), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); MMIO_D(HSW_PWR_WELL_CTL1, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index aaf15916d29a..4208e40445b1 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -35,7 +35,6 @@ #include "i915_drv.h" #include "gt/intel_context.h" -#include "gt/intel_ring.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5b2a7d072ec9..36bb7639e82f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -38,7 +38,6 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include "gvt.h" @@ -195,7 +194,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, i); + page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, I915_GTT_PAGE_SIZE); @@ -835,7 +834,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, i); + page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, I915_GTT_PAGE_SIZE); @@ -1585,7 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); + mutex_lock(&vgpu->vgpu_lock); ret = intel_gvt_scan_and_shadow_workload(workload); + mutex_unlock(&vgpu->vgpu_lock); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 207383dda84d..7927b1a0c7a6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -7,7 +7,6 @@ #include #include "gt/intel_engine_pm.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include "i915_active.h" @@ -596,7 +595,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct llist_node *pos, *next; int err; - GEM_BUG_ON(i915_active_is_idle(ref)); GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); /* diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 44859356ce97..4f52fe6146d2 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -214,6 +214,4 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, void i915_active_acquire_barrier(struct i915_active *ref); void i915_request_add_active_barriers(struct i915_request *rq); -void i915_active_print(struct i915_active *ref, struct drm_printer *m); - #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 
8016484ebcd3..ada57eee914a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -44,7 +44,6 @@ #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" -#include "gt/intel_rps.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -792,7 +791,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore *uncore = &dev_priv->uncore; - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &dev_priv->gt_pm.rps; intel_wakeref_t wakeref; int ret = 0; @@ -828,23 +827,23 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); + intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -878,7 +877,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) else reqf >>= 25; } - reqf = intel_gpu_freq(rps, reqf); + reqf = intel_gpu_freq(dev_priv, reqf); rpmodectl = I915_READ(GEN6_RP_CONTROL); rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD); @@ -891,7 +890,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - cagf = intel_gpu_freq(rps, intel_get_cagf(rps, rpstat)); + cagf = intel_gpu_freq(dev_priv, + intel_get_cagf(dev_priv, rpstat)); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -968,37 +968,37 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); + intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); + intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); + intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(rps, rps->max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(rps, rps->boost_freq)); + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1011,6 +1011,92 @@ static int i915_frequency_info(struct seq_file *m, void *unused) return ret; } +static void i915_instdone_info(struct drm_i915_private *dev_priv, + struct seq_file *m, + struct intel_instdone *instdone) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + int slice; + int subslice; + + seq_printf(m, "\t\tINSTDONE: 0x%08x\n", + instdone->instdone); + + if (INTEL_GEN(dev_priv) <= 3) + return; + + seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n", + instdone->slice_common); + + if (INTEL_GEN(dev_priv) <= 6) + return; + + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) + seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, instdone->sampler[slice][subslice]); + + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) + seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, instdone->row[slice][subslice]); +} + +static int i915_hangcheck_info(struct seq_file *m, void *unused) +{ + struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_gt *gt = &i915->gt; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + enum intel_engine_id id; + + seq_printf(m, "Reset flags: %lx\n", gt->reset.flags); + if (test_bit(I915_WEDGED, >->reset.flags)) + seq_puts(m, "\tWedged\n"); + if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) + seq_puts(m, "\tDevice (global) reset in progress\n"); + + if (!i915_modparams.enable_hangcheck) { + seq_puts(m, "Hangcheck disabled\n"); + return 0; + } + + if (timer_pending(>->hangcheck.work.timer)) + seq_printf(m, "Hangcheck active, timer fires in %dms\n", + jiffies_to_msecs(gt->hangcheck.work.timer.expires - + jiffies)); + else if (delayed_work_pending(>->hangcheck.work)) + seq_puts(m, "Hangcheck active, work pending\n"); + else + seq_puts(m, "Hangcheck inactive\n"); + + seq_printf(m, "GT active? 
%s\n", yesno(gt->awake)); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + for_each_engine(engine, i915, id) { + struct intel_instdone instdone; + + seq_printf(m, "%s: %d ms ago\n", + engine->name, + jiffies_to_msecs(jiffies - + engine->hangcheck.action_timestamp)); + + seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", + (long long)engine->hangcheck.acthd, + intel_engine_get_active_head(engine)); + + intel_engine_get_instdone(engine, &instdone); + + seq_puts(m, "\tinstdone read =\n"); + i915_instdone_info(i915, m, &instdone); + + seq_puts(m, "\tinstdone accu =\n"); + i915_instdone_info(i915, m, + &engine->hangcheck.instdone); + } + } + + return 0; +} + static int ironlake_drpc_info(struct seq_file *m) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -1375,7 +1461,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &dev_priv->gt_pm.rps; unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; @@ -1400,11 +1486,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) GEN6_PCODE_READ_MIN_FREQ_TABLE, &ia_freq, NULL); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", - intel_gpu_freq(rps, - (gpu_freq * - (IS_GEN9_BC(dev_priv) || - INTEL_GEN(dev_priv) >= 10 ? - GEN9_FREQ_SCALER : 1))), + intel_gpu_freq(dev_priv, (gpu_freq * + (IS_GEN9_BC(dev_priv) || + INTEL_GEN(dev_priv) >= 10 ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -1632,7 +1717,7 @@ static const char *rps_power_to_str(unsigned int power) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 act_freq = rps->cur_freq; intel_wakeref_t wakeref; @@ -1644,7 +1729,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; } else { - act_freq = intel_get_cagf(rps, + act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } } @@ -1655,17 +1740,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? 
%d\n", READ_ONCE(rps->power.interactive)); seq_printf(m, "Frequency requested %d, actual %d\n", - intel_gpu_freq(rps, rps->cur_freq), - intel_gpu_freq(rps, act_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq), + intel_gpu_freq(dev_priv, act_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(rps, rps->min_freq), - intel_gpu_freq(rps, rps->min_freq_softlimit), - intel_gpu_freq(rps, rps->max_freq_softlimit), - intel_gpu_freq(rps, rps->max_freq)); + intel_gpu_freq(dev_priv, rps->min_freq), + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(rps, rps->idle_freq), - intel_gpu_freq(rps, rps->efficient_freq), - intel_gpu_freq(rps, rps->boost_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq), + intel_gpu_freq(dev_priv, rps->efficient_freq), + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); @@ -1781,8 +1866,8 @@ static void i915_guc_log_info(struct seq_file *m, struct intel_guc_log *log = &dev_priv->gt.uc.guc.log; enum guc_log_buffer_type type; - if (!intel_guc_log_relay_created(log)) { - seq_puts(m, "GuC log relay not created\n"); + if (!intel_guc_log_relay_enabled(log)) { + seq_puts(m, "GuC log relay disabled\n"); return; } @@ -1969,23 +2054,9 @@ i915_guc_log_relay_write(struct file *filp, loff_t *ppos) { struct intel_guc_log *log = filp->private_data; - int val; - int ret; - ret = kstrtoint_from_user(ubuf, cnt, 0, &val); - if (ret < 0) - return ret; - - /* - * Enable and start the guc log relay on value of 1. - * Flush log relay for any other value. - */ - if (val == 1) - ret = intel_guc_log_relay_start(log); - else - intel_guc_log_relay_flush(log); - - return ret ?: cnt; + intel_guc_log_relay_flush(log); + return cnt; } static int i915_guc_log_relay_release(struct inode *inode, struct file *file) @@ -2123,12 +2194,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) status = "disabled"; seq_printf(m, "PSR mode: %s\n", status); - if (!psr->enabled) { - seq_printf(m, "PSR sink not reliable: %s\n", - yesno(psr->sink_not_reliable)); - + if (!psr->enabled) goto unlock; - } if (psr->psr2_enabled) { val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); @@ -3581,11 +3648,17 @@ i915_drop_caches_get(void *data, u64 *val) return 0; } + static int -gt_drop_caches(struct intel_gt *gt, u64 val) +i915_drop_caches_set(void *data, u64 val) { + struct drm_i915_private *i915 = data; + struct intel_gt *gt = &i915->gt; int ret; + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); + if (val & DROP_RESET_ACTIVE && wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) intel_gt_set_wedged(gt); @@ -3608,22 +3681,6 @@ gt_drop_caches(struct intel_gt *gt, u64 val) if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); - return 0; -} - -static int -i915_drop_caches_set(void *data, u64 val) -{ - struct drm_i915_private *i915 = data; - int ret; - - DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", - val, val & DROP_ALL); - - ret = gt_drop_caches(&i915->gt, val); - if (ret) - return ret; - fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND); @@ -4282,6 +4339,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", 
i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, + {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, {"i915_ring_freq_table", i915_ring_freq_table, 0}, {"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0}, @@ -4508,7 +4566,7 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) intel_dp = enc_to_intel_dp(&intel_attached_encoder(connector)->base); crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "DSC_Enabled: %s\n", - yesno(crtc_state->dsc.compression_enable)); + yesno(crtc_state->dsc_params.compression_enable)); seq_printf(m, "DSC_Sink_Support: %s\n", yesno(drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd))); seq_printf(m, "Force_DSC_Enable: %s\n", diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ccb5b566795f..157ed22052a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -76,7 +76,6 @@ #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_csr.h" -#include "intel_memory_region.h" #include "intel_pm.h" static struct drm_driver driver; @@ -599,10 +598,12 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) intel_uc_init_mmio(&dev_priv->gt.uc); - ret = intel_engines_init_mmio(&dev_priv->gt); + ret = intel_engines_init_mmio(dev_priv); if (ret) goto err_uncore; + i915_gem_init_mmio(dev_priv); + return 0; err_uncore: @@ -620,7 +621,7 @@ err_bridge: */ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { - intel_engines_cleanup(&dev_priv->gt); + intel_engines_cleanup(dev_priv); intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); pci_dev_put(dev_priv->bridge_dev); @@ -1171,16 +1172,12 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - ret = intel_memory_regions_hw_probe(dev_priv); - if (ret) - goto err_ggtt; - - intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt); + intel_gt_init_hw_early(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto err_mem_regions; + goto err_ggtt; } pci_set_master(pdev); @@ -1197,7 +1194,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_mem_regions; + goto err_ggtt; } } @@ -1215,7 +1212,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto err_mem_regions; + goto err_ggtt; } } @@ -1267,8 +1264,6 @@ err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); pm_qos_remove_request(&dev_priv->pm_qos); -err_mem_regions: - intel_memory_regions_driver_release(dev_priv); err_ggtt: i915_ggtt_driver_release(dev_priv); err_perf: @@ -1481,23 +1476,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!i915_modparams.nuclear_pageflip && match_info->gen < 5) dev_priv->drm.driver_features &= ~DRIVER_ATOMIC; - /* - * Check if we support fake LMEM -- for now we only unleash this for - * the live selftests(test-and-exit). 
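The override below only sets bits in a region mask; HAS_REGION() and HAS_LMEM(), defined further down in the i915_drv.h hunks of this patch, reduce to a bit test on that mask, roughly:

	const u32 regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN;

	/* HAS_REGION(i915, REGION_LMEM), and hence HAS_LMEM(i915), is just: */
	if (regions & REGION_LMEM)
		pr_info("local memory region advertised\n");
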
- */ -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) { - if (INTEL_GEN(dev_priv) >= 9 && i915_selftest.live < 0 && - i915_modparams.fake_lmem_start) { - mkwrite_device_info(dev_priv)->memory_regions = - REGION_SMEM | REGION_LMEM | REGION_STOLEN; - mkwrite_device_info(dev_priv)->is_dgfx = true; - GEM_BUG_ON(!HAS_LMEM(dev_priv)); - GEM_BUG_ON(!IS_DGFX(dev_priv)); - } - } -#endif - ret = pci_enable_device(pdev); if (ret) goto out_fini; @@ -1532,7 +1510,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_cleanup_hw: i915_driver_hw_remove(dev_priv); - intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); out_cleanup_mmio: i915_driver_mmio_release(dev_priv); @@ -1571,7 +1548,10 @@ void i915_driver_remove(struct drm_i915_private *i915) i915_driver_modeset_remove(i915); + /* Free error state after interrupts are fully disabled. */ + cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_reset_error_state(i915); + i915_gem_driver_remove(i915); intel_power_domains_driver_remove(i915); @@ -1590,7 +1570,6 @@ static void i915_driver_release(struct drm_device *dev) i915_gem_driver_release(dev_priv); - intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); i915_driver_mmio_release(dev_priv); @@ -1818,8 +1797,9 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + intel_gt_pm_disable(&dev_priv->gt); - intel_gt_sanitize(&dev_priv->gt, true); + i915_gem_sanitize(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) @@ -1948,8 +1928,12 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_power_resume_early(dev_priv); + intel_gt_pm_disable(&dev_priv->gt); + intel_power_domains_resume(dev_priv); + intel_gt_sanitize(&dev_priv->gt, true); + enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); return ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0567fe67e8c5..8882c0908c3b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -101,8 +101,6 @@ #include "i915_vma.h" #include "i915_irq.h" -#include "intel_region_lmem.h" - #include "intel_gvt.h" /* General customization: @@ -110,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20191101" -#define DRIVER_TIMESTAMP 1572604873 +#define DRIVER_DATE "20191021" +#define DRIVER_TIMESTAMP 1571651766 struct drm_i915_gem_object; @@ -545,6 +543,94 @@ struct i915_suspend_saved_registers { struct vlv_s0ix_state; +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool interrupts_enabled; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. 
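As a worked example of the "platform dependent multiples" above: the stored values are in roughly 50 MHz units on gen6-8, and on gen9+ the conversion additionally divides by GEN9_FREQ_SCALER, which is why callers elsewhere in this patch pre-multiply gpu_freq by GEN9_FREQ_SCALER. A rough sketch of the conversion, assuming the conventional 50 MHz multiplier and ignoring the vlv/chv GPLL path:

static unsigned int rps_freq_mhz_sketch(const struct drm_i915_private *i915, u8 val)
{
	if (INTEL_GEN(i915) >= 9)	/* e.g. val == 0x16 reads back as ~367 MHz */
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);

	return val * GT_FREQUENCY_MULTIPLIER;	/* gen6-8: plain 50 MHz steps */
}
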
+ */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. */ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + bool enabled; + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; +}; + +struct intel_gen6_power_mgmt { + struct intel_rps rps; +}; + +/* defined intel_pm.c */ +extern spinlock_t mchdev_lock; + +struct intel_ilk_power_mgmt { + u8 cur_delay; + u8 min_delay; + u8 max_delay; + u8 fmax; + u8 fstart; + + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c_m; + int r_t; +}; + #define MAX_L3_SLICES 2 struct intel_l3_parity { u32 *remap_info[MAX_L3_SLICES]; @@ -981,6 +1067,7 @@ struct drm_i915_private { u32 irq_mask; u32 de_irq_mask[I915_MAX_PIPES]; }; + u32 pm_rps_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; @@ -1010,14 +1097,13 @@ struct drm_i915_private { unsigned int fdi_pll_freq; unsigned int czclk_freq; - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ struct { /* * The current logical cdclk state. * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. */ struct intel_cdclk_state logical; /* @@ -1087,10 +1173,6 @@ struct drm_i915_private { */ struct mutex dpll_lock; - /* - * For reading active_pipes, min_cdclk, min_voltage_level holding - * any crtc lock is sufficient, for writing must hold all of them. - */ u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; @@ -1120,6 +1202,13 @@ struct drm_i915_private { */ u32 edram_size_mb; + /* gen6+ GT PM state */ + struct intel_gen6_power_mgmt gt_pm; + + /* ilk-only ips/rps state. 
Everything in here is protected by the global + * mchdev_lock in intel_pm.c */ + struct intel_ilk_power_mgmt ips; + struct i915_power_domains power_domains; struct i915_psr psr; @@ -1259,8 +1348,6 @@ struct drm_i915_private { } contexts; } gem; - u8 pch_ssc_use; - /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; @@ -1457,7 +1544,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, } #define IS_MOBILE(dev_priv) (INTEL_INFO(dev_priv)->is_mobile) -#define IS_DGFX(dev_priv) (INTEL_INFO(dev_priv)->is_dgfx) #define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830) #define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G) @@ -1695,7 +1781,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) -#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) @@ -1761,6 +1846,7 @@ void i915_driver_remove(struct drm_i915_private *i915); int i915_resume_switcheroo(struct drm_i915_private *i915); int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); +void intel_engine_init_hangcheck(struct intel_engine_cs *engine); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) @@ -1779,6 +1865,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, /* i915_gem.c */ int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); @@ -1862,6 +1949,7 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return atomic_read(&error->reset_engine_count[engine->uabi_class]); } +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); @@ -1914,6 +2002,9 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915); +int i915_gem_init_memory_regions(struct drm_i915_private *i915); + /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ac23322fb382..dd0a3271b4e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,11 +48,9 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_gt_requests.h" #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_renderstate.h" -#include "gt/intel_rps.h" #include "gt/intel_workarounds.h" #include "i915_drv.h" @@ -1039,7 +1037,39 @@ out: return err; } -static int __intel_engines_record_defaults(struct intel_gt *gt) +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + GEM_TRACE("\n"); + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + + /* + * As 
we have just resumed the machine and woken the device up from + * deep PCI sleep (presumably D3_cold), assume the HW has been reset + * back to defaults, recovering from whatever wedged state we left it + * in and so worth trying to use the device once more. + */ + if (intel_gt_is_wedged(&i915->gt)) + intel_gt_unset_wedged(&i915->gt); + + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so this could be applied to even earlier gen. + */ + intel_gt_sanitize(&i915->gt, false); + + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); +} + +static int __intel_engines_record_defaults(struct drm_i915_private *i915) { struct i915_request *requests[I915_NUM_ENGINES] = {}; struct intel_engine_cs *engine; @@ -1055,7 +1085,7 @@ static int __intel_engines_record_defaults(struct intel_gt *gt) * from the same default HW values. */ - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { struct intel_context *ce; struct i915_request *rq; @@ -1063,8 +1093,7 @@ static int __intel_engines_record_defaults(struct intel_gt *gt) GEM_BUG_ON(!engine->kernel_context); engine->serial++; /* force the kernel context switch */ - ce = intel_context_create(engine->kernel_context->gem_context, - engine); + ce = intel_context_create(i915->kernel_context, engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -1093,7 +1122,7 @@ err_rq: } /* Flush the default context image to memory, and enable powersaving. */ - if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (!i915_gem_load_power_context(i915)) { err = -EIO; goto out; } @@ -1152,7 +1181,7 @@ out: * this is by declaring ourselves wedged. 
*/ if (err) - intel_gt_set_wedged(gt); + intel_gt_set_wedged(&i915->gt); for (id = 0; id < ARRAY_SIZE(requests); id++) { struct intel_context *ce; @@ -1169,7 +1198,7 @@ out: return err; } -static int intel_engines_verify_workarounds(struct intel_gt *gt) +static int intel_engines_verify_workarounds(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1178,7 +1207,7 @@ static int intel_engines_verify_workarounds(struct intel_gt *gt) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return 0; - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { if (intel_engine_verify_workarounds(engine, "load")) err = -EIO; } @@ -1220,7 +1249,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_gt_init(&dev_priv->gt); - ret = intel_engines_setup(&dev_priv->gt); + ret = intel_engines_setup(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_unlock; @@ -1232,12 +1261,14 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_scratch; } - ret = intel_engines_init(&dev_priv->gt); + ret = intel_engines_init(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_context; } + intel_init_gt_powersave(dev_priv); + intel_uc_init(&dev_priv->gt.uc); ret = intel_gt_init_hw(&dev_priv->gt); @@ -1260,19 +1291,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); - ret = intel_engines_verify_workarounds(&dev_priv->gt); + ret = intel_engines_verify_workarounds(dev_priv); if (ret) goto err_gt; - ret = __intel_engines_record_defaults(&dev_priv->gt); + ret = __intel_engines_record_defaults(dev_priv); if (ret) goto err_gt; - ret = i915_inject_probe_error(dev_priv, -ENODEV); + ret = i915_inject_load_error(dev_priv, -ENODEV); if (ret) goto err_gt; - ret = i915_inject_probe_error(dev_priv, -EIO); + ret = i915_inject_load_error(dev_priv, -EIO); if (ret) goto err_gt; @@ -1297,7 +1328,7 @@ err_init_hw: err_uc_init: if (ret != -EIO) { intel_uc_fini(&dev_priv->gt.uc); - intel_engines_cleanup(&dev_priv->gt); + intel_engines_cleanup(dev_priv); } err_context: if (ret != -EIO) @@ -1366,7 +1397,7 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - intel_engines_cleanup(&dev_priv->gt); + intel_engines_cleanup(dev_priv); i915_gem_driver_release__contexts(dev_priv); intel_gt_driver_release(&dev_priv->gt); @@ -1381,6 +1412,11 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + static void i915_gem_init__mm(struct drm_i915_private *i915) { spin_lock_init(&i915->mm.obj_lock); @@ -1396,6 +1432,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) void i915_gem_init_early(struct drm_i915_private *dev_priv) { i915_gem_init__mm(dev_priv); + i915_gem_init__pm(dev_priv); spin_lock_init(&dev_priv->fb_tracking.lock); } diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f6f9675848b8..2011f8e9a9f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -112,4 +112,18 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } +static inline void cancel_timer(struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return; + + del_timer(t); + WRITE_ONCE(t->expires, 0); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return 
READ_ONCE(t->expires) && !timer_pending(t); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 71efccfde122..321189e1b0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -846,10 +846,8 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) detect_bit_6_swizzle(ggtt); - if (!i915_ggtt_has_aperture(ggtt)) - num_fences = 0; - else if (INTEL_GEN(i915) >= 7 && - !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) + if (INTEL_GEN(i915) >= 7 && + !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) num_fences = 32; else if (INTEL_GEN(i915) >= 4 || IS_I945G(i915) || IS_I945GM(i915) || diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88179202c556..3148d5946b63 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2661,8 +2661,7 @@ static void ggtt_release_guc_top(struct i915_ggtt *ggtt) static void cleanup_init_ggtt(struct i915_ggtt *ggtt) { ggtt_release_guc_top(ggtt); - if (drm_mm_node_allocated(&ggtt->error_capture)) - drm_mm_remove_node(&ggtt->error_capture); + drm_mm_remove_node(&ggtt->error_capture); } static int init_ggtt(struct i915_ggtt *ggtt) @@ -2693,15 +2692,13 @@ static int init_ggtt(struct i915_ggtt *ggtt) if (ret) return ret; - if (ggtt->mappable_end) { - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; - } + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (ret) + return ret; /* * The upper portion of the GuC address space has a sizeable hole @@ -2747,6 +2744,59 @@ int i915_init_ggtt(struct drm_i915_private *i915) return 0; } +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { + struct intel_memory_region *region = i915->mm.regions[i]; + + if (region) + intel_memory_region_put(region); + } +} + +int i915_gem_init_memory_regions(struct drm_i915_private *i915) +{ + int err, i; + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { + struct intel_memory_region *mem = ERR_PTR(-ENODEV); + u32 type; + + if (!HAS_REGION(i915, BIT(i))) + continue; + + type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + switch (type) { + case INTEL_MEMORY_SYSTEM: + mem = i915_gem_shmem_setup(i915); + break; + case INTEL_MEMORY_STOLEN: + mem = i915_gem_stolen_setup(i915); + break; + } + + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); + goto out_cleanup; + } + + mem->id = intel_region_map[i]; + mem->type = type; + mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + + i915->mm.regions[i] = mem; + } + + return 0; + +out_cleanup: + i915_gem_cleanup_memory_regions(i915); + return err; +} + static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; @@ -2773,9 +2823,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) i915_address_space_fini(&ggtt->vm); arch_phys_wc_del(ggtt->mtrr); - - if (ggtt->iomap.size) - io_mapping_fini(&ggtt->iomap); + io_mapping_fini(&ggtt->iomap); } /** @@ -2786,6 +2834,8 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) { 
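i915_gem_init_memory_regions() above instantiates each supported region type in turn and, if one fails, releases whatever was already created before returning the error. A standalone sketch of that partial-unwind loop; the region type and constructor here are invented, and the unwind is written as a reverse walk rather than a full-table cleanup.

#include <stdlib.h>

#define NUM_REGIONS 3

struct region {
	int id;
};

/* Illustrative constructor; every region is just heap-allocated here. */
static struct region *region_create(int id)
{
	struct region *r = malloc(sizeof(*r));

	if (r)
		r->id = id;
	return r;
}

static void region_destroy(struct region *r)
{
	free(r);
}

int init_regions(struct region *out[NUM_REGIONS])
{
	int i;

	for (i = 0; i < NUM_REGIONS; i++) {
		out[i] = region_create(i);
		if (!out[i])
			goto out_cleanup;
	}
	return 0;

out_cleanup:
	/* Release only the regions created before the failure. */
	while (i--)
		region_destroy(out[i]);
	return -1;
}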
struct pagevec *pvec; + i915_gem_cleanup_memory_regions(i915); + fini_aliasing_ppgtt(&i915->ggtt); ggtt_cleanup_hw(&i915->ggtt); @@ -2872,51 +2922,35 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return 0; } -static void tgl_setup_private_ppat(struct intel_uncore *uncore) +static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv) { /* TGL doesn't support LLC or AGE settings */ - intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); - intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); - intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); - intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); + I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB); + I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC); + I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT); + I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC); + I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB); + I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB); + I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB); + I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } -static void cnl_setup_private_ppat(struct intel_uncore *uncore) +static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv) { - intel_uncore_write(uncore, - GEN10_PAT_INDEX(0), - GEN8_PPAT_WB | GEN8_PPAT_LLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(1), - GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(2), - GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(3), - GEN8_PPAT_UC); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(4), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(5), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(6), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - intel_uncore_write(uncore, - GEN10_PAT_INDEX(7), - GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC); + I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC); + I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); } /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability * bits. When using advanced contexts each context stores its own PAT, but * writing this data shouldn't be harmful even in those cases. 
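The PPAT setup above writes one MMIO register per PAT index on the newer paths, while the bdw/chv variants just below pack all eight one-byte entries into a single 64-bit value and split it across a low/high register pair. A small sketch of that packed encoding; the attribute values are placeholders rather than the hardware encodings.

#include <stdint.h>

/* Each PAT entry occupies one byte: index i lives at bits [8*i+7 : 8*i]. */
#define PPAT(i, attr)	((uint64_t)(attr) << ((i) * 8))

static inline uint32_t lower_32(uint64_t v) { return (uint32_t)v; }
static inline uint32_t upper_32(uint64_t v) { return (uint32_t)(v >> 32); }

/* Placeholder attribute encodings, not the hardware values. */
enum { ATTR_UC = 0, ATTR_WC = 1, ATTR_WT = 2, ATTR_WB = 3 };

void pack_pat(uint32_t *lo, uint32_t *hi)
{
	uint64_t pat = PPAT(0, ATTR_WB) | PPAT(1, ATTR_WC) |
		       PPAT(2, ATTR_WT) | PPAT(3, ATTR_UC) |
		       PPAT(4, ATTR_WB) | PPAT(5, ATTR_WB) |
		       PPAT(6, ATTR_WB) | PPAT(7, ATTR_WB);

	*lo = lower_32(pat);	/* written to the low PAT register */
	*hi = upper_32(pat);	/* written to the high PAT register */
}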
*/ -static void bdw_setup_private_ppat(struct intel_uncore *uncore) +static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { u64 pat; @@ -2929,11 +2963,11 @@ static void bdw_setup_private_ppat(struct intel_uncore *uncore) GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); + I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } -static void chv_setup_private_ppat(struct intel_uncore *uncore) +static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { u64 pat; @@ -2965,8 +2999,8 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore) GEN8_PPAT(6, CHV_PPAT_SNOOP) | GEN8_PPAT(7, CHV_PPAT_SNOOP); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); - intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); + I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } static void gen6_gmch_remove(struct i915_address_space *vm) @@ -2977,26 +3011,18 @@ static void gen6_gmch_remove(struct i915_address_space *vm) cleanup_scratch_page(vm); } -static void setup_private_pat(struct intel_uncore *uncore) +static void setup_private_pat(struct drm_i915_private *dev_priv) { - struct drm_i915_private *i915 = uncore->i915; + GEM_BUG_ON(INTEL_GEN(dev_priv) < 8); - GEM_BUG_ON(INTEL_GEN(i915) < 8); - - if (INTEL_GEN(i915) >= 12) - tgl_setup_private_ppat(uncore); - else if (INTEL_GEN(i915) >= 10) - cnl_setup_private_ppat(uncore); - else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) - chv_setup_private_ppat(uncore); + if (INTEL_GEN(dev_priv) >= 12) + tgl_setup_private_ppat(dev_priv); + else if (INTEL_GEN(dev_priv) >= 10) + cnl_setup_private_ppat(dev_priv); + else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) + chv_setup_private_ppat(dev_priv); else - bdw_setup_private_ppat(uncore); -} - -static struct resource pci_resource(struct pci_dev *pdev, int bar) -{ - return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); + bdw_setup_private_ppat(dev_priv); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3008,10 +3034,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ - if (!IS_DGFX(dev_priv)) { - ggtt->gmadr = pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - } + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); if (!err) @@ -3052,7 +3078,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_pte_encode; - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(dev_priv); return ggtt_probe_common(ggtt, size); } @@ -3234,18 +3260,15 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; - if (ggtt->mappable_end) { - if (!io_mapping_init_wc(&ggtt->iomap, - ggtt->gmadr.start, - ggtt->mappable_end)) { - ggtt->vm.cleanup(&ggtt->vm); - return -EIO; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, - ggtt->mappable_end); + if (!io_mapping_init_wc(&ggtt->iomap, + ggtt->gmadr.start, + 
ggtt->mappable_end)) { + ggtt->vm.cleanup(&ggtt->vm); + return -EIO; } + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); + i915_ggtt_init_fences(ggtt); return 0; @@ -3270,7 +3293,15 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = i915_gem_init_memory_regions(dev_priv); + if (ret) + goto out_gtt_cleanup; + return 0; + +out_gtt_cleanup: + dev_priv->ggtt.vm.cleanup(&dev_priv->ggtt.vm); + return ret; } int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) @@ -3351,12 +3382,10 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; - - ggtt_restore_mappings(ggtt); + ggtt_restore_mappings(&i915->ggtt); if (INTEL_GEN(i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(i915); } static struct scatterlist * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 402283ce2864..f074f1de66e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -575,11 +575,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt); int i915_init_ggtt(struct drm_i915_private *dev_priv); void i915_ggtt_driver_release(struct drm_i915_private *dev_priv); -static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt) -{ - return ggtt->mappable_end > 0; -} - int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e8b67f5e521d..5cf4eed5add8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -40,7 +40,6 @@ #include "display/intel_overlay.h" #include "gem/i915_gem_context.h" -#include "gem/i915_gem_lmem.h" #include "i915_drv.h" #include "i915_gpu_error.h" @@ -236,7 +235,6 @@ struct compress { struct pagevec pool; struct z_stream_s zstream; void *tmp; - bool wc; }; static bool compress_init(struct compress *c) @@ -294,7 +292,7 @@ static int compress_page(struct compress *c, struct z_stream_s *zstream = &c->zstream; zstream->next_in = src; - if (c->wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) + if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE)) zstream->next_in = c->tmp; zstream->avail_in = PAGE_SIZE; @@ -369,7 +367,6 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) struct compress { struct pagevec pool; - bool wc; }; static bool compress_init(struct compress *c) @@ -392,7 +389,7 @@ static int compress_page(struct compress *c, if (!ptr) return -ENOMEM; - if (!(c->wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) + if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; @@ -537,6 +534,10 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, } err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); + err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), + ee->hangcheck_timestamp, + ee->hangcheck_timestamp == epoch ? 
"; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { @@ -678,8 +679,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); - err_printf(m, "Capture: %lu jiffies; %d ms ago\n", - error->capture, jiffies_to_msecs(jiffies - error->capture)); + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", + error->capture, + jiffies_to_msecs(jiffies - error->capture), + jiffies_to_msecs(error->capture - error->epoch)); for (ee = error->engine; ee; ee = ee->next) err_printf(m, "Active process (on ring %s): %s [%d]\n", @@ -737,21 +741,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (IS_GEN_RANGE(m->i915, 8, 11)) err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); - if (IS_GEN(m->i915, 12)) - err_printf(m, "AUX_ERR_DBG: 0x%08x\n", error->aux_err); - - if (INTEL_GEN(m->i915) >= 12) { - int i; - - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) - err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, - error->sfc_done[i]); - - err_printf(m, " GAM_DONE: 0x%08x\n", error->gam_done); - } - for (ee = error->engine; ee; ee = ee->next) - error_print_engine(m, ee, error->capture); + error_print_engine(m, ee, error->epoch); for (ee = error->engine; ee; ee = ee->next) { const struct drm_i915_error_object *obj; @@ -779,7 +770,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, for (j = 0; j < ee->num_requests; j++) error_print_request(m, " ", &ee->requests[j], - error->capture); + error->epoch); } print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer); @@ -979,6 +970,7 @@ i915_error_object_create(struct drm_i915_private *i915, struct drm_i915_error_object *dst; unsigned long num_pages; struct sgt_iter iter; + dma_addr_t dma; int ret; might_sleep(); @@ -1004,54 +996,17 @@ i915_error_object_create(struct drm_i915_private *i915, dst->page_count = 0; dst->unused = 0; - compress->wc = i915_gem_object_is_lmem(vma->obj) || - drm_mm_node_allocated(&ggtt->error_capture); - ret = -EINVAL; - if (drm_mm_node_allocated(&ggtt->error_capture)) { + for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; - dma_addr_t dma; - for_each_sgt_daddr(dma, iter, vma->pages) { - ggtt->vm.insert_page(&ggtt->vm, dma, slot, - I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); - ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap(s); - if (ret) - break; - } - } else if (i915_gem_object_is_lmem(vma->obj)) { - struct intel_memory_region *mem = vma->obj->mm.region; - dma_addr_t dma; - - for_each_sgt_daddr(dma, iter, vma->pages) { - void __iomem *s; - - s = io_mapping_map_atomic_wc(&mem->iomap, dma); - ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap_atomic(s); - if (ret) - break; - } - } else { - struct page *page; - - for_each_sgt_page(page, iter, vma->pages) { - void *s; - - drm_clflush_pages(&page, 1); - - s = kmap_atomic(page); - ret = compress_page(compress, s, dst); - kunmap_atomic(s); - - drm_clflush_pages(&page, 1); - - if (ret) - break; - } + s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); + ret = compress_page(compress, (void __force *)s, dst); + io_mapping_unmap(s); + if (ret) + break; } if (ret || compress_flush(compress, dst)) { @@ -1189,6 +1144,8 @@ static void 
error_record_engine_registers(struct i915_gpu_state *error, } ee->idle = intel_engine_is_idle(engine); + if (!ee->idle) + ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error, engine); @@ -1606,18 +1563,6 @@ static void capture_reg_state(struct i915_gpu_state *error) if (IS_GEN_RANGE(i915, 8, 11)) error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); - if (IS_GEN(i915, 12)) - error->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); - - if (INTEL_GEN(i915) >= 12) { - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { - error->sfc_done[i] = - intel_uncore_read(uncore, GEN12_SFC_DONE(i)); - } - - error->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); - } - /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); @@ -1712,15 +1657,26 @@ static void capture_params(struct i915_gpu_state *error) i915_params_copy(&error->params, &i915_modparams); } +static unsigned long capture_find_epoch(const struct i915_gpu_state *error) +{ + const struct drm_i915_error_engine *ee; + unsigned long epoch = error->capture; + + for (ee = error->engine; ee; ee = ee->next) { + if (ee->hangcheck_timestamp && + time_before(ee->hangcheck_timestamp, epoch)) + epoch = ee->hangcheck_timestamp; + } + + return epoch; +} + static void capture_finish(struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &error->i915->ggtt; + const u64 slot = ggtt->error_capture.start; - if (drm_mm_node_allocated(&ggtt->error_capture)) { - const u64 slot = ggtt->error_capture.start; - - ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); - } + ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); } #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) @@ -1766,6 +1722,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915) error->overlay = intel_overlay_capture_error_state(i915); error->display = intel_display_capture_error_state(i915); + error->epoch = capture_find_epoch(error); + capture_finish(error); compress_fini(&compress); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5d2c3372ff99..7f1cd0b1fef7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -34,6 +34,7 @@ struct i915_gpu_state { ktime_t boottime; ktime_t uptime; unsigned long capture; + unsigned long epoch; struct drm_i915_private *i915; @@ -74,9 +75,6 @@ struct i915_gpu_state { u32 gab_ctl; u32 gfx_mode; u32 gtt_cache; - u32 aux_err; /* gen12 */ - u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */ - u32 gam_done; /* gen12 */ u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; @@ -88,6 +86,7 @@ struct i915_gpu_state { /* Software tracked state */ bool idle; + unsigned long hangcheck_timestamp; int num_requests; u32 reset_count; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index dae00f7dd7df..572a5c37cc61 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -45,7 +45,6 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" -#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_irq.h" @@ -321,6 +320,180 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, } } +static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) +{ + WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11); + + return INTEL_GEN(dev_priv) >= 8 ? 
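capture_find_epoch() above walks the per-engine hangcheck timestamps and keeps the oldest one using time_before(), which stays correct across jiffies wraparound. A standalone sketch of the same search; the wrap-safe comparison is rewritten in its usual signed-difference form rather than taken from the kernel headers.

#include <stddef.h>

/* Wraparound-safe "a is earlier than b" for free-running tick counters. */
static int ticks_before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

/* Return the earliest nonzero timestamp, falling back to 'capture'. */
unsigned long find_epoch(const unsigned long *stamps, size_t n,
			 unsigned long capture)
{
	unsigned long epoch = capture;
	size_t i;

	for (i = 0; i < n; i++) {
		if (stamps[i] && ticks_before(stamps[i], epoch))
			epoch = stamps[i];
	}
	return epoch;
}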
GEN8_GT_IIR(2) : GEN6_PMIIR; +} + +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) +{ + struct intel_gt *gt = &dev_priv->gt; + + spin_lock_irq(>->irq_lock); + + while (gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)) + ; + + dev_priv->gt_pm.rps.pm_iir = 0; + + spin_unlock_irq(>->irq_lock); +} + +void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) +{ + struct intel_gt *gt = &dev_priv->gt; + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_reset_iir(gt, GEN6_PM_RPS_EVENTS); + dev_priv->gt_pm.rps.pm_iir = 0; + spin_unlock_irq(>->irq_lock); +} + +void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) +{ + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (READ_ONCE(rps->interrupts_enabled)) + return; + + spin_lock_irq(>->irq_lock); + WARN_ON_ONCE(rps->pm_iir); + + if (INTEL_GEN(dev_priv) >= 11) + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)); + else + WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + + rps->interrupts_enabled = true; + gen6_gt_pm_enable_irq(gt, dev_priv->pm_rps_events); + + spin_unlock_irq(>->irq_lock); +} + +u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) +{ + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; +} + +void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_gt *gt = &dev_priv->gt; + + if (!READ_ONCE(rps->interrupts_enabled)) + return; + + spin_lock_irq(>->irq_lock); + rps->interrupts_enabled = false; + + I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); + + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(dev_priv); + + /* Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. 
+ */ + cancel_work_sync(&rps->work); + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); + else + gen6_reset_rps_interrupts(dev_priv); +} + +void gen9_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(gt->uncore->rpm); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); + spin_unlock_irq(>->irq_lock); +} + +void gen9_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(gt->uncore->rpm); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + WARN_ON_ONCE(intel_uncore_read(gt->uncore, + gen6_pm_iir(gt->i915)) & + gt->pm_guc_events); + guc->interrupts.enabled = true; + gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); + } + spin_unlock_irq(>->irq_lock); +} + +void gen9_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + assert_rpm_wakelock_held(gt->uncore->rpm); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen9_reset_guc_interrupts(guc); +} + +void gen11_reset_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); + spin_unlock_irq(>->irq_lock); +} + +void gen11_enable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + if (!guc->interrupts.enabled) { + u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, events); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~events); + guc->interrupts.enabled = true; + } + spin_unlock_irq(>->irq_lock); +} + +void gen11_disable_guc_interrupts(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + + spin_lock_irq(>->irq_lock); + guc->interrupts.enabled = false; + + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); + intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); + + spin_unlock_irq(>->irq_lock); + intel_synchronize_irq(gt->i915); + + gen11_reset_guc_interrupts(guc); +} + /** * bdw_update_port_irq - update DE port interrupt * @dev_priv: driver private @@ -892,6 +1065,199 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } +static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_delay; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + new_delay = dev_priv->ips.cur_delay; + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + if (busy_up > max_avg) { + if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay) + new_delay = dev_priv->ips.cur_delay - 1; + if (new_delay < dev_priv->ips.max_delay) + new_delay = dev_priv->ips.max_delay; + } else if (busy_down < min_avg) { + if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay) + new_delay = dev_priv->ips.cur_delay + 1; + if 
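ironlake_rps_change_irq_handler() in this hunk steps the DRPS delay by one in the direction the busy counters indicate and clamps the result; note that the clamps treat ips.max_delay as the numeric floor and ips.min_delay as the numeric ceiling. A standalone sketch of that step-and-clamp, with illustrative names and no hardware access.

#include <stdint.h>

struct ips_state {
	uint8_t cur_delay;
	uint8_t max_delay;	/* numeric floor in this scheme */
	uint8_t min_delay;	/* numeric ceiling in this scheme */
};

/* Step one unit toward "busier" or "idler" and clamp against the bounds. */
uint8_t next_delay(const struct ips_state *s, int busier, int idler)
{
	uint8_t new_delay = s->cur_delay;

	if (busier) {
		if (s->cur_delay != s->max_delay)
			new_delay = s->cur_delay - 1;
		if (new_delay < s->max_delay)
			new_delay = s->max_delay;
	} else if (idler) {
		if (s->cur_delay != s->min_delay)
			new_delay = s->cur_delay + 1;
		if (new_delay > s->min_delay)
			new_delay = s->min_delay;
	}
	return new_delay;
}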
(new_delay > dev_priv->ips.min_delay) + new_delay = dev_priv->ips.min_delay; + } + + if (ironlake_set_drps(dev_priv, new_delay)) + dev_priv->ips.cur_delay = new_delay; + + spin_unlock(&mchdev_lock); + + return; +} + +static void vlv_c0_read(struct drm_i915_private *dev_priv, + struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); + ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); +} + +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) +{ + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); +} + +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(dev_priv, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= dev_priv->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void gen6_pm_rps_work(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + bool client_boost = false; + int new_delay, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + if (rps->interrupts_enabled) { + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + } + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + WARN_ON(pm_iir & ~dev_priv->pm_rps_events); + if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); + + adj = rps->last_adj; + new_delay = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_delay < rps->boost_freq) { + new_delay = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; + + if (new_delay >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_delay = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_delay = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(dev_priv) ? 
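vlv_wa_c0_ei() above turns the render/media C0 residency deltas into a busyness figure for the sample window, takes the busier of the two engines, and compares it against the up/down thresholds. A simplified model working in plain percentages; it deliberately ignores the fixed-point scaling and czclk conversion the driver performs.

#include <stdint.h>

enum rps_event { RPS_NONE, RPS_UP, RPS_DOWN };

/*
 * busy_ticks: how long the busier of the two engines sat in C0 during the
 * sample window, in the same units as window_ticks.
 */
enum rps_event classify_load(uint64_t busy_ticks, uint64_t window_ticks,
			     unsigned int up_threshold_pct,
			     unsigned int down_threshold_pct)
{
	uint64_t busy_pct;

	if (!window_ticks)
		return RPS_NONE;

	busy_pct = busy_ticks * 100u / window_ticks;

	if (busy_pct > up_threshold_pct)
		return RPS_UP;
	if (busy_pct < down_threshold_pct)
		return RPS_DOWN;
	return RPS_NONE;
}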
-2 : -1; + + if (new_delay <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. + */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_delay += adj; + new_delay = clamp_t(int, new_delay, min, max); + + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ + spin_lock_irq(>->irq_lock); + if (rps->interrupts_enabled) + gen6_gt_pm_unmask_irq(gt, dev_priv->pm_rps_events); + spin_unlock_irq(>->irq_lock); +} + + /** * ivybridge_parity_work - Workqueue called when a parity error interrupt * occurred. @@ -1265,6 +1631,54 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, res1, res2); } +/* The RPS events need forcewake, so we add them to a work queue and mask their + * IMR bits until the work is done. Other interrupts can be processed without + * the work queue. */ +void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_rps *rps = &i915->gt_pm.rps; + const u32 events = i915->pm_rps_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + if (!rps->interrupts_enabled) + return; + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_gt *gt = &dev_priv->gt; + + if (pm_iir & dev_priv->pm_rps_events) { + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & dev_priv->pm_rps_events); + if (rps->interrupts_enabled) { + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; + schedule_work(&rps->work); + } + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(dev_priv) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) { enum pipe pipe; @@ -1575,7 +1989,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (gt_iir) gen6_gt_irq_handler(&dev_priv->gt, gt_iir); if (pm_iir) - gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); + gen6_rps_irq_handler(dev_priv, pm_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -1979,7 +2393,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, } if (IS_GEN(dev_priv, 5) && de_iir & DE_PCU_EVENT) - gen5_rps_irq_handler(&dev_priv->gt.rps); + ironlake_rps_change_irq_handler(dev_priv); } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -2084,7 +2498,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (pm_iir) { I915_WRITE(GEN6_PMIIR, 
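The worker above grows its step size while successive interrupts keep pushing the frequency the same way, drops back to a single step when the direction changes or a boost takes over, and clamps the result into the soft limits. A condensed standalone sketch of that policy; the CHV even-encoding quirk is kept, the interrupt and locking plumbing is not.

#include <stdint.h>

struct rps_policy {
	int cur, min, max, boost;	/* frequencies in driver units */
	int last_adj;			/* signed step carried between runs */
	int even_steps;			/* nonzero: CHV-style even encodings */
};

int rps_next_freq(struct rps_policy *p, int up, int down, int client_boost)
{
	int adj = p->last_adj;
	int new_freq = p->cur;

	if (client_boost && new_freq < p->boost) {
		new_freq = p->boost;
		adj = 0;
	} else if (up) {
		/* Keep doubling while successive events agree in direction. */
		adj = adj > 0 ? adj * 2 : (p->even_steps ? 2 : 1);
		if (new_freq >= p->max)
			adj = 0;
	} else if (down) {
		adj = adj < 0 ? adj * 2 : (p->even_steps ? -2 : -1);
		if (new_freq <= p->min)
			adj = 0;
	} else {
		adj = 0;
	}

	p->last_adj = adj;

	new_freq += adj;
	if (new_freq < p->min)
		new_freq = p->min;
	if (new_freq > p->max)
		new_freq = p->max;
	return new_freq;
}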
pm_iir); ret = IRQ_HANDLED; - gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); + gen6_rps_irq_handler(dev_priv, pm_iir); } } @@ -2161,16 +2575,10 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) u32 mask; if (INTEL_GEN(dev_priv) >= 12) + /* TODO: Add AUX entries for USBC */ return TGL_DE_PORT_AUX_DDIA | TGL_DE_PORT_AUX_DDIB | - TGL_DE_PORT_AUX_DDIC | - TGL_DE_PORT_AUX_USBC1 | - TGL_DE_PORT_AUX_USBC2 | - TGL_DE_PORT_AUX_USBC3 | - TGL_DE_PORT_AUX_USBC4 | - TGL_DE_PORT_AUX_USBC5 | - TGL_DE_PORT_AUX_USBC6; - + TGL_DE_PORT_AUX_DDIC; mask = GEN8_AUX_CHANNEL_A; if (INTEL_GEN(dev_priv) >= 9) @@ -2189,9 +2597,7 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) static u32 gen8_de_pipe_fault_mask(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) - return GEN11_DE_PIPE_IRQ_FAULT_ERRORS; - else if (INTEL_GEN(dev_priv) >= 9) + if (INTEL_GEN(dev_priv) >= 9) return GEN9_DE_PIPE_IRQ_FAULT_ERRORS; else return GEN8_DE_PIPE_IRQ_FAULT_ERRORS; @@ -2453,11 +2859,9 @@ static inline void gen11_master_intr_enable(void __iomem * const regs) raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ); } -static __always_inline irqreturn_t -__gen11_irq_handler(struct drm_i915_private * const i915, - u32 (*intr_disable)(void __iomem * const regs), - void (*intr_enable)(void __iomem * const regs)) +static irqreturn_t gen11_irq_handler(int irq, void *arg) { + struct drm_i915_private * const i915 = arg; void __iomem * const regs = i915->uncore.regs; struct intel_gt *gt = &i915->gt; u32 master_ctl; @@ -2466,9 +2870,9 @@ __gen11_irq_handler(struct drm_i915_private * const i915, if (!intel_irqs_enabled(i915)) return IRQ_NONE; - master_ctl = intr_disable(regs); + master_ctl = gen11_master_intr_disable(regs); if (!master_ctl) { - intr_enable(regs); + gen11_master_intr_enable(regs); return IRQ_NONE; } @@ -2490,20 +2894,13 @@ __gen11_irq_handler(struct drm_i915_private * const i915, gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - intr_enable(regs); + gen11_master_intr_enable(regs); gen11_gu_misc_irq_handler(gt, gu_misc_iir); return IRQ_HANDLED; } -static irqreturn_t gen11_irq_handler(int irq, void *arg) -{ - return __gen11_irq_handler(arg, - gen11_master_intr_disable, - gen11_master_intr_enable); -} - /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ @@ -3873,10 +4270,13 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); + INIT_WORK(&rps->work, gen6_pm_rps_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; @@ -3885,6 +4285,33 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (HAS_GT_UC(dev_priv) && INTEL_GEN(dev_priv) < 11) dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16; + /* Let's track the enabled rps events */ + if (IS_VALLEYVIEW(dev_priv)) + /* WaGsvRC0ResidencyMethod:vlv */ + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + dev_priv->pm_rps_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + /* We share the register with other engine */ + if (INTEL_GEN(dev_priv) > 9) + GEM_WARN_ON(dev_priv->pm_rps_events & 0xffff0000); + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. 
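gen11_irq_handler() above follows the usual top-half shape: disable the master interrupt, latch the control bits, bail out if nothing is pending, fan the set bits out to the per-block handlers, then re-enable the master. A standalone sketch of that shape with the MMIO accesses replaced by stand-ins.

#include <stdint.h>

#define IRQ_NONE	0
#define IRQ_HANDLED	1

/* Stand-ins for the MMIO helpers: the "register" is just a word in memory. */
static uint32_t master_disable(uint32_t *master_reg)
{
	uint32_t ctl = *master_reg;

	*master_reg = 0;	/* mask everything while we dispatch */
	return ctl;
}

static void master_enable(uint32_t *master_reg)
{
	*master_reg = 1u << 31;	/* stand-in for the master-enable bit */
}

int irq_top_half(uint32_t *master_reg, void (*dispatch)(unsigned int bit))
{
	uint32_t master_ctl = master_disable(master_reg);
	unsigned int bit;

	if (!master_ctl) {
		master_enable(master_reg);
		return IRQ_NONE;
	}

	/* Fan the latched bits out to the per-block handlers. */
	for (bit = 0; bit < 32; bit++) {
		if (master_ctl & (1u << bit))
			dispatch(bit);
	}

	master_enable(master_reg);
	return IRQ_HANDLED;
}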
+ * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(dev_priv) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(dev_priv) >= 8) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + dev->vblank_disable_immediate = true; /* Most platforms treat the display irq block as an always-on diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index 812c47a9c2d6..19a3bc019535 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -17,8 +17,14 @@ struct drm_device; struct drm_display_mode; struct drm_i915_private; struct intel_crtc; +struct intel_crtc; +struct intel_gt; +struct intel_guc; struct intel_uncore; +void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir); +void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); + void intel_irq_init(struct drm_i915_private *dev_priv); void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); @@ -100,6 +106,12 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, u8 pipe_mask); void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, u8 pipe_mask); +void gen9_reset_guc_interrupts(struct intel_guc *guc); +void gen9_enable_guc_interrupts(struct intel_guc *guc); +void gen9_disable_guc_interrupts(struct intel_guc *guc); +void gen11_reset_guc_interrupts(struct intel_guc *guc); +void gen11_enable_guc_interrupts(struct intel_guc *guc); +void gen11_disable_guc_interrupts(struct intel_guc *guc); bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 1dd1f3652795..4f1806f65040 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -166,7 +166,7 @@ i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. 
(default: true)"); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -i915_param_named_unsafe(inject_probe_failure, uint, 0400, +i915_param_named_unsafe(inject_load_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); #endif @@ -179,11 +179,6 @@ i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); #endif -#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) -i915_param_named_unsafe(fake_lmem_start, ulong, 0600, - "Fake LMEM start offset (default: 0)"); -#endif - static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, @@ -195,8 +190,6 @@ static __always_inline void _print_param(struct drm_printer *p, drm_printf(p, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "unsigned long")) - drm_printf(p, "i915.%s=%lu\n", name, *(const unsigned long *)x); else if (!__builtin_strcmp(type, "char *")) drm_printf(p, "i915.%s=%s\n", name, *(const char **)x); else diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 31b88f297fbc..d29ade3b7de6 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,12 +61,11 @@ struct drm_printer; param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \ param(int, edp_vswing, 0) \ - param(int, reset, 3) \ - param(unsigned int, inject_probe_failure, 0) \ + param(int, reset, 2) \ + param(unsigned int, inject_load_failure, 0) \ param(int, fastboot, -1) \ param(int, enable_dpcd_backlight, 0) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \ - param(unsigned long, fake_lmem_start, 0) \ /* leave bools at the end to not create holes */ \ param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ param(bool, enable_hangcheck, true) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1bb701d32a5d..f9a3bfe68689 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -612,7 +612,6 @@ static const struct intel_device_info intel_cherryview_info = { .has_logical_ring_preemption = 1, \ .display.has_csr = 1, \ .has_gt_uc = 1, \ - .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .ddb_size = 896 @@ -656,7 +655,6 @@ static const struct intel_device_info intel_skylake_gt4_info = { .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ .display.has_fbc = 1, \ - .display.has_hdcp = 1, \ .display.has_psr = 1, \ .has_runtime_pm = 1, \ .display.has_csr = 1, \ @@ -737,7 +735,6 @@ static const struct intel_device_info intel_coffeelake_gt3_info = { GEN9_FEATURES, \ GEN(10), \ .ddb_size = 1024, \ - .display.has_dsc = 1, \ .has_coherent_ggtt = false, \ GLK_COLORS @@ -825,10 +822,6 @@ static const struct intel_device_info intel_tigerlake_12_info = { .has_rps = false, /* XXX disabled for debugging */ }; -#define GEN12_DGFX_FEATURES \ - GEN12_FEATURES, \ - .is_dgfx = 1 - #undef GEN #undef PLATFORM diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a8c2318d3d5e..d2ac51fe4f04 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -200,7 +200,6 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" -#include "gt/intel_ring.h" #include "i915_drv.h" #include 
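_print_param() above picks a printf format by comparing a stringified C type, which the parameter macro table supplies alongside each value's address; __builtin_strcmp lets the comparison fold away at compile time once the call is inlined. A small self-contained sketch of the same dispatch using plain strcmp; the macro, structure and parameters here are invented for illustration.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static void print_param(const char *name, const char *type, const void *x)
{
	if (!strcmp(type, "bool"))
		printf("demo.%s=%s\n", name, *(const bool *)x ? "yes" : "no");
	else if (!strcmp(type, "int"))
		printf("demo.%s=%d\n", name, *(const int *)x);
	else if (!strcmp(type, "unsigned int"))
		printf("demo.%s=%u\n", name, *(const unsigned int *)x);
	else if (!strcmp(type, "char *"))
		printf("demo.%s=%s\n", name, *(const char *const *)x);
}

/* The stringified type and the member address come from one expansion. */
#define PRINT_PARAM(store, field, type) \
	print_param(#field, #type, &(store)->field)

struct demo_params {
	bool enabled;
	int level;
	char *path;
};

int main(void)
{
	struct demo_params p = { .enabled = true, .level = 3, .path = "none" };

	PRINT_PARAM(&p, enabled, bool);
	PRINT_PARAM(&p, level, int);
	PRINT_PARAM(&p, path, char *);
	return 0;
}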
"i915_perf.h" @@ -218,7 +217,6 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" -#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -295,7 +293,6 @@ static u32 i915_perf_stream_paranoid = true; /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ #define OAREPORT_REASON_MASK 0x3f -#define OAREPORT_REASON_MASK_EXTENDED 0x7f #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -341,10 +338,6 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; -static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { - [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, -}; - #define SAMPLE_OA_REPORT (1<<0) /** @@ -425,14 +418,6 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } -static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - - return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & - GEN12_OAG_OATAILPTR_MASK; -} - static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -553,7 +538,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", hw_tail); } } @@ -755,9 +740,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * it to userspace... */ reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & - (IS_GEN(stream->perf->i915, 12) ? - OAREPORT_REASON_MASK_EXTENDED : - OAREPORT_REASON_MASK)); + OAREPORT_REASON_MASK); if (reason == 0) { if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); @@ -774,8 +757,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && - INTEL_GEN(stream->perf->i915) <= 11) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -842,11 +824,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { - i915_reg_t oaheadptr; - - oaheadptr = IS_GEN(stream->perf->i915, 12) ? - GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* @@ -854,8 +831,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * relative to oa_buf_base so put back here... */ head += gtt_offset; - intel_uncore_write(uncore, oaheadptr, - head & GEN12_OAG_OAHEADPTR_MASK); + + intel_uncore_write(uncore, GEN8_OAHEADPTR, + head & GEN8_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -891,16 +869,12 @@ static int gen8_oa_read(struct i915_perf_stream *stream, { struct intel_uncore *uncore = stream->uncore; u32 oastatus; - i915_reg_t oastatus_reg; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus_reg = IS_GEN(stream->perf->i915, 12) ? 
- GEN12_OAG_OASTATUS : GEN8_OASTATUS; - - oastatus = intel_uncore_read(uncore, oastatus_reg); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -932,7 +906,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = intel_uncore_read(uncore, oastatus_reg); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -940,7 +914,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - intel_uncore_write(uncore, oastatus_reg, + intel_uncore_write(uncore, GEN8_OASTATUS, oastatus & ~GEN8_OASTATUS_REPORT_LOST); } @@ -1286,11 +1260,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (intel_engine_in_execlists_submission_mode(ce->engine)) { - stream->specific_ctx_id_mask = - (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = stream->specific_ctx_id_mask; - } else { + if (USES_GUC_SUBMISSION(ce->engine->i915)) { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1310,6 +1280,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ stream->specific_ctx_id_mask = (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; + } else { + stream->specific_ctx_id_mask = + (1U << GEN8_CTX_ID_WIDTH) - 1; + stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; @@ -1514,63 +1488,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) stream->pollin = false; } -static void gen12_init_oa_buffer(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); - unsigned long flags; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - - intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); - intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, - gtt_offset & GEN12_OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = gtt_offset; - - /* - * PRM says: - * - * "This MMIO must be set before the OATAILPTR - * register and after the OAHEADPTR register. This is - * to enable proper functionality of the overflow - * bit." - */ - intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | - OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, - gtt_offset & GEN12_OAG_OATAILPTR_MASK); - - /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; - - /* - * Reset state used to recognise context switches, affecting which - * reports we will forward to userspace while filtering for a single - * context. - */ - stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; - - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - /* - * NB: although the OA buffer will initially be allocated - * zeroed via shmfs (and so this memset is redundant when - * first allocating), we may re-init the OA buffer, either - * when re-enabling a stream or in error/reset paths. - * - * The reason we clear the buffer for each re-init is for the - * sanity check in gen8_append_oa_reports() that looks at the - * reason field to make sure it's non-zero which relies on - * the assumption that new reports are being written to zeroed - * memory... 
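[Editor's aside, not part of the patch] The OA read/init hunks above all revolve around one pattern: hardware advances a tail pointer into a circular report buffer, and the driver copies out complete reports before writing an updated head pointer back, masked to the buffer size. A minimal standalone sketch of that pattern follows; the buffer size, report size, and function names are illustrative assumptions, not the i915 implementation.

```c
/*
 * Illustrative sketch only (not i915 code): consume fixed-size reports
 * from a circular buffer where the device advances `tail` and the driver
 * advances `head` after copying. A power-of-two size lets offsets wrap
 * with a mask, mirroring the OAHEADPTR/OATAILPTR masking in the hunks
 * above. Note: in this simplified model a completely full buffer is
 * indistinguishable from an empty one.
 */
#include <stdint.h>
#include <string.h>

#define BUF_SIZE   (1u << 16)          /* power of two */
#define BUF_MASK   (BUF_SIZE - 1)
#define REPORT_SZ  64u

/* Copy complete reports in [head, tail) into `dst`; return the new head. */
static uint32_t consume_reports(const uint8_t *buf, uint32_t head,
                                uint32_t tail, uint8_t *dst, size_t dst_sz)
{
    size_t copied = 0;

    while (((tail - head) & BUF_MASK) >= REPORT_SZ &&
           copied + REPORT_SZ <= dst_sz) {
        memcpy(dst + copied, buf + (head & BUF_MASK), REPORT_SZ);
        copied += REPORT_SZ;
        head = (head + REPORT_SZ) & BUF_MASK;  /* wrap like the head register */
    }
    return head;  /* caller writes this back to the hardware head pointer */
}
```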
- */ - memset(stream->oa_buffer.vaddr, 0, - stream->oa_buffer.vma->size); - - stream->pollin = false; -} - static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; @@ -2073,20 +1990,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, u32 *reg_state = ce->lrc_reg_state; int i; - if (IS_GEN(stream->perf->i915, 12)) { - u32 format = stream->oa_buffer.format; + reg_state[ctx_oactxctrl + 1] = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; - reg_state[ctx_oactxctrl + 1] = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - reg_state[ctx_oactxctrl + 1] = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2219,36 +2128,6 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } -static int gen12_emit_oar_config(struct intel_context *ce, bool enable) -{ - struct i915_request *rq; - u32 *cs; - int err = 0; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto out; - } - - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); - *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, - enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - -out: - i915_request_add(rq); - - return err; -} - /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -2273,8 +2152,8 @@ out: * * Note: it's only the RCS/Render context that has any OA state. */ -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int gen8_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) { struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ @@ -2286,9 +2165,11 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, CTX_R_PWR_CLK_STATE, }, { - IS_GEN(i915, 12) ? - GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, + GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, + ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME) }, { EU_PERF_CNTL0, ctx_flexeuN(0) }, { EU_PERF_CNTL1, ctx_flexeuN(1) }, @@ -2301,23 +2182,9 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, #undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; - size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); int i, err; - if (IS_GEN(i915, 12)) { - u32 format = stream->oa_buffer.format; - - regs[1].value = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - regs[1].value = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 2; !!ctx_flexeu0 && i < array_size; i++) + for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); lockdep_assert_held(&stream->perf->lock); @@ -2348,7 +2215,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, array_size); + err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); if (err) { i915_gem_context_put(ctx); return err; @@ -2373,7 +2240,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, array_size); + err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); if (err) return err; } @@ -2421,69 +2288,19 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config); + ret = gen8_configure_all_contexts(stream, oa_config); if (ret) return ret; return emit_oa_config(stream, oa_config, oa_context(stream)); } -static int gen12_enable_metric_set(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; - bool periodic = stream->periodic; - u32 period_exponent = stream->period_exponent; - int ret; - - intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, - /* Disable clk ratio reports, like previous Gens. */ - _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | - /* - * If the user didn't require OA reports, instruct the - * hardware not to emit ctx switch reports. - */ - !(stream->sample_flags & SAMPLE_OA_REPORT) ? - _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) : - _MASKED_BIT_DISABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS)); - - intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? - (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | - GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | - (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) - : 0); - - /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = lrc_configure_all_contexts(stream, oa_config); - if (ret) - return ret; - - /* - * For Gen12, performance counters are context - * saved/restored. Only enable it for the context that - * requested this. - */ - if (stream->ctx) { - ret = gen12_emit_oar_config(stream->pinned_ctx, - oa_config != NULL); - if (ret) - return ret; - } - - return emit_oa_config(stream, oa_config, oa_context(stream)); -} - static void gen8_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + gen8_configure_all_contexts(stream, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2493,22 +2310,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); - - /* Make sure we disable noa to save power. 
*/ - intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); -} - -static void gen12_disable_metric_set(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - - /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); - - /* disable the context save/restore or OAR counters */ - if (stream->ctx) - gen12_emit_oar_config(stream->pinned_ctx, false); + gen8_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2570,25 +2372,6 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) GEN8_OA_COUNTER_ENABLE); } -static void gen12_oa_enable(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - u32 report_format = stream->oa_buffer.format; - - /* - * If we don't want OA reports from the OA buffer, then we don't even - * need to program the OAG unit. - */ - if (!(stream->sample_flags & SAMPLE_OA_REPORT)) - return; - - gen12_init_oa_buffer(stream); - - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, - (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | - GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); -} - /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2630,18 +2413,6 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) DRM_ERROR("wait for OA to be disabled timed out\n"); } -static void gen12_oa_disable(struct i915_perf_stream *stream) -{ - struct intel_uncore *uncore = stream->uncore; - - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); - if (intel_wait_for_register(uncore, - GEN12_OAG_OACONTROL, - GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, - 50)) - DRM_ERROR("wait for OA to be disabled timed out\n"); -} - /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2843,7 +2614,8 @@ void i915_oa_init_reg_state(const struct intel_context *ce, { struct i915_perf_stream *stream; - /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ + /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + lockdep_assert_held(&ce->pin_mutex); if (engine->class != RENDER_CLASS) return; @@ -3322,24 +3094,16 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * rest of the system, which we consider acceptable for a * non-privileged client. * - * For Gen8->11 the OA unit no longer supports clock gating off for a + * For Gen8+ the OA unit no longer supports clock gating off for a * specific context and the kernel can't securely stop the counters * from updating as system-wide / global values. Even though we can * filter reports based on the included context ID we can't block * clients from seeing the raw / global counter values via * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. - * - * For Gen12+ we gain a new OAR unit that only monitors the RCS on a - * per context basis. So we can relax requirements there if the user - * doesn't request global stream access (i.e. query based sampling - * using MI_RECORD_PERF_COUNT. 
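[Editor's aside, not part of the patch] Both gen8_oa_disable() and the removed gen12 variant above share the same shape: clear the control register, then poll until the hardware reports the enable bit as dropped, giving up after a timeout. A hedged sketch of that shape follows; read_reg/write_reg/sleep_us are assumed helpers, not the intel_uncore API, and the offset is an example value.

```c
/*
 * Illustrative sketch (assumed helpers, not the i915 API): disable a unit
 * by clearing its control register, then poll until the enable bit reads
 * back as clear or a timeout expires.
 */
#include <stdbool.h>
#include <stdint.h>

#define OACONTROL          0xdaf4u     /* example offset */
#define OA_COUNTER_ENABLE  (1u << 0)

extern uint32_t read_reg(uint32_t offset);           /* assumed accessor */
extern void write_reg(uint32_t offset, uint32_t v);  /* assumed accessor */
extern void sleep_us(unsigned int us);               /* assumed helper   */

static bool oa_disable_and_wait(unsigned int timeout_us)
{
    write_reg(OACONTROL, 0);

    while (timeout_us--) {
        if (!(read_reg(OACONTROL) & OA_COUNTER_ENABLE))
            return true;               /* unit reports disabled */
        sleep_us(1);
    }
    return false;                      /* caller logs a timeout error */
}
```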
*/ if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; - else if (IS_GEN(perf->i915, 12) && specific_ctx && - (props->sample_flags & SAMPLE_OA_REPORT) == 0) - privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -3654,9 +3418,7 @@ void i915_perf_register(struct drm_i915_private *i915) sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); - if (IS_TIGERLAKE(i915)) { - i915_perf_load_test_config_tgl(i915); - } else if (INTEL_GEN(i915) >= 11) { + if (INTEL_GEN(i915) >= 11) { i915_perf_load_test_config_icl(i915); } else if (IS_CANNONLAKE(i915)) { i915_perf_load_test_config_cnl(i915); @@ -3753,80 +3515,56 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) return false; } -#define ADDR_IN_RANGE(addr, start, end) \ - ((addr) >= (start) && \ - (addr) <= (end)) - -#define REG_IN_RANGE(addr, start, end) \ - ((addr) >= i915_mmio_reg_offset(start) && \ - (addr) <= i915_mmio_reg_offset(end)) - -#define REG_EQUAL(addr, mmio) \ - ((addr) == i915_mmio_reg_offset(mmio)) - static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || - REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || - REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); + return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && + addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || + (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && + addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || + (addr >= i915_mmio_reg_offset(OACEC0_0) && + addr <= i915_mmio_reg_offset(OACEC7_1)); } static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || - REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || - REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || - REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); + return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || + (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && + addr <= i915_mmio_reg_offset(NOA_WRITE)) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || + (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && + addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || - REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); + addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || + (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && + addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); } static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || - REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || - REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); + addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || - REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || - REG_EQUAL(addr, HSW_MBVID2_MISR0); + (addr >= 0x25100 && addr <= 0x2FF90) || + (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && + addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || + addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { 
return gen7_is_valid_mux_addr(perf, addr) || - ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); -} - -static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) -{ - return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || - REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || - REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || - REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || - REG_EQUAL(addr, GEN12_OAA_DBG_REG) || - REG_EQUAL(addr, GEN12_OAG_OA_PESS) || - REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); -} - -static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) -{ - return REG_EQUAL(addr, NOA_WRITE) || - REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || - REG_EQUAL(addr, GDT_CHICKEN_BITS) || - REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || - REG_EQUAL(addr, RPM_CONFIG0) || - REG_EQUAL(addr, RPM_CONFIG1) || - REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); + (addr >= 0x182300 && addr <= 0x1823A4); } static u32 mask_reg_value(u32 reg, u32 val) @@ -3835,14 +3573,14 @@ static u32 mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) + if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) + if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; @@ -4221,11 +3959,14 @@ void i915_perf_init(struct drm_i915_private *i915) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. 
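[Editor's aside, not part of the patch] The is_valid_*_addr() hunks above show the same whitelist check in two forms: open-coded offset comparisons versus the ADDR_IN_RANGE/REG_IN_RANGE/REG_EQUAL helpers. The sketch below restates the helper form with placeholder offsets (not the real register map), to show why the macro form stays readable as the whitelist grows.

```c
/*
 * Illustrative whitelist check (placeholder offsets, not the real map):
 * an MMIO write is accepted only if it targets a known range or a known
 * single register, the same shape as the is_valid_*_addr() helpers above.
 */
#include <stdbool.h>
#include <stdint.h>

#define ADDR_IN_RANGE(addr, start, end)  ((addr) >= (start) && (addr) <= (end))
#define REG_EQUAL(addr, reg)             ((addr) == (reg))

#define TRIG_FIRST   0x2740u   /* placeholder range */
#define TRIG_LAST    0x275cu
#define CHICKEN_REG  0xe180u   /* placeholder single register */

static bool is_valid_config_addr(uint32_t addr)
{
    return ADDR_IN_RANGE(addr, TRIG_FIRST, TRIG_LAST) ||
           REG_EQUAL(addr, CHICKEN_REG);
}
```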
*/ + perf->oa_formats = gen8_plus_oa_formats; + + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.read = gen8_oa_read; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN_RANGE(i915, 8, 9)) { - perf->oa_formats = gen8_plus_oa_formats; - perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -4238,11 +3979,8 @@ void i915_perf_init(struct drm_i915_private *i915) chv_is_valid_mux_addr; } - perf->ops.oa_enable = gen8_oa_enable; - perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen8_disable_metric_set; - perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 8)) { perf->ctx_oactxctrl_offset = 0x120; @@ -4256,8 +3994,6 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } } else if (IS_GEN_RANGE(i915, 10, 11)) { - perf->oa_formats = gen8_plus_oa_formats; - perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -4265,11 +4001,8 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - perf->ops.oa_enable = gen8_oa_enable; - perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen10_disable_metric_set; - perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 10)) { perf->ctx_oactxctrl_offset = 0x128; @@ -4279,24 +4012,6 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ctx_flexeu0_offset = 0x78e; } perf->gen8_valid_ctx_bit = BIT(16); - } else if (IS_GEN(i915, 12)) { - perf->oa_formats = gen12_oa_formats; - - perf->ops.is_valid_b_counter_reg = - gen12_is_valid_b_counter_addr; - perf->ops.is_valid_mux_reg = - gen12_is_valid_mux_addr; - perf->ops.is_valid_flex_reg = - gen8_is_valid_flex_addr; - - perf->ops.oa_enable = gen12_oa_enable; - perf->ops.oa_disable = gen12_oa_disable; - perf->ops.enable_metric_set = gen12_enable_metric_set; - perf->ops.disable_metric_set = gen12_disable_metric_set; - perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; - - perf->ctx_flexeu0_offset = 0; - perf->ctx_oactxctrl_offset = 0x144; } } diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..a1f733fc905a 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -199,43 +199,14 @@ struct i915_perf_stream { * @pinned_ctx: The OA context specific information. */ struct intel_context *pinned_ctx; - - /** - * @specific_ctx_id: The id of the specific context. - */ u32 specific_ctx_id; - - /** - * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. - */ u32 specific_ctx_id_mask; - /** - * @poll_check_timer: High resolution timer that will periodically - * check for data in the circular OA buffer for notifying userspace - * (e.g. during a read() or poll()). - */ struct hrtimer poll_check_timer; - - /** - * @poll_wq: The wait queue that hrtimer callback wakes when it - * sees data ready to read in the circular OA buffer. - */ wait_queue_head_t poll_wq; - - /** - * @pollin: Whether there is data available to read. - */ bool pollin; - /** - * @periodic: Whether periodic sampling is currently enabled. - */ bool periodic; - - /** - * @period_exponent: The OA unit sampling frequency is derived from this. 
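[Editor's aside, not part of the patch] The i915_perf_init() hunk above populates an ops structure with shared callbacks first and then overrides the generation-specific hooks. A minimal model of that dispatch style follows, with invented names and trivial bodies standing in for the real callbacks.

```c
/*
 * Minimal model of per-generation dispatch (invented names): common
 * callbacks are assigned once, then generation-specific hooks are
 * selected, mirroring how i915_perf_init() fills perf->ops above.
 */
#include <stdio.h>

struct perf_ops {
    void (*oa_enable)(void);
    void (*oa_disable)(void);
    int  (*is_valid_mux_reg)(unsigned int addr);
};

static void gen8_oa_enable(void)  { puts("gen8 enable");  }
static void gen8_oa_disable(void) { puts("gen8 disable"); }
static int  gen8_valid_mux(unsigned int addr)  { return addr < 0x10000; }
static int  gen10_valid_mux(unsigned int addr) { return addr < 0x20000; }

static void perf_ops_init(struct perf_ops *ops, int gen)
{
    /* Hooks shared by every supported generation in this sketch. */
    ops->oa_enable  = gen8_oa_enable;
    ops->oa_disable = gen8_oa_disable;

    /* Generation-specific register validation. */
    ops->is_valid_mux_reg = (gen >= 10) ? gen10_valid_mux : gen8_valid_mux;
}
```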
- */ int period_exponent; /** @@ -305,7 +276,7 @@ struct i915_perf_stream { } oa_buffer; /** - * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be + * A batch buffer doing a wait on the GPU for the NOA logic to be * reprogrammed. */ struct i915_vma *noa_wait; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2..85912917c062 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -12,7 +12,6 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" -#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -359,26 +358,25 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct i915_pmu *pmu = &i915->pmu; - struct intel_rps *rps = >->rps; if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; - val = rps->cur_freq; + val = i915->gt_pm.rps.cur_freq; if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); - val = intel_get_cagf(rps, val); + val = intel_get_cagf(i915, val); intel_gt_pm_put(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), + intel_gpu_freq(i915, val), period_ns / 1000); } if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(rps, rps->cur_freq), + intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq), period_ns / 1000); } } @@ -1103,6 +1101,20 @@ void i915_pmu_register(struct drm_i915_private *i915) return; } + i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); + if (!i915_pmu_events_attr_group.attrs) + goto err; + + pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = i915_pmu_event_init; + pmu->base.add = i915_pmu_event_add; + pmu->base.del = i915_pmu_event_del; + pmu->base.start = i915_pmu_event_start; + pmu->base.stop = i915_pmu_event_stop; + pmu->base.read = i915_pmu_event_read; + pmu->base.event_idx = i915_pmu_event_event_idx; + spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; @@ -1116,23 +1128,9 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->name) goto err; - i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) - goto err_name; - - pmu->base.attr_groups = i915_pmu_attr_groups; - pmu->base.task_ctx_nr = perf_invalid_context; - pmu->base.event_init = i915_pmu_event_init; - pmu->base.add = i915_pmu_event_add; - pmu->base.del = i915_pmu_event_del; - pmu->base.start = i915_pmu_event_start; - pmu->base.stop = i915_pmu_event_stop; - pmu->base.read = i915_pmu_event_read; - pmu->base.event_idx = i915_pmu_event_event_idx; - ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err_attr; + goto err_name; ret = i915_pmu_register_cpuhp_state(pmu); if (ret) @@ -1142,14 +1140,13 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); -err_attr: - pmu->base.event_init = NULL; - free_event_attributes(pmu); err_name: if (!is_igp(i915)) kfree(pmu->name); err: - dev_notice(i915->drm.dev, "Failed to register PMU!\n"); + pmu->base.event_init = NULL; + free_event_attributes(pmu); + DRM_NOTE("Failed to register PMU! 
(err=%d)\n", ret); } void i915_pmu_unregister(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 732aad148881..21037a2e2038 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -16,12 +16,6 @@ enum { I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - /* A preemptive pulse used to monitor the health of each engine */ - I915_PRIORITY_HEARTBEAT, - - /* Interactive workload, scheduled for immediate pageflipping */ - I915_PRIORITY_DISPLAY, }; #define I915_USER_PRIORITY_SHIFT 2 @@ -45,7 +39,6 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER INT_MAX #define __NO_PREEMPTION (I915_PRIORITY_WAIT) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53c280c4e741..855db888516c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -413,9 +413,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_VECS_SFC_USAGE(engine) _MMIO((engine)->mmio_base + 0x2014) #define GEN11_VECS_SFC_USAGE_BIT (1 << 0) -#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) -#define GEN12_SFC_DONE_MAX 4 - #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) #define RING_PP_DIR_BASE_READ(base) _MMIO((base) + 0x518) #define RING_PP_DIR_DCLV(base) _MMIO((base) + 0x220) @@ -687,45 +684,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_16M (7 << 3) -/* Gen12 OAR unit */ -#define GEN12_OAR_OACONTROL _MMIO(0x2960) -#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 -#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) - -#define GEN12_OACTXCONTROL _MMIO(0x2360) -#define GEN12_OAR_OASTATUS _MMIO(0x2968) - -/* Gen12 OAG unit */ -#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00) -#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0 -#define GEN12_OAG_OATAILPTR _MMIO(0xdb04) -#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0 - -#define GEN12_OAG_OABUFFER _MMIO(0xdb08) -#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7) -#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3) -#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */ - -#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28) -#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2 -#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1) -#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0) - -#define GEN12_OAG_OACONTROL _MMIO(0xdaf4) -#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2 -#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0) - -#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8) -#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6) -#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5) -#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2) -#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1) - -#define GEN12_OAG_OASTATUS _MMIO(0xdafc) -#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2) -#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1) -#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0) - /* * Flexible, Aggregate EU Counter Registers. 
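[Editor's aside, not part of the patch] The i915_pmu_register() hunk above is mostly about ordering: each allocation happens just before it is needed, and the error labels unwind only what was already set up, in reverse. A generic sketch of that goto-ladder idiom follows, with placeholder steps rather than the PMU calls.

```c
/*
 * Generic sketch of the goto-ladder unwind idiom (placeholder steps, not
 * the PMU code): each label undoes only the work that completed before
 * the failure, in reverse order of setup.
 */
#include <stdlib.h>

struct widget { char *name; int *attrs; };

extern int register_widget(struct widget *w);   /* assumed to exist */

static int widget_init(struct widget *w)
{
    int err;

    w->name = malloc(16);
    if (!w->name)
        return -1;

    w->attrs = malloc(64);
    if (!w->attrs) {
        err = -1;
        goto err_name;
    }

    err = register_widget(w);
    if (err)
        goto err_attrs;

    return 0;

err_attrs:
    free(w->attrs);
err_name:
    free(w->name);
    return err;
}
```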
* Note: these aren't contiguous @@ -962,26 +920,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 -/* Same layout as OASTARTTRIGX */ -#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900) -#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904) -#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908) -#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c) -#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910) -#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914) -#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918) -#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c) - -/* Same layout as OAREPORTTRIGX */ -#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920) -#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924) -#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928) -#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c) -#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930) -#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934) -#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938) -#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c) - /* CECX_0 */ #define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_NOT_EQUAL 5 @@ -998,10 +936,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19) -/* 11-bit array 0: pass-through, 1: negated */ -#define GEN12_OASCEC_NEGATE_MASK 0x7ff -#define GEN12_OASCEC_NEGATE_SHIFT 21 - /* CECX_1 */ #define OACEC_MASK_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff @@ -1024,42 +958,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) -/* Same layout as CECX_Y */ -#define GEN12_OAG_CEC0_0 _MMIO(0xd940) -#define GEN12_OAG_CEC0_1 _MMIO(0xd944) -#define GEN12_OAG_CEC1_0 _MMIO(0xd948) -#define GEN12_OAG_CEC1_1 _MMIO(0xd94c) -#define GEN12_OAG_CEC2_0 _MMIO(0xd950) -#define GEN12_OAG_CEC2_1 _MMIO(0xd954) -#define GEN12_OAG_CEC3_0 _MMIO(0xd958) -#define GEN12_OAG_CEC3_1 _MMIO(0xd95c) -#define GEN12_OAG_CEC4_0 _MMIO(0xd960) -#define GEN12_OAG_CEC4_1 _MMIO(0xd964) -#define GEN12_OAG_CEC5_0 _MMIO(0xd968) -#define GEN12_OAG_CEC5_1 _MMIO(0xd96c) -#define GEN12_OAG_CEC6_0 _MMIO(0xd970) -#define GEN12_OAG_CEC6_1 _MMIO(0xd974) -#define GEN12_OAG_CEC7_0 _MMIO(0xd978) -#define GEN12_OAG_CEC7_1 _MMIO(0xd97c) - -/* Same layout as CECX_Y + negate 11-bit array */ -#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00) -#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04) -#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08) -#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c) -#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10) -#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14) -#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18) -#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c) -#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20) -#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24) -#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28) -#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c) -#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30) -#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34) -#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38) -#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c) - /* OA perf counters */ #define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_HI _MMIO(0x91BC) @@ -1140,10 +1038,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C) -#define GEN12_OAA_DBG_REG _MMIO(0xdc44) -#define GEN12_OAG_OA_PESS _MMIO(0x2b2c) -#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40) - #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 @@ -2561,7 +2455,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define 
RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) #define RING_FAULT_VALID (1 << 0) #define DONE_REG _MMIO(0x40b0) -#define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN8_PRIVATE_PAT_LO _MMIO(0x40e0) #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) @@ -2597,7 +2490,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) -#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) #define RING_FORCE_TO_NONPRIV_ACCESS_WR (2 << 28) @@ -2710,8 +2602,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) -#define GEN12_AUX_ERR_DBG _MMIO(0x43f4) - #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1 << 31) @@ -5645,9 +5535,45 @@ enum { */ #define _DPA_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64010) #define _DPA_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64014) +#define _DPA_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64018) +#define _DPA_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6401c) +#define _DPA_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64020) +#define _DPA_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64024) #define _DPB_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64110) #define _DPB_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64114) +#define _DPB_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64118) +#define _DPB_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6411c) +#define _DPB_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64120) +#define _DPB_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64124) + +#define _DPC_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64210) +#define _DPC_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64214) +#define _DPC_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64218) +#define _DPC_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6421c) +#define _DPC_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64220) +#define _DPC_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64224) + +#define _DPD_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64310) +#define _DPD_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64314) +#define _DPD_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64318) +#define _DPD_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6431c) +#define _DPD_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64320) +#define _DPD_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64324) + +#define _DPE_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64410) +#define _DPE_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64414) +#define _DPE_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64418) +#define _DPE_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6441c) +#define _DPE_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64420) +#define _DPE_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64424) + +#define _DPF_AUX_CH_CTL (DISPLAY_MMIO_BASE(dev_priv) + 0x64510) +#define _DPF_AUX_CH_DATA1 (DISPLAY_MMIO_BASE(dev_priv) + 0x64514) +#define _DPF_AUX_CH_DATA2 (DISPLAY_MMIO_BASE(dev_priv) + 0x64518) +#define _DPF_AUX_CH_DATA3 (DISPLAY_MMIO_BASE(dev_priv) + 0x6451c) +#define _DPF_AUX_CH_DATA4 (DISPLAY_MMIO_BASE(dev_priv) + 0x64520) +#define _DPF_AUX_CH_DATA5 (DISPLAY_MMIO_BASE(dev_priv) + 0x64524) #define DP_AUX_CH_CTL(aux_ch) _MMIO_PORT(aux_ch, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL) #define 
DP_AUX_CH_DATA(aux_ch, i) _MMIO(_PORT(aux_ch, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */ @@ -7464,9 +7390,6 @@ enum { #define GEN8_PIPE_VSYNC (1 << 1) #define GEN8_PIPE_VBLANK (1 << 0) #define GEN9_PIPE_CURSOR_FAULT (1 << 11) -#define GEN11_PIPE_PLANE7_FAULT (1 << 22) -#define GEN11_PIPE_PLANE6_FAULT (1 << 21) -#define GEN11_PIPE_PLANE5_FAULT (1 << 20) #define GEN9_PIPE_PLANE4_FAULT (1 << 10) #define GEN9_PIPE_PLANE3_FAULT (1 << 9) #define GEN9_PIPE_PLANE2_FAULT (1 << 8) @@ -7486,11 +7409,6 @@ enum { GEN9_PIPE_PLANE3_FAULT | \ GEN9_PIPE_PLANE2_FAULT | \ GEN9_PIPE_PLANE1_FAULT) -#define GEN11_DE_PIPE_IRQ_FAULT_ERRORS \ - (GEN9_DE_PIPE_IRQ_FAULT_ERRORS | \ - GEN11_PIPE_PLANE7_FAULT | \ - GEN11_PIPE_PLANE6_FAULT | \ - GEN11_PIPE_PLANE5_FAULT) #define GEN8_DE_PORT_ISR _MMIO(0x44440) #define GEN8_DE_PORT_IMR _MMIO(0x44444) @@ -7510,12 +7428,6 @@ enum { #define GEN8_PORT_DP_A_HOTPLUG (1 << 3) #define BXT_DE_PORT_GMBUS (1 << 1) #define GEN8_AUX_CHANNEL_A (1 << 0) -#define TGL_DE_PORT_AUX_USBC6 (1 << 13) -#define TGL_DE_PORT_AUX_USBC5 (1 << 12) -#define TGL_DE_PORT_AUX_USBC4 (1 << 11) -#define TGL_DE_PORT_AUX_USBC3 (1 << 10) -#define TGL_DE_PORT_AUX_USBC2 (1 << 9) -#define TGL_DE_PORT_AUX_USBC1 (1 << 8) #define TGL_DE_PORT_AUX_DDIC (1 << 2) #define TGL_DE_PORT_AUX_DDIB (1 << 1) #define TGL_DE_PORT_AUX_DDIA (1 << 0) @@ -7704,17 +7616,10 @@ enum { #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) -#define _CHICKEN_TRANS_A 0x420c0 -#define _CHICKEN_TRANS_B 0x420c4 -#define _CHICKEN_TRANS_C 0x420c8 -#define _CHICKEN_TRANS_EDP 0x420cc -#define _CHICKEN_TRANS_D 0x420d8 -#define CHICKEN_TRANS(trans) _MMIO(_PICK((trans), \ - [TRANSCODER_EDP] = _CHICKEN_TRANS_EDP, \ - [TRANSCODER_A] = _CHICKEN_TRANS_A, \ - [TRANSCODER_B] = _CHICKEN_TRANS_B, \ - [TRANSCODER_C] = _CHICKEN_TRANS_C, \ - [TRANSCODER_D] = _CHICKEN_TRANS_D)) +#define CHICKEN_TRANS_A _MMIO(0x420c0) +#define CHICKEN_TRANS_B _MMIO(0x420c4) +#define CHICKEN_TRANS_C _MMIO(0x420c8) +#define CHICKEN_TRANS_EDP _MMIO(0x420cc) #define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) #define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) @@ -7747,19 +7652,15 @@ enum { #define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define SKL_DFSM _MMIO(0x51000) -#define SKL_DFSM_DISPLAY_PM_DISABLE (1 << 27) -#define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) -#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) -#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) -#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) -#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) -#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) -#define ICL_DFSM_DMC_DISABLE (1 << 23) -#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) -#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) -#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) -#define TGL_DFSM_PIPE_D_DISABLE (1 << 22) -#define CNL_DFSM_DISPLAY_DSC_DISABLE (1 << 7) +#define SKL_DFSM_CDCLK_LIMIT_MASK (3 << 23) +#define SKL_DFSM_CDCLK_LIMIT_675 (0 << 23) +#define SKL_DFSM_CDCLK_LIMIT_540 (1 << 23) +#define SKL_DFSM_CDCLK_LIMIT_450 (2 << 23) +#define SKL_DFSM_CDCLK_LIMIT_337_5 (3 << 23) +#define SKL_DFSM_PIPE_A_DISABLE (1 << 30) +#define SKL_DFSM_PIPE_B_DISABLE (1 << 21) +#define SKL_DFSM_PIPE_C_DISABLE (1 << 28) +#define TGL_DFSM_PIPE_D_DISABLE (1 << 22) #define SKL_DSSM _MMIO(0x51004) #define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) @@ -9661,9 +9562,6 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 
<< 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) -#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10) -#define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ - REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1 << 8) #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1 << 7) @@ -10351,12 +10249,6 @@ enum skl_power_gate { _DKL_PHY2_BASE) + \ _DKL_TX_FW_CALIB) -#define _DKL_TX_PMD_LANE_SUS 0xD00 -#define DKL_TX_PMD_LANE_SUS(tc_port) _MMIO(_PORT(tc_port, \ - _DKL_PHY1_BASE, \ - _DKL_PHY2_BASE) + \ - _DKL_TX_PMD_LANE_SUS) - #define _DKL_TX_DW17 0xDC4 #define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \ _DKL_PHY1_BASE, \ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 00011f9533b6..4575f368455d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,8 +31,6 @@ #include "gem/i915_gem_context.h" #include "gt/intel_context.h" -#include "gt/intel_ring.h" -#include "gt/intel_rps.h" #include "i915_active.h" #include "i915_drv.h" @@ -259,8 +257,8 @@ bool i915_request_retire(struct i915_request *rq) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); - atomic_dec(&rq->engine->gt->rps.num_waiters); + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); } if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); @@ -1448,7 +1446,7 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. */ - if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) && + if (CONFIG_DRM_I915_SPIN_REQUEST && __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) { dma_fence_signal(&rq->fence); goto out; @@ -1468,7 +1466,7 @@ long i915_request_wait(struct i915_request *rq, */ if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) - intel_rps_boost(rq); + gen6_rps_boost(rq); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 010d67f48ad9..0ca40f6bf08c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -189,39 +189,22 @@ static inline bool need_preempt(int prio, int active) return prio >= max(I915_PRIORITY_NORMAL, active); } -static void kick_submission(struct intel_engine_cs *engine, - const struct i915_request *rq, - int prio) +static void kick_submission(struct intel_engine_cs *engine, int prio) { - const struct i915_request *inflight; - - /* - * We only need to kick the tasklet once for the high priority - * new context we add into the queue. - */ - if (prio <= engine->execlists.queue_priority_hint) - return; - - rcu_read_lock(); - - /* Nothing currently active? We're overdue for a submission! */ - inflight = execlists_active(&engine->execlists); - if (!inflight) - goto unlock; + const struct i915_request *inflight = + execlists_active(&engine->execlists); /* * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. + * bother evaluating if we should preempt ourselves, or if + * we expect nothing to change as a result of running the + * tasklet, i.e. 
we have not change the priority queue + * sufficiently to oust the running context. */ - if (inflight->hw_context == rq->hw_context) - goto unlock; + if (!inflight || !need_preempt(prio, rq_prio(inflight))) + return; - engine->execlists.queue_priority_hint = prio; - if (need_preempt(prio, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); - -unlock: - rcu_read_unlock(); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void __i915_schedule(struct i915_sched_node *node, @@ -347,8 +330,13 @@ static void __i915_schedule(struct i915_sched_node *node, list_move_tail(&node->link, cache.priolist); } + if (prio <= engine->execlists.queue_priority_hint) + continue; + + engine->execlists.queue_priority_hint = prio; + /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, node_to_request(node), prio); + kick_submission(engine, prio); } spin_unlock(&engine->active.lock); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 65476909d1bf..bf039b8ba593 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -31,7 +31,6 @@ #include #include "gt/intel_rc6.h" -#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_sysfs.h" @@ -260,7 +259,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; u32 freq; @@ -273,31 +271,31 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, freq = (freq >> 8) & 0xff; } else { - freq = intel_get_cagf(rps, I915_READ(GEN6_RPSTAT1)); + freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(rps, freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->cur_freq)); + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->boost_freq)); + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -305,7 +303,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &dev_priv->gt_pm.rps; bool boost = false; ssize_t ret; u32 val; @@ -315,7 +313,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return ret; /* Validate against (static) hardware limits */ - val = intel_freq_opcode(rps, val); + val = intel_freq_opcode(dev_priv, val); if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; @@ -335,19 +333,19 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; 
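[Editor's aside, not part of the patch] The kick_submission()/need_preempt() hunks above boil down to one decision: poking the submission tasklet is only worthwhile when the newly queued priority beats both the normal floor and whatever is currently executing. A standalone restatement follows, assuming plain integer priorities where larger means more urgent; it folds the queue-hint check and the preemption check into one helper purely for illustration.

```c
/*
 * Standalone restatement of the preemption check above (larger integers
 * mean higher priority): a newly queued priority only kicks submission
 * when it exceeds the queue hint, the normal floor, and the priority of
 * the request currently executing.
 */
#include <stdbool.h>

#define PRIORITY_NORMAL 0

static inline int max_int(int a, int b) { return a > b ? a : b; }

static bool need_preempt(int prio, int active_prio)
{
    return prio >= max_int(PRIORITY_NORMAL, active_prio);
}

static bool should_kick(int new_prio, int queue_hint, int active_prio)
{
    if (new_prio <= queue_hint)    /* nothing new to tell the tasklet */
        return false;
    return need_preempt(new_prio, active_prio);
}
```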
return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->efficient_freq)); + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->max_freq_softlimit)); + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -355,17 +353,19 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; - ssize_t ret; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; + ssize_t ret; ret = kstrtou32(buf, 0, &val); if (ret) return ret; + wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(rps, val); + val = intel_freq_opcode(dev_priv, val); if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { @@ -375,7 +375,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(rps, val)); + intel_gpu_freq(dev_priv, val)); rps->max_freq_softlimit = val; @@ -383,15 +383,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* - * We still need *_set_rps to process the new max_delay and + /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. - */ - intel_rps_set(rps, val); + * frequency request may be unchanged. */ + ret = intel_set_rps(dev_priv, val); unlock: mutex_unlock(&rps->lock); + intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -399,10 +398,10 @@ unlock: static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->min_freq_softlimit)); + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -410,17 +409,19 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; - ssize_t ret; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_wakeref_t wakeref; u32 val; + ssize_t ret; ret = kstrtou32(buf, 0, &val); if (ret) return ret; + wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(rps, val); + val = intel_freq_opcode(dev_priv, val); if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { @@ -434,15 +435,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* - * We still need *_set_rps to process the new min_delay and + /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. - */ - intel_rps_set(rps, val); + * frequency request may be unchanged. 
*/ + ret = intel_set_rps(dev_priv, val); unlock: mutex_unlock(&rps->lock); + intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -464,15 +464,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(rps, rps->rp0_freq); + val = intel_gpu_freq(dev_priv, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(rps, rps->rp1_freq); + val = intel_gpu_freq(dev_priv, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(rps, rps->min_freq); + val = intel_gpu_freq(dev_priv, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 0348c6d0ef5f..16acdf7bdbe6 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -54,54 +54,25 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static unsigned int i915_probe_fail_count; -int __i915_inject_probe_error(struct drm_i915_private *i915, int err, - const char *func, int line) +int __i915_inject_load_error(struct drm_i915_private *i915, int err, + const char *func, int line) { - if (i915_probe_fail_count >= i915_modparams.inject_probe_failure) + if (i915_probe_fail_count >= i915_modparams.inject_load_failure) return 0; - if (++i915_probe_fail_count < i915_modparams.inject_probe_failure) + if (++i915_probe_fail_count < i915_modparams.inject_load_failure) return 0; __i915_printk(i915, KERN_INFO, "Injecting failure %d at checkpoint %u [%s:%d]\n", - err, i915_modparams.inject_probe_failure, func, line); - i915_modparams.inject_probe_failure = 0; + err, i915_modparams.inject_load_failure, func, line); + i915_modparams.inject_load_failure = 0; return err; } bool i915_error_injected(void) { - return i915_probe_fail_count && !i915_modparams.inject_probe_failure; + return i915_probe_fail_count && !i915_modparams.inject_load_failure; } #endif - -void cancel_timer(struct timer_list *t) -{ - if (!READ_ONCE(t->expires)) - return; - - del_timer(t); - WRITE_ONCE(t->expires, 0); -} - -void set_timer_ms(struct timer_list *t, unsigned long timeout) -{ - if (!timeout) { - cancel_timer(t); - return; - } - - timeout = msecs_to_jiffies_timeout(timeout); - - /* - * Paranoia to make sure the compiler computes the timeout before - * loading 'jiffies' as jiffies is volatile and may be updated in - * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffie. 
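[Editor's aside, not part of the patch] The gt_max/min_freq_mhz_store() hunks above follow a common store-side pattern: parse the user string, convert the value into hardware units, reject anything outside the static and soft limits, then apply the new limit. The sketch below models that flow in plain C; the 50 MHz-per-step conversion is an invented stand-in, not intel_freq_opcode().

```c
/*
 * Hedged sketch of the store-side validation pattern above (invented
 * unit conversion, not intel_freq_opcode): parse, convert to hardware
 * units, range-check, then apply the new soft limit.
 */
#include <errno.h>
#include <stdlib.h>

struct freq_limits { unsigned int min, max, soft_min; };

/* Assumed conversion: 50 MHz per hardware step. */
static unsigned int mhz_to_hw(unsigned int mhz) { return mhz / 50; }

static int store_max_freq(struct freq_limits *lim, const char *buf,
                          unsigned int *out_soft_max)
{
    char *end;
    unsigned long mhz = strtoul(buf, &end, 0);
    unsigned int val;

    if (end == buf)
        return -EINVAL;        /* not a number */

    val = mhz_to_hw((unsigned int)mhz);
    if (val < lim->min || val > lim->max || val < lim->soft_min)
        return -EINVAL;        /* outside static or soft limits */

    *out_soft_max = val;       /* a real driver would also re-request the freq */
    return 0;
}
```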
- */ - barrier(); - - mod_timer(t, jiffies + timeout); -} diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 04139ba1191e..562f756da421 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -32,7 +32,6 @@ #include struct drm_i915_private; -struct timer_list; #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -61,20 +60,20 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) -int __i915_inject_probe_error(struct drm_i915_private *i915, int err, - const char *func, int line); -#define i915_inject_probe_error(_i915, _err) \ - __i915_inject_probe_error((_i915), (_err), __func__, __LINE__) +int __i915_inject_load_error(struct drm_i915_private *i915, int err, + const char *func, int line); +#define i915_inject_load_error(_i915, _err) \ + __i915_inject_load_error((_i915), (_err), __func__, __LINE__) bool i915_error_injected(void); #else -#define i915_inject_probe_error(_i915, _err) 0 +#define i915_inject_load_error(_i915, _err) 0 #define i915_error_injected() false #endif -#define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV) +#define i915_inject_probe_failure(i915) i915_inject_load_error((i915), -ENODEV) #define i915_probe_error(i915, fmt, ...) \ __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ @@ -422,25 +421,4 @@ static inline void add_taint_for_CI(unsigned int taint) add_taint(taint, LOCKDEP_STILL_OK); } -void cancel_timer(struct timer_list *t); -void set_timer_ms(struct timer_list *t, unsigned long timeout); - -static inline bool timer_expired(const struct timer_list *t) -{ - return READ_ONCE(t->expires) && !timer_pending(t); -} - -/* - * This is a lookalike for IS_ENABLED() that takes a kconfig value, - * e.g. CONFIG_DRM_I915_SPIN_REQUEST, and evaluates whether it is non-zero - * i.e. whether the configuration is active. Wrapping up the config inside - * a boolean context prevents clang and smatch from complaining about potential - * issues in confusing logical-&& with bitwise-& for constants. - * - * Sadly IS_ENABLED() itself does not work with kconfig values. - * - * Returns 0 if @config is 0, 1 if set to any value. - */ -#define IS_ACTIVE(config) ((config) != 0) - #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e5512f26e20a..e90c4d0af8fd 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -106,7 +106,7 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; /* The aliasing_ppgtt should never be used directly! */ - GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm); + GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); vma = i915_vma_alloc(); if (vma == NULL) @@ -412,7 +412,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) int err; /* Access through the GTT requires the device to be awake. 
*/ - assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); + assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; @@ -700,35 +700,41 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - if (vma->obj) { - struct drm_i915_gem_object *obj = vma->obj; - - atomic_inc(&obj->bind_count); - assert_bind_count(obj); - } list_add_tail(&vma->vm_link, &vma->vm->bound_list); + if (vma->obj) { + atomic_inc(&vma->obj->bind_count); + assert_bind_count(vma->obj); + } + return 0; } static void -i915_vma_detach(struct i915_vma *vma) +i915_vma_remove(struct i915_vma *vma) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - /* - * And finally now the object is completely decoupled from this - * vma, we can drop its hold on the backing storage and allow - * it to be reaped by the shrinker. - */ list_del(&vma->vm_link); + + /* + * Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; - assert_bind_count(obj); + /* + * And finally now the object is completely decoupled from this + * vma, we can drop its hold on the backing storage and allow + * it to be reaped by the shrinker. + */ atomic_dec(&obj->bind_count); + assert_bind_count(obj); } + + drm_mm_remove_node(&vma->node); } static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) @@ -923,10 +929,8 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); err_remove: - if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) { - i915_vma_detach(vma); - drm_mm_remove_node(&vma->node); - } + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) + i915_vma_remove(vma); err_active: i915_active_release(&vma->active); err_unlock: @@ -941,7 +945,7 @@ err_pages: void i915_vma_close(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; + struct drm_i915_private *i915 = vma->vm->i915; unsigned long flags; GEM_BUG_ON(i915_vma_is_closed(vma)); @@ -958,18 +962,18 @@ void i915_vma_close(struct i915_vma *vma) * causing us to rebind the VMA once more. This ends up being a lot * of wasted work for the steady state. 
*/ - spin_lock_irqsave(&gt->closed_lock, flags); - list_add(&vma->closed_link, &gt->closed_vma); - spin_unlock_irqrestore(&gt->closed_lock, flags); + spin_lock_irqsave(&i915->gt.closed_lock, flags); + list_add(&vma->closed_link, &i915->gt.closed_vma); + spin_unlock_irqrestore(&i915->gt.closed_lock, flags); } static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; + struct drm_i915_private *i915 = vma->vm->i915; - spin_lock_irq(&gt->closed_lock); + spin_lock_irq(&i915->gt.closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(&gt->closed_lock); + spin_unlock_irq(&i915->gt.closed_lock); } void i915_vma_reopen(struct i915_vma *vma) @@ -1005,12 +1009,12 @@ void i915_vma_destroy(struct i915_vma *vma) i915_vma_free(vma); } -void i915_vma_parked(struct intel_gt *gt) +void i915_vma_parked(struct drm_i915_private *i915) { struct i915_vma *vma, *next; - spin_lock_irq(&gt->closed_lock); - list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) { + spin_lock_irq(&i915->gt.closed_lock); + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { struct drm_i915_gem_object *obj = vma->obj; struct i915_address_space *vm = vma->vm; @@ -1024,7 +1028,7 @@ void i915_vma_parked(struct intel_gt *gt) obj = NULL; } - spin_unlock_irq(&gt->closed_lock); + spin_unlock_irq(&i915->gt.closed_lock); if (obj) { i915_vma_destroy(vma); @@ -1034,11 +1038,11 @@ void i915_vma_parked(struct intel_gt *gt) i915_vm_close(vm); /* Restart after dropping lock */ - spin_lock_irq(&gt->closed_lock); - next = list_first_entry(&gt->closed_vma, + spin_lock_irq(&i915->gt.closed_lock); + next = list_first_entry(&i915->gt.closed_vma, typeof(*next), closed_link); } - spin_unlock_irq(&gt->closed_lock); + spin_unlock_irq(&i915->gt.closed_lock); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -1183,10 +1187,9 @@ int __i915_vma_unbind(struct i915_vma *vma) } atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); - i915_vma_detach(vma); vma_unbind_pages(vma); + i915_vma_remove(vma); - drm_mm_remove_node(&vma->node); /* pairs with i915_vma_destroy() */ return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 465932813bc5..858908e3d1cc 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -462,7 +462,7 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } -void i915_vma_parked(struct intel_gt *gt); +void i915_vma_parked(struct drm_i915_private *i915); #define for_each_until(cond) if (cond) break; else diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index a5b571364cf6..85e480bdc673 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -981,19 +981,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) enabled_mask); else info->pipe_mask = enabled_mask; - - if (dfsm & SKL_DFSM_DISPLAY_HDCP_DISABLE) - info->display.has_hdcp = 0; - - if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) - info->display.has_fbc = 0; - - if (INTEL_GEN(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) - info->display.has_csr = 0; - - if (INTEL_GEN(dev_priv) >= 10 && - (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) - info->display.has_dsc = 0; } /* Initialize slice/subslice/EU info */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 4bdf8a6cfb47..e9940f932d26 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++
b/drivers/gpu/drm/i915/intel_device_info.h @@ -107,7 +107,6 @@ enum intel_ppgtt_type { func(is_mobile); \ func(is_lp); \ func(require_force_probe); \ - func(is_dgfx); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(gpu_reset_clobbers_display); \ @@ -137,10 +136,8 @@ enum intel_ppgtt_type { func(has_ddi); \ func(has_dp_mst); \ func(has_dsb); \ - func(has_dsc); \ func(has_fbc); \ func(has_gmch); \ - func(has_hdcp); \ func(has_hotplug); \ func(has_ipc); \ func(has_modular_fia); \ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index baaeaecc64af..72f98a111de1 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -207,65 +207,6 @@ void intel_memory_region_put(struct intel_memory_region *mem) kref_put(&mem->kref, __intel_memory_region_destroy); } -/* Global memory region registration -- only slight layer inversions! */ - -int intel_memory_regions_hw_probe(struct drm_i915_private *i915) -{ - int err, i; - - for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { - struct intel_memory_region *mem = ERR_PTR(-ENODEV); - u32 type; - - if (!HAS_REGION(i915, BIT(i))) - continue; - - type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); - switch (type) { - case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915); - break; - case INTEL_MEMORY_STOLEN: - mem = i915_gem_stolen_setup(i915); - break; - case INTEL_MEMORY_LOCAL: - mem = intel_setup_fake_lmem(i915); - break; - } - - if (IS_ERR(mem)) { - err = PTR_ERR(mem); - DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); - goto out_cleanup; - } - - mem->id = intel_region_map[i]; - mem->type = type; - mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); - - i915->mm.regions[i] = mem; - } - - return 0; - -out_cleanup: - intel_memory_regions_driver_release(i915); - return err; -} - -void intel_memory_regions_driver_release(struct drm_i915_private *i915) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { - struct intel_memory_region *region = - fetch_and_zero(&i915->mm.regions[i]); - - if (region) - intel_memory_region_put(region); - } -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/intel_memory_region.c" #include "selftests/mock_region.c" diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 238722009677..49b059a2be70 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -10,7 +10,6 @@ #include #include #include -#include #include "i915_buddy.h" @@ -72,9 +71,6 @@ struct intel_memory_region { struct io_mapping iomap; struct resource region; - /* For fake LMEM */ - struct drm_mm_node fake_mappable; - struct i915_buddy_mm mm; struct mutex mm_lock; @@ -87,8 +83,6 @@ struct intel_memory_region { unsigned int instance; unsigned int id; - dma_addr_t remap_addr; - struct { struct mutex lock; /* Protects access to objects */ struct list_head list; @@ -123,7 +117,4 @@ struct intel_memory_region * intel_memory_region_get(struct intel_memory_region *mem); void intel_memory_region_put(struct intel_memory_region *mem); -int intel_memory_regions_hw_probe(struct drm_i915_private *i915); -void intel_memory_regions_driver_release(struct drm_i915_private *i915); - #endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 000ba43e2c02..1035d3d46fd8 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -52,8 +52,7 @@ 
intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) return PCH_SPT; case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); - WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv)); + WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); return PCH_SPT; case INTEL_PCH_KBP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); @@ -62,7 +61,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - case INTEL_PCH_CNP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 1115c6a0522c..f4dc18c34291 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -40,7 +40,6 @@ enum intel_pch { #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5d2b460d3ee5..362234449087 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -197,6 +197,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) break; } + dev_priv->ips.r_t = dev_priv->mem_freq; + switch (csipll & 0x3ff) { case 0x00c: dev_priv->fsb_freq = 3200; @@ -225,6 +227,14 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 0; break; } + + if (dev_priv->fsb_freq == 3200) { + dev_priv->ips.c_m = 0; + } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { + dev_priv->ips.c_m = 1; + } else { + dev_priv->ips.c_m = 2; + } } static const struct cxsr_latency cxsr_latency_table[] = { @@ -4087,6 +4097,93 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, return mul_fixed16(downscale_w, downscale_h); } +static uint_fixed_16_16_t +skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) +{ + uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); + + if (!crtc_state->base.enable) + return pipe_downscale; + + if (crtc_state->pch_pfit.enabled) { + u32 src_w, src_h, dst_w, dst_h; + u32 pfit_size = crtc_state->pch_pfit.size; + uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; + uint_fixed_16_16_t downscale_h, downscale_w; + + src_w = crtc_state->pipe_src_w; + src_h = crtc_state->pipe_src_h; + dst_w = pfit_size >> 16; + dst_h = pfit_size & 0xffff; + + if (!dst_w || !dst_h) + return pipe_downscale; + + fp_w_ratio = div_fixed16(src_w, dst_w); + fp_h_ratio = div_fixed16(src_h, dst_h); + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); + + pipe_downscale = mul_fixed16(downscale_w, downscale_h); + } + + return pipe_downscale; +} + +int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct drm_atomic_state *state = crtc_state->base.state; + const struct intel_plane_state *plane_state; + struct intel_plane *plane; + int crtc_clock, dotclk; + u32 pipe_max_pixel_rate; + uint_fixed_16_16_t pipe_downscale; 
+ uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); + + if (!crtc_state->base.enable) + return 0; + + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + uint_fixed_16_16_t plane_downscale; + uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); + int bpp; + + if (!intel_wm_plane_visible(crtc_state, plane_state)) + continue; + + if (WARN_ON(!plane_state->base.fb)) + return -EINVAL; + + plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state); + bpp = plane_state->base.fb->format->cpp[0] * 8; + if (bpp == 64) + plane_downscale = mul_fixed16(plane_downscale, + fp_9_div_8); + + max_downscale = max_fixed16(plane_downscale, max_downscale); + } + pipe_downscale = skl_pipe_downscale_amount(crtc_state); + + pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); + + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; + dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; + + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) + dotclk *= 2; + + pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); + + if (pipe_max_pixel_rate < crtc_clock) { + DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n"); + return -EINVAL; + } + + return 0; +} + static u64 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, @@ -6242,6 +6339,1627 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) intel_enable_ipc(dev_priv); } +/* + * Lock protecting IPS related data structures + */ +DEFINE_SPINLOCK(mchdev_lock); + +bool ironlake_set_drps(struct drm_i915_private *i915, u8 val) +{ + struct intel_uncore *uncore = &i915->uncore; + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static void ironlake_enable_drps(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 rgvmodectl; + u8 fmax, fmin, fstart, vstart; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ + dev_priv->ips.fstart = fstart; + + dev_priv->ips.max_delay = fstart; + dev_priv->ips.min_delay = fmin; + dev_priv->ips.cur_delay = fstart; 
+ + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + /* + * Interrupts will be enabled in ironlake_irq_postinstall + */ + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + ironlake_set_drps(dev_priv, fstart); + + dev_priv->ips.last_count1 = + intel_uncore_read(uncore, DMIEC) + + intel_uncore_read(uncore, DDREC) + + intel_uncore_read(uncore, CSIEC); + dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); + dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + dev_priv->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); +} + +static void ironlake_disable_drps(struct drm_i915_private *i915) +{ + struct intel_uncore *uncore = &i915->uncore; + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, + MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, + DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, + DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + ironlake_set_drps(i915, i915->ips.fstart); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +/* There's a funny hw issue where the hw returns all 0 when reading from + * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value + * ourselves, instead of doing a rmw cycle (which might result in us clearing + * all limits and the gpu stuck at whatever frequency it is at atm). + */ +static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 limits; + + /* Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. */ + if (INTEL_GEN(dev_priv) >= 9) { + limits = (rps->max_freq_softlimit) << 23; + if (val <= rps->min_freq_softlimit) + limits |= (rps->min_freq_softlimit) << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct drm_i915_private *dev_priv, int new_power) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. 
*/ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + + I915_WRITE(GEN6_RP_UP_EI, + GT_INTERVAL_FROM_US(dev_priv, ei_up)); + I915_WRITE(GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(dev_priv, + ei_up * threshold_up / 100)); + + I915_WRITE(GEN6_RP_DOWN_EI, + GT_INTERVAL_FROM_US(dev_priv, ei_down)); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(dev_priv, + ei_down * threshold_down / 100)); + + I915_WRITE(GEN6_RP_CONTROL, + (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(dev_priv, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive) +{ + struct intel_rps *rps = &i915->gt_pm.rps; + + if (INTEL_GEN(i915) < 6) + return; + + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake)) + rps_set_power(i915, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 mask = 0; + + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= dev_priv->pm_rps_events; + + return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); +} + +/* gen6_set_rps is called to update the frequency request, 
but should also be + * called when the range (min_delay and max_delay) is modified so that we can + * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* min/max delay may still have been modified so be sure to + * write the limits value. + */ + if (val != rps->cur_freq) { + gen6_set_rps_thresholds(dev_priv, val); + + if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(GEN6_RPNSWREQ, + GEN9_FREQUENCY(val)); + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(val)); + else + I915_WRITE(GEN6_RPNSWREQ, + GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + } + + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + + rps->cur_freq = val; + trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; +} + +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + int err; + + if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), + "Odd GPU freq value\n")) + val &= ~1; + + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + + if (val != dev_priv->gt_pm.rps.cur_freq) { + vlv_punit_get(dev_priv); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(dev_priv); + if (err) + return err; + + gen6_set_rps_thresholds(dev_priv, val); + } + + dev_priv->gt_pm.rps.cur_freq = val; + trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; +} + +/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down + * + * * If Gfx is Idle, then + * 1. Forcewake Media well. + * 2. Request idle freq. + * 3. Release Forcewake of Media well. +*/ +static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val = rps->idle_freq; + int err; + + if (rps->cur_freq <= val) + return; + + /* The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA); + err = valleyview_set_rps(dev_priv, val); + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); +} + +void gen6_rps_busy(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + mutex_lock(&rps->lock); + if (rps->enabled) { + u8 freq; + + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) + gen6_rps_reset_ei(dev_priv); + I915_WRITE(GEN6_PMINTRMSK, + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); + + gen6_enable_rps_interrupts(dev_priv); + + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. 
+ */ + freq = max(rps->cur_freq, + rps->efficient_freq); + + if (intel_set_rps(dev_priv, + clamp(freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); + } + mutex_unlock(&rps->lock); +} + +void gen6_rps_idle(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* Flush our bottom-half so that it does not race with us + * setting the idle frequency and so that it is bounded by + * our rpm wakeref. And then disable the interrupts to stop any + * futher RPS reclocking whilst we are asleep. + */ + gen6_disable_rps_interrupts(dev_priv); + + mutex_lock(&rps->lock); + if (rps->enabled) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + vlv_set_rps_idle(dev_priv); + else + gen6_set_rps(dev_priv, rps->idle_freq); + rps->last_adj = 0; + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); + } + mutex_unlock(&rps->lock); +} + +void gen6_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->i915->gt_pm.rps; + unsigned long flags; + bool boost; + + /* This is intentionally racy! We peek at the state here, then + * validate inside the RPS worker. + */ + if (!rps->enabled) + return; + + if (i915_request_signaled(rq)) + return; + + /* Serializes with i915_request_retire() */ + boost = false; + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + boost = !atomic_fetch_inc(&rps->num_waiters); + rq->flags |= I915_REQUEST_WAITBOOST; + } + spin_unlock_irqrestore(&rq->lock, flags); + if (!boost) + return; + + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); +} + +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + int err; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (!rps->enabled) { + rps->cur_freq = val; + return 0; + } + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + err = valleyview_set_rps(dev_priv, val); + else + err = gen6_set_rps(dev_priv, val); + + return err; +} + +static void gen9_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RPNSWREQ, 1 << 31); + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(dev_priv)) { + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || + IS_GEN9_BC(dev_priv) || 
INTEL_GEN(dev_priv) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + ((ddcc_status >> 8) & 0xff), + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static void reset_rps(struct drm_i915_private *dev_priv, + int (*set)(struct drm_i915_private *, u8)) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u8 freq = rps->cur_freq; + + /* force a reset */ + rps->power.mode = -1; + rps->cur_freq = -1; + + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static void gen9_enable_rps(struct drm_i915_private *dev_priv) +{ + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(dev_priv, 9)) + I915_WRITE(GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); + + /* 1 second timeout*/ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(dev_priv, 1000000)); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); + + /* Leaning on the below call to gen6_set_rps to program/setup the + * Up/Down EI & threshold registers, as well as the RP_CONTROL, + * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); +} + +static void gen8_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + + /* 1 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(rps->rp1_freq)); + I915_WRITE(GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + rps->max_freq_softlimit << 24 | + rps->min_freq_softlimit << 16); + + I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ + I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ + I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); +} + +static void gen6_enable_rps(struct drm_i915_private *dev_priv) +{ + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... 
+ */ + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); +} + +static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp0; + + val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); + break; + case 12: + /* (2 * 6) config */ + rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); + break; + } + + rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); + + return rp0; +} + +static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; + + val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); + rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; + + return rpe; +} + +static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp1; + + val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); + rp1 = (val & FB_GFX_FREQ_FUSE_MASK); + + return rp1; +} + +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + +static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp1; + + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp0; + + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; + + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val; + + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. 
+ */ + return max_t(u32, val, 0xc0); +} + +static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) +{ + dev_priv->gt_pm.rps.gpll_ref_freq = + vlv_get_cck_clock(dev_priv, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + dev_priv->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", + dev_priv->gt_pm.rps.gpll_ref_freq); +} + +static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val; + + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(dev_priv); + + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + dev_priv->mem_freq = 800; + break; + case 2: + dev_priv->mem_freq = 1066; + break; + case 3: + dev_priv->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); + + rps->max_freq = valleyview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = valleyview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val; + + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(dev_priv); + + val = vlv_cck_read(dev_priv, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + dev_priv->mem_freq = 2000; + break; + default: + dev_priv->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); + + rps->max_freq = cherryview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = cherryview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + + /* 1: Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + 
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(dev_priv); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(dev_priv); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + reset_rps(dev_priv, valleyview_set_rps); + + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); +} + +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(dev_priv); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(dev_priv); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + reset_rps(dev_priv, valleyview_set_rps); + + intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + unsigned long freq; + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + freq = ((div * 133333) / ((1<ips.last_time1; + + /* Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. 
+ */ + if (diff1 <= 10) + return dev_priv->ips.chipset_power; + + count1 = I915_READ(DMIEC); + count2 = I915_READ(DDREC); + count3 = I915_READ(CSIEC); + + total_count = count1 + count2 + count3; + + /* FIXME: handle per-counter overflow */ + if (total_count < dev_priv->ips.last_count1) { + diff = ~0UL - dev_priv->ips.last_count1; + diff += total_count; + } else { + diff = total_count - dev_priv->ips.last_count1; + } + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == dev_priv->ips.c_m && + cparams[i].t == dev_priv->ips.r_t) { + m = cparams[i].m; + c = cparams[i].c; + break; + } + } + + diff = div_u64(diff, diff1); + ret = ((m * diff) + c); + ret = div_u64(ret, 10); + + dev_priv->ips.last_count1 = total_count; + dev_priv->ips.last_time1 = now; + + dev_priv->ips.chipset_power = ret; + + return ret; +} + +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) +{ + intel_wakeref_t wakeref; + unsigned long val = 0; + + if (!IS_GEN(dev_priv, 5)) + return 0; + + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_chipset_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } + + return val; +} + +unsigned long i915_mch_val(struct drm_i915_private *i915) +{ + unsigned long m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(&i915->uncore, TSFS); + + m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); + x = intel_uncore_read8(&i915->uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + + return ((m * x) / 127) - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + const int vm = vd - 1125; + + if (INTEL_INFO(dev_priv)->is_mobile) + return vm > 0 ? vm : 0; + + return vd; +} + +static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) +{ + u64 now, diff, diffms; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + diffms = now - dev_priv->ips.last_time2; + do_div(diffms, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (!diffms) + return; + + count = I915_READ(GFXEC); + + if (count < dev_priv->ips.last_count2) { + diff = ~0UL - dev_priv->ips.last_count2; + diff += count; + } else { + diff = count - dev_priv->ips.last_count2; + } + + dev_priv->ips.last_count2 = count; + dev_priv->ips.last_time2 = now; + + /* More magic constants... 
*/ + diff = diff * 1181; + diff = div_u64(diff, diffms * 10); + dev_priv->ips.gfx_power = diff; +} + +void i915_update_gfx_val(struct drm_i915_private *dev_priv) +{ + intel_wakeref_t wakeref; + + if (!IS_GEN(dev_priv, 5)) + return; + + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { + spin_lock_irq(&mchdev_lock); + __i915_update_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } +} + +static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) +{ + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(dev_priv, pxvid); + + state1 = ext_v; + + t = i915_mch_val(dev_priv); + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + if (t > 80) + corr = ((t * 2349) + 135940); + else if (t >= 50) + corr = ((t * 964) + 29317); + else /* < 50 */ + corr = ((t * 301) + 1004); + + corr = corr * ((150142 * state1) / 10000 - 78642); + corr /= 100000; + corr2 = (corr * dev_priv->ips.corr); + + state2 = (corr2 * state1) / 10000; + state2 /= 100; /* convert to mW */ + + __i915_update_gfx_val(dev_priv); + + return dev_priv->ips.gfx_power + state2; +} + +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) +{ + intel_wakeref_t wakeref; + unsigned long val = 0; + + if (!IS_GEN(dev_priv, 5)) + return 0; + + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { + spin_lock_irq(&mchdev_lock); + val = __i915_gfx_val(dev_priv); + spin_unlock_irq(&mchdev_lock); + } + + return val; +} + +static struct drm_i915_private __rcu *i915_mch_dev; + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(i915_mch_dev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + spin_lock_irq(&mchdev_lock); + chipset_val = __i915_chipset_val(i915); + graphics_val = __i915_gfx_val(i915); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + + i915 = mchdev_get(); + if (!i915) + return false; + + spin_lock_irq(&mchdev_lock); + if (i915->ips.max_delay > i915->ips.fmax) + i915->ips.max_delay--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. 
+ */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + + i915 = mchdev_get(); + if (!i915) + return false; + + spin_lock_irq(&mchdev_lock); + if (i915->ips.max_delay < i915->ips.min_delay) + i915->ips.max_delay++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + spin_lock_irq(&mchdev_lock); + i915->ips.max_delay = i915->ips.fstart; + ret = ironlake_set_drps(i915, i915->ips.fstart); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_gpu_ips_init(struct drm_i915_private *dev_priv) +{ + /* We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ + rcu_assign_pointer(i915_mch_dev, dev_priv); + + ips_ping_for_i915_load(); +} + +void intel_gpu_ips_teardown(void) +{ + rcu_assign_pointer(i915_mch_dev, NULL); +} + +static void intel_init_emon(struct drm_i915_private *dev_priv) +{ + u32 lcfuse; + u8 pxw[16]; + int i; + + /* Disable to program */ + I915_WRITE(ECR, 0); + POSTING_READ(ECR); + + /* Program energy weights for various events */ + I915_WRITE(SDEW, 0x15040d00); + I915_WRITE(CSIEW0, 0x007f0000); + I915_WRITE(CSIEW1, 0x1e220004); + I915_WRITE(CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + I915_WRITE(PEW(i), 0); + for (i = 0; i < 3; i++) + I915_WRITE(DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = I915_READ(PXVFREQ(i)); + unsigned long freq = intel_pxfreq(pxvidfreq); + unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; + unsigned long val; + + val = vid * vid; + val *= (freq / 1000); + val *= 255; + val /= (127*127*900); + if (val > 0xff) + DRM_ERROR("bad pxval: %ld\n", val); + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | + (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); + I915_WRITE(PXW(i), val); + } + + /* Adjust magic regs to magic values (more experimental results) */ + I915_WRITE(OGW0, 0); + I915_WRITE(OGW1, 0); + I915_WRITE(EG0, 0x00007f00); + I915_WRITE(EG1, 0x0000000e); + I915_WRITE(EG2, 0x000e0000); + I915_WRITE(EG3, 0x68000300); + I915_WRITE(EG4, 0x42000000); + I915_WRITE(EG5, 0x00140031); + I915_WRITE(EG6, 0); + I915_WRITE(EG7, 0); + + for (i = 0; i < 8; i++) + I915_WRITE(PXWL(i), 0); + + /* Enable PMON + select events */ + I915_WRITE(ECR, 0x80000019); + + lcfuse = I915_READ(LCFUSE02); + + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); +} + +void intel_init_gt_powersave(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->has_rps = false; + + /* Initialize RPS limits (for userspace) */ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_init_gt_powersave(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_init_gt_powersave(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_init_rps_frequencies(dev_priv); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(dev_priv, 6) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params = 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, + &params, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) +{ + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ + intel_disable_gt_powersave(dev_priv); + + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_reset_rps_interrupts(dev_priv); +} + +static void
intel_disable_rps(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->gt_pm.rps.lock); + + if (!dev_priv->gt_pm.rps.enabled) + return; + + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rps(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rps(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rps(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rps(dev_priv); + else if (IS_IRONLAKE_M(dev_priv)) + ironlake_disable_drps(dev_priv); + + dev_priv->gt_pm.rps.enabled = false; +} + +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->gt_pm.rps.lock); + + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_llc_disable(&dev_priv->gt.llc); + + mutex_unlock(&dev_priv->gt_pm.rps.lock); +} + +static void intel_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + lockdep_assert_held(&rps->lock); + + if (rps->enabled) + return; + + if (IS_CHERRYVIEW(dev_priv)) { + cherryview_enable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_enable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 9) { + gen9_enable_rps(dev_priv); + } else if (IS_BROADWELL(dev_priv)) { + gen8_enable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_enable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); + } + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); + + rps->enabled = true; +} + +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +{ + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; + + mutex_lock(&dev_priv->gt_pm.rps.lock); + + if (HAS_RPS(dev_priv)) + intel_enable_rps(dev_priv); + + intel_llc_enable(&dev_priv->gt.llc); + + mutex_unlock(&dev_priv->gt_pm.rps.lock); +} + static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) { /* @@ -7224,8 +8942,90 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } +static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) +{ + if (INTEL_GEN(dev_priv) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(dev_priv)) + return chv_gpu_freq(dev_priv, val); + else if (IS_VALLEYVIEW(dev_priv)) + return byt_gpu_freq(dev_priv, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct 
drm_i915_private *dev_priv, int val) +{ + if (INTEL_GEN(dev_priv) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(dev_priv)) + return chv_freq_opcode(dev_priv, val); + else if (IS_VALLEYVIEW(dev_priv)) + return byt_freq_opcode(dev_priv, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + void intel_pm_setup(struct drm_i915_private *dev_priv) { + mutex_init(&dev_priv->gt_pm.rps.lock); + mutex_init(&dev_priv->gt_pm.rps.power.mutex); + + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); + dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } + +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) +{ + u32 cagf; + + if (INTEL_GEN(dev_priv) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index b579c724b915..93d192d0610a 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -29,6 +29,15 @@ void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_i915_private *dev_priv); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_i915_private *dev_priv); +void intel_gpu_ips_init(struct drm_i915_private *dev_priv); +void intel_gpu_ips_teardown(void); +void intel_init_gt_powersave(struct drm_i915_private *dev_priv); +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); +void gen6_rps_busy(struct drm_i915_private *dev_priv); +void gen6_rps_idle(struct drm_i915_private *dev_priv); +void gen6_rps_boost(struct i915_request *rq); void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv); void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); @@ -55,9 +64,24 @@ void skl_write_plane_wm(struct intel_plane *plane, void skl_write_cursor_wm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state); bool ilk_disable_lp_wm(struct drm_device *dev); +int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, + struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); +int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); +int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); + +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); + +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); +unsigned long i915_mch_val(struct drm_i915_private *dev_priv); +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); +void i915_update_gfx_val(struct drm_i915_private *dev_priv); + +bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive); bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); #endif /* __INTEL_PM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 260b0ee5d1e3..268192b5613b 
100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -79,6 +79,7 @@ __live_active_setup(struct drm_i915_private *i915) struct intel_engine_cs *engine; struct i915_sw_fence *submit; struct live_active *active; + enum intel_engine_id id; unsigned int count = 0; int err = 0; @@ -96,7 +97,7 @@ __live_active_setup(struct drm_i915_private *i915) if (err) goto out; - for_each_uabi_engine(engine, i915) { + for_each_engine(engine, i915, id) { struct i915_request *rq; rq = i915_request_create(engine->kernel_context); @@ -205,48 +206,3 @@ int i915_active_live_selftests(struct drm_i915_private *i915) return i915_subtests(tests, i915); } - -static struct intel_engine_cs *node_to_barrier(struct active_node *it) -{ - struct intel_engine_cs *engine; - - if (!is_barrier(&it->base)) - return NULL; - - engine = __barrier_to_engine(it); - smp_rmb(); /* serialise with add_active_barriers */ - if (!is_barrier(&it->base)) - return NULL; - - return engine; -} - -void i915_active_print(struct i915_active *ref, struct drm_printer *m) -{ - drm_printf(m, "active %pS:%pS\n", ref->active, ref->retire); - drm_printf(m, "\tcount: %d\n", atomic_read(&ref->count)); - drm_printf(m, "\tpreallocated barriers? %s\n", - yesno(!llist_empty(&ref->preallocated_barriers))); - - if (i915_active_acquire_if_busy(ref)) { - struct active_node *it, *n; - - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - struct intel_engine_cs *engine; - - engine = node_to_barrier(it); - if (engine) { - drm_printf(m, "\tbarrier: %s\n", engine->name); - continue; - } - - if (i915_active_fence_isset(&it->base)) { - drm_printf(m, - "\ttimeline: %llx\n", it->timeline); - continue; - } - } - - i915_active_release(ref); - } -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index d83f6bf6d9d4..97f89f744ee2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -15,26 +15,23 @@ #include "igt_flush_test.h" #include "mock_drm.h" -static int switch_to_context(struct i915_gem_context *ctx) +static int switch_to_context(struct drm_i915_private *i915, + struct i915_gem_context *ctx) { - struct i915_gem_engines_iter it; - struct intel_context *ce; - int err = 0; + struct intel_engine_cs *engine; + enum intel_engine_id id; - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + for_each_engine(engine, i915, id) { struct i915_request *rq; - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); i915_request_add(rq); } - i915_gem_context_unlock_engines(ctx); - return err; + return 0; } static void trash_stolen(struct drm_i915_private *i915) @@ -45,10 +42,6 @@ static void trash_stolen(struct drm_i915_private *i915) unsigned long page; u32 prng = 0x12345678; - /* XXX: fsck. needs some more thought... 
*/ - if (!i915_ggtt_has_aperture(ggtt)) - return; - for (page = 0; page < size; page += PAGE_SIZE) { const dma_addr_t dma = i915->dsm.start + page; u32 __iomem *s; @@ -124,6 +117,7 @@ static void pm_resume(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { intel_gt_sanitize(&i915->gt, false); + i915_gem_sanitize(i915); i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(&i915->ggtt); @@ -146,7 +140,7 @@ static int igt_gem_suspend(void *arg) err = -ENOMEM; ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(ctx); + err = switch_to_context(i915, ctx); if (err) goto out; @@ -161,7 +155,7 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - err = switch_to_context(ctx); + err = switch_to_context(i915, ctx); out: mock_file_free(i915, file); return err; @@ -181,7 +175,7 @@ static int igt_gem_hibernate(void *arg) err = -ENOMEM; ctx = live_context(i915, file); if (!IS_ERR(ctx)) - err = switch_to_context(ctx); + err = switch_to_context(i915, ctx); if (err) goto out; @@ -196,7 +190,7 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - err = switch_to_context(ctx); + err = switch_to_context(i915, ctx); out: mock_file_free(i915, file); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 42e948144f1b..0af9a58d011d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -43,7 +43,8 @@ static void quirk_add(struct drm_i915_gem_object *obj, list_add(&obj->st_link, objects); } -static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) +static int populate_ggtt(struct drm_i915_private *i915, + struct list_head *objects) { unsigned long unbound, bound, count; struct drm_i915_gem_object *obj; @@ -52,8 +53,7 @@ static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) do { struct i915_vma *vma; - obj = i915_gem_object_create_internal(ggtt->vm.i915, - I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -70,7 +70,7 @@ static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) count++; } while (1); pr_debug("Filled GGTT with %lu pages [%llu total]\n", - count, ggtt->vm.total / PAGE_SIZE); + count, i915->ggtt.vm.total / PAGE_SIZE); bound = 0; unbound = 0; @@ -96,7 +96,7 @@ static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) return -EINVAL; } - if (list_empty(&ggtt->vm.bound_list)) { + if (list_empty(&i915->ggtt.vm.bound_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -104,16 +104,17 @@ static int populate_ggtt(struct i915_ggtt *ggtt, struct list_head *objects) return 0; } -static void unpin_ggtt(struct i915_ggtt *ggtt) +static void unpin_ggtt(struct drm_i915_private *i915) { struct i915_vma *vma; - list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); } -static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list) +static void cleanup_objects(struct drm_i915_private *i915, + struct list_head *list) { struct drm_i915_gem_object *obj, *on; @@ -123,19 +124,19 @@ static void cleanup_objects(struct i915_ggtt *ggtt, struct list_head *list) i915_gem_object_put(obj); } - i915_gem_drain_freed_objects(ggtt->vm.i915); + i915_gem_drain_freed_objects(i915); } static int igt_evict_something(void 
*arg) { - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict one. */ - err = populate_ggtt(ggtt, &objects); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -152,7 +153,7 @@ static int igt_evict_something(void *arg) goto cleanup; } - unpin_ggtt(ggtt); + unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict something */ mutex_lock(&ggtt->vm.mutex); @@ -168,14 +169,13 @@ static int igt_evict_something(void *arg) } cleanup: - cleanup_objects(ggtt, &objects); + cleanup_objects(i915, &objects); return err; } static int igt_overcommit(void *arg) { - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; struct i915_vma *vma; LIST_HEAD(objects); @@ -185,11 +185,11 @@ static int igt_overcommit(void *arg) * We expect it to fail. */ - err = populate_ggtt(ggtt, &objects); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; - obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -205,14 +205,14 @@ static int igt_overcommit(void *arg) } cleanup: - cleanup_objects(ggtt, &objects); + cleanup_objects(i915, &objects); return err; } static int igt_evict_for_vma(void *arg) { - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; struct drm_mm_node target = { .start = 0, .size = 4096, @@ -222,7 +222,7 @@ static int igt_evict_for_vma(void *arg) /* Fill the GGTT with pinned objects and try to evict a range. 
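All of these eviction selftests share one shape: pack the GGTT with pinned objects, confirm that eviction fails with -ENOSPC while everything is pinned, unpin, then confirm that eviction succeeds. Condensed into a single sketch for reference (helper names and locking follow the "+" side of the hunks; error handling is simplified and the function itself is illustrative, not part of the patch):

static int evict_range_sketch(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node target = { .start = 0, .size = 4096 };
	LIST_HEAD(objects);
	int err;

	err = populate_ggtt(i915, &objects);	/* pin objects until the GGTT is full */
	if (err)
		goto cleanup;

	/* Everything is pinned, so evicting the target range must fail. */
	mutex_lock(&ggtt->vm.mutex);
	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
	mutex_unlock(&ggtt->vm.mutex);
	if (err != -ENOSPC) {
		err = -EINVAL;
		goto cleanup;
	}

	unpin_ggtt(i915);			/* drop the pins... */

	mutex_lock(&ggtt->vm.mutex);		/* ...and now eviction should succeed */
	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
	mutex_unlock(&ggtt->vm.mutex);

cleanup:
	cleanup_objects(i915, &objects);
	return err;
}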
*/ - err = populate_ggtt(ggtt, &objects); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -236,7 +236,7 @@ static int igt_evict_for_vma(void *arg) goto cleanup; } - unpin_ggtt(ggtt); + unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict the node */ mutex_lock(&ggtt->vm.mutex); @@ -249,7 +249,7 @@ static int igt_evict_for_vma(void *arg) } cleanup: - cleanup_objects(ggtt, &objects); + cleanup_objects(i915, &objects); return err; } @@ -262,8 +262,8 @@ static void mock_color_adjust(const struct drm_mm_node *node, static int igt_evict_for_cache_color(void *arg) { - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; const unsigned long flags = PIN_OFFSET_FIXED; struct drm_mm_node target = { .start = I915_GTT_PAGE_SIZE * 2, @@ -284,7 +284,7 @@ static int igt_evict_for_cache_color(void *arg) ggtt->vm.mm.color_adjust = mock_color_adjust; GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm)); - obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -300,7 +300,7 @@ static int igt_evict_for_cache_color(void *arg) goto cleanup; } - obj = i915_gem_object_create_internal(gt->i915, I915_GTT_PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto cleanup; @@ -345,22 +345,22 @@ static int igt_evict_for_cache_color(void *arg) err = 0; cleanup: - unpin_ggtt(ggtt); - cleanup_objects(ggtt, &objects); + unpin_ggtt(i915); + cleanup_objects(i915, &objects); ggtt->vm.mm.color_adjust = NULL; return err; } static int igt_evict_vm(void *arg) { - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; LIST_HEAD(objects); int err; /* Fill the GGTT with pinned objects and try to evict everything. 
*/ - err = populate_ggtt(ggtt, &objects); + err = populate_ggtt(i915, &objects); if (err) goto cleanup; @@ -374,7 +374,7 @@ static int igt_evict_vm(void *arg) goto cleanup; } - unpin_ggtt(ggtt); + unpin_ggtt(i915); mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); @@ -386,16 +386,14 @@ static int igt_evict_vm(void *arg) } cleanup: - cleanup_objects(ggtt, &objects); + cleanup_objects(i915, &objects); return err; } static int igt_evict_contexts(void *arg) { const u64 PRETEND_GGTT_SIZE = 16ull << 20; - struct intel_gt *gt = arg; - struct i915_ggtt *ggtt = gt->ggtt; - struct drm_i915_private *i915 = gt->i915; + struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; struct reserved { @@ -425,10 +423,10 @@ static int igt_evict_contexts(void *arg) /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); - mutex_lock(&ggtt->vm.mutex); - err = i915_gem_gtt_insert(&ggtt->vm, &hole, + mutex_lock(&i915->ggtt.vm.mutex); + err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->vm.total, + 0, i915->ggtt.vm.total, PIN_NOEVICT); if (err) goto out_locked; @@ -438,17 +436,17 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; - mutex_unlock(&ggtt->vm.mutex); + mutex_unlock(&i915->ggtt.vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); - mutex_lock(&ggtt->vm.mutex); + mutex_lock(&i915->ggtt.vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; } - if (i915_gem_gtt_insert(&ggtt->vm, &r->node, + if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node, 1ul << 20, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->vm.total, + 0, i915->ggtt.vm.total, PIN_NOEVICT)) { kfree(r); break; @@ -460,11 +458,11 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - mutex_unlock(&ggtt->vm.mutex); + mutex_unlock(&i915->ggtt.vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. 
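One detail in the reservation loop above that is easy to miss: ggtt->vm.mutex is dropped around the kcalloc() and re-taken afterwards. The test does not say why; presumably a GFP_KERNEL allocation, which may sleep and enter reclaim, should never be made while holding the vm mutex. The pattern (lifted from the hunk, with the inferred reason as a comment) is:

	mutex_unlock(&i915->ggtt.vm.mutex);
	r = kcalloc(1, sizeof(*r), GFP_KERNEL);	/* may sleep / enter reclaim */
	mutex_lock(&i915->ggtt.vm.mutex);
	if (!r) {
		err = -ENOMEM;
		goto out_locked;	/* exit with the mutex still held */
	}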
*/ - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { struct i915_sw_fence fence; struct drm_file *file; @@ -520,7 +518,7 @@ static int igt_evict_contexts(void *arg) break; } - mutex_lock(&ggtt->vm.mutex); + mutex_lock(&i915->ggtt.vm.mutex); out_locked: if (igt_flush_test(i915)) err = -EIO; @@ -534,7 +532,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); - mutex_unlock(&ggtt->vm.mutex); + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; @@ -558,7 +556,7 @@ int i915_gem_evict_mock_selftests(void) return -ENOMEM; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, &i915->gt); + err = i915_subtests(tests, i915); drm_dev_put(&i915->drm); return err; @@ -573,5 +571,5 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return intel_gt_live_subtests(tests, &i915->gt); + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 3f7e80fb3bbd..ebe735df6504 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -104,7 +104,6 @@ static const struct drm_i915_gem_object_ops fake_ops = { static struct drm_i915_gem_object * fake_dma_object(struct drm_i915_private *i915, u64 size) { - static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -118,7 +117,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) goto err; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &fake_ops, &lock_class); + i915_gem_object_init(obj, &fake_ops); i915_gem_object_set_volatile(obj); @@ -1149,9 +1148,6 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; - if (!i915_ggtt_has_aperture(ggtt)) - return 0; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 4b3cac73e291..6daf6599ec79 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -17,7 +17,6 @@ selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) selftest(gt_lrc, intel_lrc_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) -selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) @@ -33,7 +32,6 @@ selftest(gem_contexts, i915_gem_context_live_selftests) selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) -selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index aabd07f67e49..dc6d689e4251 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -23,8 +23,7 @@ test_stream(struct i915_perf *perf) I915_ENGINE_CLASS_RENDER, 0), .sample_flags = SAMPLE_OA_REPORT, - .oa_format = IS_GEN(perf->i915, 12) ? 
- I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, + .oa_format = I915_OA_FORMAT_C4_B8, .metrics_set = 1, }; struct i915_perf_stream *stream; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 8618a4dc0701..30ae34f62176 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -464,7 +464,6 @@ static int mock_breadcrumbs_smoketest(void *arg) get_task_struct(threads[n]); } - yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); for (n = 0; n < ncpus; n++) { @@ -1159,8 +1158,6 @@ static int live_parallel_engines(void *arg) get_task_struct(tsk[idx++]); } - yield(); /* start all threads before we kthread_stop() */ - idx = 0; for_each_uabi_engine(engine, i915) { int status; @@ -1317,7 +1314,6 @@ static int live_breadcrumbs_smoketest(void *arg) idx++; } - yield(); /* start all threads before we begin */ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index a6cca4ad96f6..825a8286cbe8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,14 +23,13 @@ #include -#include "gt/intel_gt_pm.h" -#include "i915_drv.h" -#include "i915_selftest.h" +#include "../i915_drv.h" +#include "../i915_selftest.h" #include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { - .timeout_ms = 500, + .timeout_ms = 1000, }; int i915_mock_sanitycheck(void) @@ -257,10 +256,6 @@ int __i915_live_setup(void *data) { struct drm_i915_private *i915 = data; - /* The selftests expect an idle system */ - if (intel_gt_pm_wait_for_idle(&i915->gt)) - return -EIO; - return intel_gt_terminally_wedged(&i915->gt); } @@ -280,10 +275,6 @@ int __intel_gt_live_setup(void *data) { struct intel_gt *gt = data; - /* The selftests expect an idle system */ - if (intel_gt_pm_wait_for_idle(gt)) - return -EIO; - return intel_gt_terminally_wedged(gt); } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index c130010a7033..810b60100c2c 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -16,7 +16,6 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { - struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; int err; @@ -25,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); @@ -34,7 +33,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->reset_global = i915_reset_count(&i915->gpu_error); - for_each_engine(engine, gt, id) + for_each_engine(engine, i915, id) t->reset_engine[id] = i915_reset_engine_count(&i915->gpu_error, engine); @@ -57,7 +56,7 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, &i915->gt, id) { + for_each_engine(engine, i915, id) { if (t->reset_engine[id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 19e1cca8f143..56091e7e599e 100644 
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -11,15 +11,8 @@ #include "mock_gem_device.h" #include "mock_region.h" -#include "gem/i915_gem_context.h" -#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_object_blt.h" -#include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" -#include "gt/intel_engine_user.h" -#include "gt/intel_gt.h" -#include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" static void close_objects(struct intel_memory_region *mem, @@ -259,322 +252,6 @@ err_close_objects: return err; } -static int igt_gpu_write_dw(struct intel_context *ce, - struct i915_vma *vma, - u32 dword, - u32 value) -{ - return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32), - vma->size >> PAGE_SHIFT, value); -} - -static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) -{ - unsigned long n; - int err; - - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 __iomem *base; - u32 read_val; - - base = i915_gem_object_lmem_io_map_page_atomic(obj, n); - - read_val = ioread32(base + dword); - io_mapping_unmap_atomic(base); - if (read_val != val) { - pr_err("n=%lu base[%u]=%u, val=%u\n", - n, dword, read_val, val); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_pages(obj); - return err; -} - -static int igt_gpu_write(struct i915_gem_context *ctx, - struct drm_i915_gem_object *obj) -{ - struct i915_gem_engines *engines; - struct i915_gem_engines_iter it; - struct i915_address_space *vm; - struct intel_context *ce; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - unsigned int count; - struct i915_vma *vma; - int *order; - int i, n; - int err = 0; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - n = 0; - count = 0; - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - count++; - if (!intel_engine_can_store_dword(ce->engine)) - continue; - - vm = ce->vm; - n++; - } - i915_gem_context_unlock_engines(ctx); - if (!n) - return 0; - - order = i915_random_order(count * count, &prng); - if (!order) - return -ENOMEM; - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_free; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_free; - - i = 0; - engines = i915_gem_context_lock_engines(ctx); - do { - u32 rng = prandom_u32_state(&prng); - u32 dword = offset_in_page(rng) / 4; - - ce = engines->engines[order[i] % engines->num_engines]; - i = (i + 1) % (count * count); - if (!ce || !intel_engine_can_store_dword(ce->engine)) - continue; - - err = igt_gpu_write_dw(ce, vma, dword, rng); - if (err) - break; - - err = igt_cpu_check(obj, dword, rng); - if (err) - break; - } while (!__igt_timeout(end_time, NULL)); - i915_gem_context_unlock_engines(ctx); - -out_free: - kfree(order); - - if (err == -ENOMEM) - err = 0; - - return err; -} - -static int igt_lmem_create(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - int err = 0; - - obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static int 
igt_lmem_write_gpu(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct drm_file *file; - I915_RND_STATE(prng); - u32 sz; - int err; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_file; - } - - sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); - - obj = i915_gem_object_create_lmem(i915, sz, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_file; - } - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - err = igt_gpu_write(ctx, obj); - if (err) - pr_err("igt_gpu_write failed(%d)\n", err); - - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); -out_file: - mock_file_free(i915, file); - return err; -} - -static struct intel_engine_cs * -random_engine_class(struct drm_i915_private *i915, - unsigned int class, - struct rnd_state *prng) -{ - struct intel_engine_cs *engine; - unsigned int count; - - count = 0; - for (engine = intel_engine_lookup_user(i915, class, 0); - engine && engine->uabi_class == class; - engine = rb_entry_safe(rb_next(&engine->uabi_node), - typeof(*engine), uabi_node)) - count++; - - count = i915_prandom_u32_max_state(count, prng); - return intel_engine_lookup_user(i915, class, count); -} - -static int igt_lmem_write_cpu(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - u32 bytes[] = { - 0, /* rng placeholder */ - sizeof(u32), - sizeof(u64), - 64, /* cl */ - PAGE_SIZE, - PAGE_SIZE - sizeof(u32), - PAGE_SIZE - sizeof(u64), - PAGE_SIZE - 64, - }; - struct intel_engine_cs *engine; - u32 *vaddr; - u32 sz; - u32 i; - int *order; - int count; - int err; - - engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng); - if (!engine) - return 0; - - pr_info("%s: using %s\n", __func__, engine->name); - - sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); - sz = max_t(u32, 2 * PAGE_SIZE, sz); - - obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto out_put; - } - - /* Put the pages into a known state -- from the gpu for added fun */ - err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); - if (err) - goto out_unpin; - - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - goto out_unpin; - - count = ARRAY_SIZE(bytes); - order = i915_random_order(count * count, &prng); - if (!order) { - err = -ENOMEM; - goto out_unpin; - } - - /* We want to throw in a random width/align */ - bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), - sizeof(u32)); - - i = 0; - do { - u32 offset; - u32 align; - u32 dword; - u32 size; - u32 val; - - size = bytes[order[i] % count]; - i = (i + 1) % (count * count); - - align = bytes[order[i] % count]; - i = (i + 1) % (count * count); - - align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align)); - - offset = igt_random_offset(&prng, 0, obj->base.size, - size, align); - - val = prandom_u32_state(&prng); - memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, - size / sizeof(u32)); - - /* - * Sample random dw -- don't waste precious time reading every - * single dw. 
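To make the size/align/offset arithmetic above concrete, here is a worked example using the same helpers and the function's own variables, with values picked from the bytes[] table: take size = 64 and a raw align of PAGE_SIZE - sizeof(u64) = 4088. The clamp rounds the alignment down to a power of two, and the write plus the sampling step that follows then work out to:

	align = max_t(u32, sizeof(u32), rounddown_pow_of_two(4088));	/* -> 2048 */

	/* a 2048-byte aligned offset with room for 64 bytes */
	offset = igt_random_offset(&prng, 0, obj->base.size, 64, 2048);

	/* 64 bytes => 16 u32s get val ^ 0xdeadbeaf */
	memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, 64 / sizeof(u32));

	/* and one u32 somewhere inside [offset, offset + 64) is read back */
	dword = igt_random_offset(&prng, offset, offset + 64,
				  sizeof(u32), sizeof(u32)) / sizeof(u32);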
- */ - dword = igt_random_offset(&prng, offset, - offset + size, - sizeof(u32), sizeof(u32)); - dword /= sizeof(u32); - if (vaddr[dword] != (val ^ 0xdeadbeaf)) { - pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n", - __func__, dword, vaddr[dword], val ^ 0xdeadbeaf, - size, align, offset); - err = -EINVAL; - break; - } - } while (!__igt_timeout(end_time, NULL)); - -out_unpin: - i915_gem_object_unpin_map(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -603,22 +280,3 @@ out_unref: drm_dev_put(&i915->drm); return err; } - -int intel_memory_region_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_lmem_create), - SUBTEST(igt_lmem_write_cpu), - SUBTEST(igt_lmem_write_gpu), - }; - - if (!HAS_LMEM(i915)) { - pr_info("device lacks LMEM support, skipping\n"); - return 0; - } - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 0e4e6be0101d..0ffb141eb988 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -140,19 +140,19 @@ static int live_forcewake_ops(void *arg) } }; const struct reg *r; - struct intel_gt *gt = arg; + struct drm_i915_private *i915 = arg; struct intel_uncore_forcewake_domain *domain; - struct intel_uncore *uncore = gt->uncore; + struct intel_uncore *uncore = &i915->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; intel_wakeref_t wakeref; unsigned int tmp; int err = 0; - GEM_BUG_ON(gt->awake); + GEM_BUG_ON(i915->gt.awake); /* vlv/chv with their pcu behave differently wrt reads */ - if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) { + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { pr_debug("PCU fakes forcewake badly; skipping\n"); return 0; } @@ -170,15 +170,15 @@ static int live_forcewake_ops(void *arg) /* We have to pick carefully to get the exact behaviour we need */ for (r = registers; r->name; r++) - if (r->platforms & INTEL_INFO(gt->i915)->gen_mask) + if (r->platforms & INTEL_INFO(i915)->gen_mask) break; if (!r->name) { pr_debug("Forcewaked register not known for %s; skipping\n", - intel_platform_name(INTEL_INFO(gt->i915)->platform)); + intel_platform_name(INTEL_INFO(i915)->platform)); return 0; } - wakeref = intel_runtime_pm_get(uncore->rpm); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); for_each_fw_domain(domain, uncore, tmp) { smp_store_mb(domain->active, false); @@ -188,7 +188,7 @@ static int live_forcewake_ops(void *arg) intel_uncore_fw_release_timer(&domain->timer); } - for_each_engine(engine, gt, id) { + for_each_engine(engine, i915, id) { i915_reg_t mmio = _MMIO(engine->mmio_base + r->offset); u32 __iomem *reg = uncore->regs + engine->mmio_base + r->offset; enum forcewake_domains fw_domains; @@ -249,22 +249,22 @@ static int live_forcewake_ops(void *arg) } out_rpm: - intel_runtime_pm_put(uncore->rpm, wakeref); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; } static int live_forcewake_domains(void *arg) { #define FW_RANGE 0x40000 - struct intel_gt *gt = arg; - struct intel_uncore *uncore = gt->uncore; + struct drm_i915_private *dev_priv = arg; + struct intel_uncore *uncore = &dev_priv->uncore; unsigned long *valid; u32 offset; int err; - if (!HAS_FPGA_DBG_UNCLAIMED(gt->i915) && - !IS_VALLEYVIEW(gt->i915) && - 
!IS_CHERRYVIEW(gt->i915)) + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) return 0; /* @@ -283,7 +283,7 @@ static int live_forcewake_domains(void *arg) for (offset = 0; offset < FW_RANGE; offset += 4) { i915_reg_t reg = { offset }; - intel_uncore_posting_read_fw(uncore, reg); + (void)I915_READ_FW(reg); if (!check_for_unclaimed_mmio(uncore)) set_bit(offset, valid); } @@ -300,7 +300,7 @@ static int live_forcewake_domains(void *arg) check_for_unclaimed_mmio(uncore); - intel_uncore_posting_read_fw(uncore, reg); + (void)I915_READ(reg); if (check_for_unclaimed_mmio(uncore)) { pr_err("Unclaimed mmio read to register 0x%04x\n", offset); @@ -312,23 +312,21 @@ static int live_forcewake_domains(void *arg) return err; } -static int live_fw_table(void *arg) -{ - struct intel_gt *gt = arg; - - /* Confirm the table we load is still valid */ - return intel_fw_table_check(gt->uncore->fw_domains_table, - gt->uncore->fw_domains_table_entries, - INTEL_GEN(gt->i915) >= 9); -} - int intel_uncore_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(live_fw_table), SUBTEST(live_forcewake_ops), SUBTEST(live_forcewake_domains), }; - return intel_gt_live_subtests(tests, &i915->gt); + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); + if (err) + return err; + + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 27ed3cee6a9b..cb8c3a501cc7 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -28,7 +28,6 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" -#include "intel_memory_region.h" #include "mock_request.h" #include "mock_gem_device.h" @@ -41,14 +40,14 @@ void mock_device_flush(struct drm_i915_private *i915) { - struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; enum intel_engine_id id; do { - for_each_engine(engine, gt, id) + for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(&i915->gt, + MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) @@ -61,7 +60,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_workqueue(i915); - for_each_engine(engine, &i915->gt, id) + for_each_engine(engine, i915, id) mock_engine_free(engine); i915_gem_driver_release__contexts(i915); @@ -73,7 +72,7 @@ static void mock_device_release(struct drm_device *dev) mock_fini_ggtt(&i915->ggtt); destroy_workqueue(i915->wq); - intel_memory_regions_driver_release(i915); + i915_gem_cleanup_memory_regions(i915); drm_mode_config_cleanup(&i915->drm); @@ -165,7 +164,6 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_2M; mkwrite_device_info(i915)->memory_regions = REGION_SMEM; - intel_memory_regions_hw_probe(i915); mock_uncore_init(&i915->uncore, i915); @@ -199,6 +197,10 @@ struct drm_i915_private *mock_gem_device(void) intel_engines_driver_register(i915); + err = i915_gem_init_memory_regions(i915); + if (err) + goto err_context; + return i915; err_context: @@ -209,7 +211,6 @@ err_unlock: intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: - intel_memory_regions_driver_release(i915); 
drm_mode_config_cleanup(&i915->drm); drm_dev_fini(&i915->drm); put_device: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 20ac3844edec..173f2d4dbd14 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -63,7 +63,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) if (!ppgtt) return NULL; - ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); @@ -118,7 +117,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->vm.vma_ops.clear_pages = clear_pages; i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - i915->gt.ggtt = ggtt; + + intel_gt_init_hw_early(i915); } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index b2ad41c27e67..7b0c99ddc2d5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -19,7 +19,6 @@ mock_object_create(struct intel_memory_region *mem, resource_size_t size, unsigned int flags) { - static struct lock_class_key lock_class; struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; @@ -31,7 +30,7 @@ mock_object_create(struct intel_memory_region *mem, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class); + i915_gem_object_init(obj, &mock_region_obj_ops); obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 4a55bb6e2213..14878ebf59d7 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -3,8 +3,6 @@ * Copyright (c) 2015 MediaTek Inc. */ -#include - #include #include #include @@ -52,8 +50,6 @@ OVL_CON_CLRFMT_RGB : 0) #define OVL_CON_AEN BIT(8) #define OVL_CON_ALPHA 0xff -#define OVL_CON_VIRT_FLIP BIT(9) -#define OVL_CON_HORZ_FLIP BIT(10) struct mtk_disp_ovl_data { unsigned int addr; @@ -141,40 +137,6 @@ static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) return ovl->data->layer_nr; } -static unsigned int mtk_ovl_supported_rotations(struct mtk_ddp_comp *comp) -{ - return DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | - DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; -} - -static int mtk_ovl_layer_check(struct mtk_ddp_comp *comp, unsigned int idx, - struct mtk_plane_state *mtk_state) -{ - struct drm_plane_state *state = &mtk_state->base; - unsigned int rotation = 0; - - rotation = drm_rotation_simplify(state->rotation, - DRM_MODE_ROTATE_0 | - DRM_MODE_REFLECT_X | - DRM_MODE_REFLECT_Y); - rotation &= ~DRM_MODE_ROTATE_0; - - /* We can only do reflection, not rotation */ - if ((rotation & DRM_MODE_ROTATE_MASK) != 0) - return -EINVAL; - - /* - * TODO: Rotating/reflecting YUV buffers is not supported at this time. - * Only RGB[AX] variants are supported. 
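For context on the reflection-only check above: drm_rotation_simplify() folds a rotation that is not in the supported mask into the reflection bits it can express, so a 180-degree request becomes a double reflection and passes the check, while 90/270 degrees still carry a rotate bit and are rejected. A small sketch of the expected behaviour (my reading of the DRM helper, not part of the patch):

	unsigned int rotation;

	/* 180 degrees is not in the supported mask... */
	rotation = drm_rotation_simplify(DRM_MODE_ROTATE_180,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* ...so it comes back as ROTATE_0 | REFLECT_X | REFLECT_Y... */
	rotation &= ~DRM_MODE_ROTATE_0;

	/* ...and (rotation & DRM_MODE_ROTATE_MASK) == 0 here: accepted. */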
- */ - if (state->fb->format->is_yuv && rotation != 0) - return -EINVAL; - - state->rotation = rotation; - - return 0; -} - static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { unsigned int reg; @@ -267,16 +229,6 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, if (idx != 0) con |= OVL_CON_AEN | OVL_CON_ALPHA; - if (pending->rotation & DRM_MODE_REFLECT_Y) { - con |= OVL_CON_VIRT_FLIP; - addr += (pending->height - 1) * pending->pitch; - } - - if (pending->rotation & DRM_MODE_REFLECT_X) { - con |= OVL_CON_HORZ_FLIP; - addr += pending->pitch - 1; - } - writel_relaxed(con, comp->regs + DISP_REG_OVL_CON(idx)); writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx)); writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx)); @@ -311,11 +263,9 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .stop = mtk_ovl_stop, .enable_vblank = mtk_ovl_enable_vblank, .disable_vblank = mtk_ovl_disable_vblank, - .supported_rotations = mtk_ovl_supported_rotations, .layer_nr = mtk_ovl_layer_nr, .layer_on = mtk_ovl_layer_on, .layer_off = mtk_ovl_layer_off, - .layer_check = mtk_ovl_layer_check, .layer_config = mtk_ovl_layer_config, .bgclr_in_on = mtk_ovl_bgclr_in_on, .bgclr_in_off = mtk_ovl_bgclr_in_off, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index f80a8ba75977..b841d3706d8b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -207,28 +207,6 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc) clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk); } -static -struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, - struct drm_plane *plane, - unsigned int *local_layer) -{ - struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *comp; - int i, count = 0; - - for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { - comp = mtk_crtc->ddp_comp[i]; - if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) { - *local_layer = plane->index - count; - return comp; - } - count += mtk_ddp_comp_layer_nr(comp); - } - - WARN(1, "Failed to find component for plane %d\n", plane->index); - return NULL; -} - static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) { struct drm_crtc *crtc = &mtk_crtc->base; @@ -305,12 +283,19 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; - struct mtk_ddp_comp *comp; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; + unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); unsigned int local_layer; plane_state = to_mtk_plane_state(plane->state); - comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); - mtk_ddp_comp_layer_config(comp, local_layer, plane_state); + + if (i >= comp_layer_nr) { + comp = mtk_crtc->ddp_comp[1]; + local_layer = i - comp_layer_nr; + } else + local_layer = i; + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state); } return 0; @@ -358,6 +343,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; unsigned int i; + unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); unsigned int local_layer; /* @@ -380,30 +366,22 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) plane_state = to_mtk_plane_state(plane->state); - if (!plane_state->pending.config) - 
continue; + if (plane_state->pending.config) { + if (i >= comp_layer_nr) { + comp = mtk_crtc->ddp_comp[1]; + local_layer = i - comp_layer_nr; + } else + local_layer = i; - comp = mtk_drm_ddp_comp_for_plane(crtc, plane, - &local_layer); - - mtk_ddp_comp_layer_config(comp, local_layer, - plane_state); - plane_state->pending.config = false; + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state); + plane_state->pending.config = false; + } } mtk_crtc->pending_planes = false; } } -int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, - struct mtk_plane_state *state) -{ - unsigned int local_layer; - struct mtk_ddp_comp *comp; - - comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer); - return mtk_ddp_comp_layer_check(comp, local_layer, state); -} - static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { @@ -565,65 +543,14 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp) mtk_drm_finish_page_flip(mtk_crtc); } -static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc, - int comp_idx) -{ - struct mtk_ddp_comp *comp; - - if (comp_idx > 1) - return 0; - - comp = mtk_crtc->ddp_comp[comp_idx]; - if (!comp->funcs) - return 0; - - if (comp_idx == 1 && !comp->funcs->bgclr_in_on) - return 0; - - return mtk_ddp_comp_layer_nr(comp); -} - -static inline -enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx) -{ - if (plane_idx == 0) - return DRM_PLANE_TYPE_PRIMARY; - else if (plane_idx == 1) - return DRM_PLANE_TYPE_CURSOR; - else - return DRM_PLANE_TYPE_OVERLAY; - -} - -static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, - struct mtk_drm_crtc *mtk_crtc, - int comp_idx, int pipe) -{ - int num_planes = mtk_drm_crtc_num_comp_planes(mtk_crtc, comp_idx); - struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[comp_idx]; - int i, ret; - - for (i = 0; i < num_planes; i++) { - ret = mtk_plane_init(drm_dev, - &mtk_crtc->planes[mtk_crtc->layer_nr], - BIT(pipe), - mtk_drm_crtc_plane_type(mtk_crtc->layer_nr), - mtk_ddp_comp_supported_rotations(comp)); - if (ret) - return ret; - - mtk_crtc->layer_nr++; - } - return 0; -} - int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len) { struct mtk_drm_private *priv = drm_dev->dev_private; struct device *dev = drm_dev->dev; struct mtk_drm_crtc *mtk_crtc; - unsigned int num_comp_planes = 0; + enum drm_plane_type type; + unsigned int zpos; int pipe = priv->num_pipes; int ret; int i; @@ -679,15 +606,23 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mtk_crtc->ddp_comp[i] = comp; } - for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) - num_comp_planes += mtk_drm_crtc_num_comp_planes(mtk_crtc, i); + mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); + if (mtk_crtc->ddp_comp_nr > 1) { + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[1]; - mtk_crtc->planes = devm_kcalloc(dev, num_comp_planes, - sizeof(struct drm_plane), GFP_KERNEL); + if (comp->funcs->bgclr_in_on) + mtk_crtc->layer_nr += mtk_ddp_comp_layer_nr(comp); + } + mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr, + sizeof(struct drm_plane), + GFP_KERNEL); - for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { - ret = mtk_drm_crtc_init_comp_planes(drm_dev, mtk_crtc, i, - pipe); + for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) { + type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY : + (zpos == 1) ? 
DRM_PLANE_TYPE_CURSOR : + DRM_PLANE_TYPE_OVERLAY; + ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[zpos], + BIT(pipe), type); if (ret) return ret; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 6afe1c19557a..fcc134eb00c9 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -19,7 +19,5 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp); int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len); -int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, - struct mtk_plane_state *state); #endif /* MTK_DRM_CRTC_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 2f1e9e75b8da..26441f4d1ad3 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -77,13 +77,9 @@ struct mtk_ddp_comp_funcs { void (*stop)(struct mtk_ddp_comp *comp); void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc); void (*disable_vblank)(struct mtk_ddp_comp *comp); - unsigned int (*supported_rotations)(struct mtk_ddp_comp *comp); unsigned int (*layer_nr)(struct mtk_ddp_comp *comp); void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx); - int (*layer_check)(struct mtk_ddp_comp *comp, - unsigned int idx, - struct mtk_plane_state *state); void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state); void (*gamma_set)(struct mtk_ddp_comp *comp, @@ -134,15 +130,6 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp) comp->funcs->disable_vblank(comp); } -static inline -unsigned int mtk_ddp_comp_supported_rotations(struct mtk_ddp_comp *comp) -{ - if (comp->funcs && comp->funcs->supported_rotations) - return comp->funcs->supported_rotations(comp); - - return 0; -} - static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp) { if (comp->funcs && comp->funcs->layer_nr) @@ -165,15 +152,6 @@ static inline void mtk_ddp_comp_layer_off(struct mtk_ddp_comp *comp, comp->funcs->layer_off(comp, idx); } -static inline int mtk_ddp_comp_layer_check(struct mtk_ddp_comp *comp, - unsigned int idx, - struct mtk_plane_state *state) -{ - if (comp->funcs && comp->funcs->layer_check) - return comp->funcs->layer_check(comp, idx, state); - return 0; -} - static inline void mtk_ddp_comp_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 3b0cc91c7023..584a9ecadce6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -20,12 +20,6 @@ static const u32 formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, - DRM_FORMAT_BGRX8888, - DRM_FORMAT_BGRA8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_RGB888, - DRM_FORMAT_BGR888, DRM_FORMAT_RGB565, DRM_FORMAT_UYVY, DRM_FORMAT_YUYV, @@ -90,7 +84,6 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, { struct drm_framebuffer *fb = state->fb; struct drm_crtc_state *crtc_state; - int ret; if (!fb) return 0; @@ -98,11 +91,6 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, if (!state->crtc) return 0; - ret = mtk_drm_crtc_plane_check(state->crtc, plane, - to_mtk_plane_state(state)); - if (ret) - return ret; - crtc_state = 
drm_atomic_get_crtc_state(state->state, state->crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); @@ -144,7 +132,6 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, state->pending.y = plane->state->dst.y1; state->pending.width = drm_rect_width(&plane->state->dst); state->pending.height = drm_rect_height(&plane->state->dst); - state->pending.rotation = plane->state->rotation; wmb(); /* Make sure the above parameters are set before update */ state->pending.dirty = true; } @@ -167,8 +154,7 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { }; int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, - unsigned long possible_crtcs, enum drm_plane_type type, - unsigned int supported_rotations) + unsigned long possible_crtcs, enum drm_plane_type type) { int err; @@ -180,14 +166,6 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, return err; } - if (supported_rotations & ~DRM_MODE_ROTATE_0) { - err = drm_plane_create_rotation_property(plane, - DRM_MODE_ROTATE_0, - supported_rotations); - if (err) - DRM_INFO("Create rotation property failed\n"); - } - drm_plane_helper_add(plane, &mtk_plane_helper_funcs); return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 760885e35b27..6f842df722c7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -20,7 +20,6 @@ struct mtk_plane_pending_state { unsigned int y; unsigned int width; unsigned int height; - unsigned int rotation; bool dirty; }; @@ -36,7 +35,6 @@ to_mtk_plane_state(struct drm_plane_state *state) } int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, - unsigned long possible_crtcs, enum drm_plane_type type, - unsigned int supported_rotations); + unsigned long possible_crtcs, enum drm_plane_type type); #endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 691c1a277d91..e686331fa089 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -352,26 +352,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, cxdbg = ioremap(res->start, resource_size(res)); if (cxdbg) { - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT, + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM, + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0, + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1, + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0); - cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0); + cxdbg_write(cxdbg, 
REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); } a6xx_state->debugbus = state_kcalloc(a6xx_state, diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 1c74381a4fc9..6be879578140 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -47,8 +47,12 @@ static int msm_gpu_release(struct inode *inode, struct file *file) struct msm_gpu_show_priv *show_priv = m->private; struct msm_drm_private *priv = show_priv->dev->dev_private; struct msm_gpu *gpu = priv->gpu; + int ret; + + ret = mutex_lock_interruptible(&show_priv->dev->struct_mutex); + if (ret) + return ret; - mutex_lock(&show_priv->dev->struct_mutex); gpu->funcs->gpu_state_put(show_priv->state); mutex_unlock(&show_priv->dev->struct_mutex); diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 549486f1d937..a13924ae1992 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -986,11 +986,20 @@ nv50_mstc_atomic_check(struct drm_connector *connector, return drm_dp_atomic_release_vcpi_slots(state, mgr, mstc->port); } -static int -nv50_mstc_detect(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, bool force) +static const struct drm_connector_helper_funcs +nv50_mstc_help = { + .get_modes = nv50_mstc_get_modes, + .mode_valid = nv50_mstc_mode_valid, + .best_encoder = nv50_mstc_best_encoder, + .atomic_best_encoder = nv50_mstc_atomic_best_encoder, + .atomic_check = nv50_mstc_atomic_check, +}; + +static enum drm_connector_status +nv50_mstc_detect(struct drm_connector *connector, bool force) { struct nv50_mstc *mstc = nv50_mstc(connector); + enum drm_connector_status conn_status; int ret; if (drm_connector_is_unregistered(connector)) @@ -1000,24 +1009,14 @@ nv50_mstc_detect(struct drm_connector *connector, if (ret < 0 && ret != -EACCES) return connector_status_disconnected; - ret = drm_dp_mst_detect_port(connector, ctx, mstc->port->mgr, - mstc->port); + conn_status = drm_dp_mst_detect_port(connector, mstc->port->mgr, + mstc->port); pm_runtime_mark_last_busy(connector->dev->dev); pm_runtime_put_autosuspend(connector->dev->dev); - return ret; + return conn_status; } -static const struct drm_connector_helper_funcs -nv50_mstc_help = { - .get_modes = nv50_mstc_get_modes, - .mode_valid = nv50_mstc_mode_valid, - .best_encoder = nv50_mstc_best_encoder, - .atomic_best_encoder = nv50_mstc_atomic_best_encoder, - .atomic_check = nv50_mstc_atomic_check, - .detect_ctx = nv50_mstc_detect, -}; - static void nv50_mstc_destroy(struct drm_connector *connector) { @@ -1032,6 +1031,7 @@ nv50_mstc_destroy(struct drm_connector *connector) static const struct drm_connector_funcs nv50_mstc = { .reset = nouveau_conn_reset, + .detect = nv50_mstc_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = nv50_mstc_destroy, .atomic_duplicate_state = nouveau_conn_atomic_duplicate_state, @@ -1309,14 +1309,14 @@ nv50_mstm_fini(struct nv50_mstm *mstm) } static void -nv50_mstm_init(struct nv50_mstm *mstm, bool runtime) +nv50_mstm_init(struct nv50_mstm *mstm) { int ret; if (!mstm || !mstm->mgr.mst_state) return; - ret = drm_dp_mst_topology_mgr_resume(&mstm->mgr, !runtime); + ret = drm_dp_mst_topology_mgr_resume(&mstm->mgr); if (ret == -1) { drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false); 
drm_kms_helper_hotplug_event(mstm->mgr.dev); @@ -2263,7 +2263,7 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime) if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - nv50_mstm_init(nv_encoder->dp.mstm, runtime); + nv50_mstm_init(nv_encoder->dp.mstm); } } diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 5b413588b823..3a5db17bc5c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1130,16 +1130,6 @@ nouveau_connector_hotplug(struct nvif_notify *notify) const char *name = connector->name; struct nouveau_encoder *nv_encoder; int ret; - bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG); - - if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { - NV_DEBUG(drm, "service %s\n", name); - drm_dp_cec_irq(&nv_connector->aux); - if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) - nv50_mstm_service(nv_encoder->dp.mstm); - - return NVIF_NOTIFY_KEEP; - } ret = pm_runtime_get(drm->dev->dev); if (ret == 0) { @@ -1160,15 +1150,24 @@ nouveau_connector_hotplug(struct nvif_notify *notify) return NVIF_NOTIFY_DROP; } - if (!plugged) - drm_dp_cec_unset_edid(&nv_connector->aux); - NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name); - if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) { - if (!plugged) - nv50_mstm_remove(nv_encoder->dp.mstm); - } + if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { + NV_DEBUG(drm, "service %s\n", name); + drm_dp_cec_irq(&nv_connector->aux); + if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) + nv50_mstm_service(nv_encoder->dp.mstm); + } else { + bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG); - drm_helper_hpd_irq_event(connector->dev); + if (!plugged) + drm_dp_cec_unset_edid(&nv_connector->aux); + NV_DEBUG(drm, "%splugged %s\n", plugged ? 
"" : "un", name); + if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) { + if (!plugged) + nv50_mstm_remove(nv_encoder->dp.mstm); + } + + drm_helper_hpd_irq_event(connector->dev); + } pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_autosuspend(drm->dev->dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 53f9bceaf17a..6f038511a03a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -407,17 +407,6 @@ nouveau_display_init(struct drm_device *dev, bool resume, bool runtime) struct drm_connector_list_iter conn_iter; int ret; - /* - * Enable hotplug interrupts (done as early as possible, since we need - * them for MST) - */ - drm_connector_list_iter_begin(dev, &conn_iter); - nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { - struct nouveau_connector *conn = nouveau_connector(connector); - nvif_notify_get(&conn->hpd); - } - drm_connector_list_iter_end(&conn_iter); - ret = disp->init(dev, resume, runtime); if (ret) return ret; @@ -427,6 +416,14 @@ nouveau_display_init(struct drm_device *dev, bool resume, bool runtime) */ drm_kms_helper_poll_enable(dev); + /* enable hotplug interrupts */ + drm_connector_list_iter_begin(dev, &conn_iter); + nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { + struct nouveau_connector *conn = nouveau_connector(connector); + nvif_notify_get(&conn->hpd); + } + drm_connector_list_iter_end(&conn_iter); + return ret; } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index daff9a2af3be..62eab82a64f9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -221,7 +221,9 @@ int ci_get_temp(struct radeon_device *rdev) else actual_temp = temp & 0x1ff; - return actual_temp * 1000; + actual_temp = actual_temp * 1000; + + return actual_temp; } /* get temperature in millidegrees */ @@ -237,7 +239,9 @@ int kv_get_temp(struct radeon_device *rdev) else actual_temp = 0; - return actual_temp * 1000; + actual_temp = actual_temp * 1000; + + return actual_temp; } /* diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 72db2b41e96d..b9aea5776d3d 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -367,10 +367,10 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) return; sad_count = drm_edid_to_sad(radeon_connector_edid(connector), &sads); - if (sad_count < 0) + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) return; + } BUG_ON(!sads); if (radeon_encoder->audio && radeon_encoder->audio->write_sad_regs) diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index ee28f5b3785e..2994f07fbad9 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -233,26 +233,21 @@ drm_encoder *radeon_mst_best_encoder(struct drm_connector *connector) return &radeon_connector->mst_encoder->base; } -static int -radeon_dp_mst_detect(struct drm_connector *connector, - struct drm_modeset_acquire_ctx *ctx, - bool force) -{ - struct radeon_connector *radeon_connector = - to_radeon_connector(connector); - struct radeon_connector *master = radeon_connector->mst_port; - - return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr, - radeon_connector->port); -} - static const struct drm_connector_helper_funcs radeon_dp_mst_connector_helper_funcs = { .get_modes = 
radeon_dp_mst_get_modes, .mode_valid = radeon_dp_mst_mode_valid, .best_encoder = radeon_mst_best_encoder, - .detect_ctx = radeon_dp_mst_detect, }; +static enum drm_connector_status +radeon_dp_mst_detect(struct drm_connector *connector, bool force) +{ + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector *master = radeon_connector->mst_port; + + return drm_dp_mst_detect_port(connector, &master->mst_mgr, radeon_connector->port); +} + static void radeon_dp_mst_connector_destroy(struct drm_connector *connector) { @@ -267,6 +262,7 @@ radeon_dp_mst_connector_destroy(struct drm_connector *connector) static const struct drm_connector_funcs radeon_dp_mst_connector_funcs = { .dpms = drm_helper_connector_dpms, + .detect = radeon_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = radeon_dp_mst_connector_destroy, }; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 41f08321a361..888e0f384c61 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,25 +379,11 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { -#ifdef CONFIG_PPC64 - struct drm_device *ddev = pci_get_drvdata(pdev); -#endif - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); - -#ifdef CONFIG_PPC64 - /* Some adapters need to be suspended before a - * shutdown occurs in order to prevent an error - * during kexec. - * Make this power specific becauase it breaks - * some non-power boards. - */ - radeon_suspend_kms(ddev, true, true, false); -#endif } static int radeon_pmops_suspend(struct device *dev) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2af64459b3d7..d4cc7289147e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -479,7 +479,6 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) struct drm_sched_job *s_job, *tmp; uint64_t guilty_context; bool found_guilty = false; - struct dma_fence *fence; list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; @@ -493,16 +492,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) dma_fence_set_error(&s_fence->finished, -ECANCELED); dma_fence_put(s_job->s_fence->parent); - fence = sched->ops->run_job(s_job); - - if (IS_ERR_OR_NULL(fence)) { - s_job->s_fence->parent = NULL; - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); - } else { - s_job->s_fence->parent = fence; - } - - + s_job->s_fence->parent = sched->ops->run_job(s_job); } } EXPORT_SYMBOL(drm_sched_resubmit_jobs); @@ -735,7 +725,7 @@ static int drm_sched_main(void *param) fence = sched->ops->run_job(sched_job); drm_sched_fence_scheduled(s_fence); - if (!IS_ERR_OR_NULL(fence)) { + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_process_job); @@ -745,11 +735,8 @@ static int drm_sched_main(void *param) DRM_ERROR("fence add callback failed (%d)\n", r); dma_fence_put(fence); - } else { - - dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); + } else drm_sched_process_job(NULL, &sched_job->cb); - } wake_up(&sched->job_scheduled); } diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 5043dcaf1cf9..1d1269fde3c1 100644 --- 
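The drm_sched hunks above revert the error handling around the run_job() callback: the removed lines distinguish NULL, ERR_PTR-encoded errors and real fences via IS_ERR_OR_NULL() and propagate PTR_ERR() into the finished fence, while the restored code only checks for NULL. The following is a userspace re-implementation of the ERR_PTR convention to show why the two checks are not equivalent; MAX_ERRNO matches the kernel's convention, and the classify() helper is purely illustrative.

#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
static inline int IS_ERR_OR_NULL(const void *ptr)
{
	return !ptr || IS_ERR(ptr);
}

static void classify(const void *fence)
{
	if (!fence)
		printf("NULL: no hardware fence, complete the job directly\n");
	else if (IS_ERR(fence))
		printf("error %ld: propagate into the finished fence\n",
		       PTR_ERR(fence));
	else
		printf("valid fence %p: install a completion callback\n", fence);
}

int main(void)
{
	int dummy;

	classify(NULL);
	classify(ERR_PTR(-12));	/* -ENOMEM */
	classify(&dummy);
	return 0;
}
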
a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -9,7 +9,7 @@ config DRM_TEGRA select DRM_MIPI_DSI select DRM_PANEL select TEGRA_HOST1X - select IOMMU_IOVA + select IOMMU_IOVA if IOMMU_SUPPORT select CEC_CORE if CEC_NOTIFIER help Choose this option if you have an NVIDIA Tegra SoC. diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 5b1f9ff97576..fbf57bc3cdab 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -715,7 +715,9 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, window.swap = state->swap; for (i = 0; i < fb->format->num_planes; i++) { - window.base[i] = state->iova[i] + fb->offsets[i]; + struct tegra_bo *bo = tegra_fb_get_plane(fb, i); + + window.base[i] = bo->paddr + fb->offsets[i]; /* * Tegra uses a shared stride for UV planes. Framebuffers are @@ -730,8 +732,6 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = { - .prepare_fb = tegra_plane_prepare_fb, - .cleanup_fb = tegra_plane_cleanup_fb, .atomic_check = tegra_plane_atomic_check, .atomic_disable = tegra_plane_atomic_disable, .atomic_update = tegra_plane_atomic_update, @@ -869,11 +869,11 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, return; } - value |= (bo->iova >> 10) & 0x3fffff; + value |= (bo->paddr >> 10) & 0x3fffff; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (bo->iova >> 32) & 0x3; + value = (bo->paddr >> 32) & 0x3; tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -914,8 +914,6 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane, } static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = { - .prepare_fb = tegra_plane_prepare_fb, - .cleanup_fb = tegra_plane_cleanup_fb, .atomic_check = tegra_cursor_atomic_check, .atomic_update = tegra_cursor_atomic_update, .atomic_disable = tegra_cursor_atomic_disable, @@ -2016,8 +2014,9 @@ static int tegra_dc_init(struct host1x_client *client) if (!dc->syncpt) dev_warn(dc->dev, "failed to allocate syncpoint\n"); - err = host1x_client_iommu_attach(client); - if (err < 0) { + dc->group = host1x_client_iommu_attach(client, true); + if (IS_ERR(dc->group)) { + err = PTR_ERR(dc->group); dev_err(client->dev, "failed to attach to domain: %d\n", err); return err; } @@ -2075,12 +2074,6 @@ static int tegra_dc_init(struct host1x_client *client) goto cleanup; } - /* - * Inherit the DMA parameters (such as maximum segment size) from the - * parent device. 
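In the tegra dc.c hunks above the plane and cursor paths go back to programming raw physical addresses (bo->paddr) instead of per-state IOVAs. The cursor hunk also shows how the buffer address is split across two registers: bits [31:10] of a 1 KiB-aligned address into DC_DISP_CURSOR_START_ADDR and bits [33:32] into DC_DISP_CURSOR_START_ADDR_HI. The field widths below are taken from the masks in the hunk; the example address and the printing are illustrative only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x1a0012400ULL;		/* example 1 KiB-aligned address */
	uint32_t lo = (addr >> 10) & 0x3fffff;	/* address bits 31:10 */
	uint32_t hi = (addr >> 32) & 0x3;	/* address bits 33:32 */

	printf("CURSOR_START_ADDR    = 0x%08x\n", lo);
	printf("CURSOR_START_ADDR_HI = 0x%08x\n", hi);
	return 0;
}
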
- */ - client->dev->dma_parms = client->parent->dma_parms; - return 0; cleanup: @@ -2090,7 +2083,7 @@ cleanup: if (!IS_ERR(primary)) drm_plane_cleanup(primary); - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, dc->group); host1x_syncpt_free(dc->syncpt); return err; @@ -2104,9 +2097,6 @@ static int tegra_dc_exit(struct host1x_client *client) if (!tegra_dc_has_window_groups(dc)) return 0; - /* avoid a dangling pointer just in case this disappears */ - client->dev->dma_parms = NULL; - devm_free_irq(dc->dev, dc->irq, dc); err = tegra_dc_rgb_exit(dc); @@ -2115,7 +2105,7 @@ static int tegra_dc_exit(struct host1x_client *client) return err; } - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, dc->group); host1x_syncpt_free(dc->syncpt); return 0; diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 3d8ddccd758f..0c4d17851f47 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -90,6 +90,8 @@ struct tegra_dc { struct drm_info_list *debugfs_files; const struct tegra_dc_soc_info *soc; + + struct iommu_group *group; }; static inline struct tegra_dc * diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c index 70dfb7d1dec5..50ba967ebcbd 100644 --- a/drivers/gpu/drm/tegra/dp.c +++ b/drivers/gpu/drm/tegra/dp.c @@ -4,158 +4,10 @@ * Copyright (C) 2015 Rob Clark */ -#include #include -#include #include "dp.h" -static const u8 drm_dp_edp_revisions[] = { 0x11, 0x12, 0x13, 0x14 }; - -static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps) -{ - caps->enhanced_framing = false; - caps->tps3_supported = false; - caps->fast_training = false; - caps->channel_coding = false; - caps->alternate_scrambler_reset = false; -} - -void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest, - const struct drm_dp_link_caps *src) -{ - dest->enhanced_framing = src->enhanced_framing; - dest->tps3_supported = src->tps3_supported; - dest->fast_training = src->fast_training; - dest->channel_coding = src->channel_coding; - dest->alternate_scrambler_reset = src->alternate_scrambler_reset; -} - -static void drm_dp_link_reset(struct drm_dp_link *link) -{ - unsigned int i; - - if (!link) - return; - - link->revision = 0; - link->max_rate = 0; - link->max_lanes = 0; - - drm_dp_link_caps_reset(&link->caps); - link->aux_rd_interval.cr = 0; - link->aux_rd_interval.ce = 0; - link->edp = 0; - - link->rate = 0; - link->lanes = 0; - - for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) - link->rates[i] = 0; - - link->num_rates = 0; -} - -/** - * drm_dp_link_add_rate() - add a rate to the list of supported rates - * @link: the link to add the rate to - * @rate: the rate to add - * - * Add a link rate to the list of supported link rates. 
- * - * Returns: - * 0 on success or one of the following negative error codes on failure: - * - ENOSPC if the maximum number of supported rates has been reached - * - EEXISTS if the link already supports this rate - * - * See also: - * drm_dp_link_remove_rate() - */ -int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate) -{ - unsigned int i, pivot; - - if (link->num_rates == DP_MAX_SUPPORTED_RATES) - return -ENOSPC; - - for (pivot = 0; pivot < link->num_rates; pivot++) - if (rate <= link->rates[pivot]) - break; - - if (pivot != link->num_rates && rate == link->rates[pivot]) - return -EEXIST; - - for (i = link->num_rates; i > pivot; i--) - link->rates[i] = link->rates[i - 1]; - - link->rates[pivot] = rate; - link->num_rates++; - - return 0; -} - -/** - * drm_dp_link_remove_rate() - remove a rate from the list of supported rates - * @link: the link from which to remove the rate - * @rate: the rate to remove - * - * Removes a link rate from the list of supported link rates. - * - * Returns: - * 0 on success or one of the following negative error codes on failure: - * - EINVAL if the specified rate is not among the supported rates - * - * See also: - * drm_dp_link_add_rate() - */ -int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate) -{ - unsigned int i; - - for (i = 0; i < link->num_rates; i++) - if (rate == link->rates[i]) - break; - - if (i == link->num_rates) - return -EINVAL; - - link->num_rates--; - - while (i < link->num_rates) { - link->rates[i] = link->rates[i + 1]; - i++; - } - - return 0; -} - -/** - * drm_dp_link_update_rates() - normalize the supported link rates array - * @link: the link for which to normalize the supported link rates - * - * Users should call this function after they've manually modified the array - * of supported link rates. This function removes any stale entries, compacts - * the array and updates the supported link rate count. Note that calling the - * drm_dp_link_remove_rate() function already does this janitorial work. 
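The drm_dp_link_add_rate()/drm_dp_link_remove_rate() helpers being deleted above keep the per-link rate table sorted and duplicate-free, and removal closes the resulting gap. Below is a self-contained sketch of that bookkeeping, mirroring the removed logic; the eight-entry table size is an assumption standing in for DP_MAX_SUPPORTED_RATES, and main() is only a demonstration.

#include <errno.h>
#include <stdio.h>

#define MAX_SUPPORTED_RATES 8	/* assumed value of DP_MAX_SUPPORTED_RATES */

struct link_rates {
	unsigned long rates[MAX_SUPPORTED_RATES];
	unsigned int num_rates;
};

static int add_rate(struct link_rates *l, unsigned long rate)
{
	unsigned int i, pivot;

	if (l->num_rates == MAX_SUPPORTED_RATES)
		return -ENOSPC;

	/* find the insertion point that keeps the array sorted */
	for (pivot = 0; pivot < l->num_rates; pivot++)
		if (rate <= l->rates[pivot])
			break;

	if (pivot != l->num_rates && rate == l->rates[pivot])
		return -EEXIST;

	/* shift the tail up by one slot and drop the new rate in */
	for (i = l->num_rates; i > pivot; i--)
		l->rates[i] = l->rates[i - 1];

	l->rates[pivot] = rate;
	l->num_rates++;
	return 0;
}

static int remove_rate(struct link_rates *l, unsigned long rate)
{
	unsigned int i;

	for (i = 0; i < l->num_rates; i++)
		if (rate == l->rates[i])
			break;

	if (i == l->num_rates)
		return -EINVAL;

	l->num_rates--;
	for (; i < l->num_rates; i++)
		l->rates[i] = l->rates[i + 1];

	return 0;
}

int main(void)
{
	struct link_rates l = { .num_rates = 0 };
	unsigned int i;

	add_rate(&l, 270000);
	add_rate(&l, 162000);
	add_rate(&l, 540000);
	remove_rate(&l, 270000);

	for (i = 0; i < l.num_rates; i++)
		printf("%lu kHz\n", l.rates[i]);
	return 0;
}
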
- * - * See also: - * drm_dp_link_add_rate(), drm_dp_link_remove_rate() - */ -void drm_dp_link_update_rates(struct drm_dp_link *link) -{ - unsigned int i, count = 0; - - for (i = 0; i < link->num_rates; i++) { - if (link->rates[i] != 0) - link->rates[count++] = link->rates[i]; - } - - for (i = count; i < link->num_rates; i++) - link->rates[i] = 0; - - link->num_rates = count; -} - /** * drm_dp_link_probe() - probe a DisplayPort link for capabilities * @aux: DisplayPort AUX channel @@ -169,88 +21,21 @@ void drm_dp_link_update_rates(struct drm_dp_link *link) */ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link) { - u8 dpcd[DP_RECEIVER_CAP_SIZE], value; - unsigned int rd_interval; + u8 values[3]; int err; - drm_dp_link_reset(link); + memset(link, 0, sizeof(*link)); - err = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd)); + err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values)); if (err < 0) return err; - link->revision = dpcd[DP_DPCD_REV]; - link->max_rate = drm_dp_max_link_rate(dpcd); - link->max_lanes = drm_dp_max_lane_count(dpcd); + link->revision = values[0]; + link->rate = drm_dp_bw_code_to_link_rate(values[1]); + link->num_lanes = values[2] & DP_MAX_LANE_COUNT_MASK; - link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd); - link->caps.tps3_supported = drm_dp_tps3_supported(dpcd); - link->caps.fast_training = drm_dp_fast_training_cap(dpcd); - link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd); - - if (drm_dp_alternate_scrambler_reset_cap(dpcd)) { - link->caps.alternate_scrambler_reset = true; - - err = drm_dp_dpcd_readb(aux, DP_EDP_DPCD_REV, &value); - if (err < 0) - return err; - - if (value >= ARRAY_SIZE(drm_dp_edp_revisions)) - DRM_ERROR("unsupported eDP version: %02x\n", value); - else - link->edp = drm_dp_edp_revisions[value]; - } - - /* - * The DPCD stores the AUX read interval in units of 4 ms. There are - * two special cases: - * - * 1) if the TRAINING_AUX_RD_INTERVAL field is 0, the clock recovery - * and channel equalization should use 100 us or 400 us AUX read - * intervals, respectively - * - * 2) for DP v1.4 and above, clock recovery should always use 100 us - * AUX read intervals - */ - rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & - DP_TRAINING_AUX_RD_MASK; - - if (rd_interval > 4) { - DRM_DEBUG_KMS("AUX interval %u out of range (max. 
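The simplified drm_dp_link_probe() added above reads just three DPCD bytes starting at DP_DPCD_REV and derives the revision, the maximum link rate and the lane count plus enhanced-framing capability from them. The sketch below decodes the same three bytes in userspace; the bit positions and the "bandwidth code x 27000 kHz" conversion follow my reading of the DPCD layout and of drm_dp_bw_code_to_link_rate(), so treat them as assumptions rather than a restatement of the helper.

#include <stdint.h>
#include <stdio.h>

#define MAX_LANE_COUNT_MASK	0x1f		/* assumed DP_MAX_LANE_COUNT_MASK */
#define ENHANCED_FRAME_CAP	(1 << 7)	/* assumed DP_ENHANCED_FRAME_CAP */

struct dp_link {
	unsigned char revision;
	unsigned int rate;	/* kHz */
	unsigned int num_lanes;
	int enhanced_framing;
};

static void parse_dpcd(const uint8_t values[3], struct dp_link *link)
{
	link->revision = values[0];		/* e.g. 0x12 for DPCD 1.2 */
	link->rate = values[1] * 27000;		/* bandwidth code -> kHz */
	link->num_lanes = values[2] & MAX_LANE_COUNT_MASK;
	link->enhanced_framing = !!(values[2] & ENHANCED_FRAME_CAP);
}

int main(void)
{
	const uint8_t dpcd[3] = { 0x12, 0x14, 0x84 };	/* DP 1.2, HBR2, 4 lanes + EF */
	struct dp_link link;

	parse_dpcd(dpcd, &link);
	printf("rev %02x, %u kHz, %u lanes, enhanced framing: %d\n",
	       link.revision, link.rate, link.num_lanes, link.enhanced_framing);
	return 0;
}
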
4)\n", - rd_interval); - rd_interval = 4; - } - - rd_interval *= 4 * USEC_PER_MSEC; - - if (rd_interval == 0 || link->revision >= DP_DPCD_REV_14) - link->aux_rd_interval.cr = 100; - - if (rd_interval == 0) - link->aux_rd_interval.ce = 400; - - link->rate = link->max_rate; - link->lanes = link->max_lanes; - - /* Parse SUPPORTED_LINK_RATES from eDP 1.4 */ - if (link->edp >= 0x14) { - u8 supported_rates[DP_MAX_SUPPORTED_RATES * 2]; - unsigned int i; - u16 rate; - - err = drm_dp_dpcd_read(aux, DP_SUPPORTED_LINK_RATES, - supported_rates, - sizeof(supported_rates)); - if (err < 0) - return err; - - for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) { - rate = supported_rates[i * 2 + 1] << 8 | - supported_rates[i * 2 + 0]; - - drm_dp_link_add_rate(link, rate * 200); - } - } + if (values[2] & DP_ENHANCED_FRAME_CAP) + link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING; return 0; } @@ -331,546 +116,18 @@ int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link) */ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link) { - u8 values[2], value; + u8 values[2]; int err; - if (link->ops && link->ops->configure) { - err = link->ops->configure(link); - if (err < 0) { - DRM_ERROR("failed to configure DP link: %d\n", err); - return err; - } - } - values[0] = drm_dp_link_rate_to_bw_code(link->rate); - values[1] = link->lanes; + values[1] = link->num_lanes; - if (link->caps.enhanced_framing) + if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING) values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values)); if (err < 0) return err; - if (link->caps.channel_coding) - value = DP_SET_ANSI_8B10B; - else - value = 0; - - err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, value); - if (err < 0) - return err; - - if (link->caps.alternate_scrambler_reset) { - err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, - DP_ALTERNATE_SCRAMBLER_RESET_ENABLE); - if (err < 0) - return err; - } - return 0; } - -/** - * drm_dp_link_choose() - choose the lowest possible configuration for a mode - * @link: DRM DP link object - * @mode: DRM display mode - * @info: DRM display information - * - * According to the eDP specification, a source should select a configuration - * with the lowest number of lanes and the lowest possible link rate that can - * match the bitrate requirements of a video mode. However it must ensure not - * to exceed the capabilities of the sink. - * - * Returns: 0 on success or a negative error code on failure. - */ -int drm_dp_link_choose(struct drm_dp_link *link, - const struct drm_display_mode *mode, - const struct drm_display_info *info) -{ - /* available link symbol clock rates */ - static const unsigned int rates[3] = { 162000, 270000, 540000 }; - /* available number of lanes */ - static const unsigned int lanes[3] = { 1, 2, 4 }; - unsigned long requirement, capacity; - unsigned int rate = link->max_rate; - unsigned int i, j; - - /* bandwidth requirement */ - requirement = mode->clock * info->bpc * 3; - - for (i = 0; i < ARRAY_SIZE(lanes) && lanes[i] <= link->max_lanes; i++) { - for (j = 0; j < ARRAY_SIZE(rates) && rates[j] <= rate; j++) { - /* - * Capacity for this combination of lanes and rate, - * factoring in the ANSI 8B/10B encoding. - * - * Link rates in the DRM DP helpers are really link - * symbol frequencies, so a tenth of the actual rate - * of the link. 
- */ - capacity = lanes[i] * (rates[j] * 10) * 8 / 10; - - if (capacity >= requirement) { - DRM_DEBUG_KMS("using %u lanes at %u kHz (%lu/%lu kbps)\n", - lanes[i], rates[j], requirement, - capacity); - link->lanes = lanes[i]; - link->rate = rates[j]; - return 0; - } - } - } - - return -ERANGE; -} - -/** - * DOC: Link training - * - * These functions contain common logic and helpers to implement DisplayPort - * link training. - */ - -/** - * drm_dp_link_train_init() - initialize DisplayPort link training state - * @train: DisplayPort link training state - */ -void drm_dp_link_train_init(struct drm_dp_link_train *train) -{ - struct drm_dp_link_train_set *request = &train->request; - struct drm_dp_link_train_set *adjust = &train->adjust; - unsigned int i; - - for (i = 0; i < 4; i++) { - request->voltage_swing[i] = 0; - adjust->voltage_swing[i] = 0; - - request->pre_emphasis[i] = 0; - adjust->pre_emphasis[i] = 0; - - request->post_cursor[i] = 0; - adjust->post_cursor[i] = 0; - } - - train->pattern = DP_TRAINING_PATTERN_DISABLE; - train->clock_recovered = false; - train->channel_equalized = false; -} - -static bool drm_dp_link_train_valid(const struct drm_dp_link_train *train) -{ - return train->clock_recovered && train->channel_equalized; -} - -static int drm_dp_link_apply_training(struct drm_dp_link *link) -{ - struct drm_dp_link_train_set *request = &link->train.request; - unsigned int lanes = link->lanes, *vs, *pe, *pc, i; - struct drm_dp_aux *aux = link->aux; - u8 values[4], pattern = 0; - int err; - - err = link->ops->apply_training(link); - if (err < 0) { - DRM_ERROR("failed to apply link training: %d\n", err); - return err; - } - - vs = request->voltage_swing; - pe = request->pre_emphasis; - pc = request->post_cursor; - - /* write currently selected voltage-swing and pre-emphasis levels */ - for (i = 0; i < lanes; i++) - values[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL(vs[i]) | - DP_TRAIN_PRE_EMPHASIS_LEVEL(pe[i]); - - err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, lanes); - if (err < 0) { - DRM_ERROR("failed to set training parameters: %d\n", err); - return err; - } - - /* write currently selected post-cursor level (if supported) */ - if (link->revision >= 0x12 && link->rate == 540000) { - values[0] = values[1] = 0; - - for (i = 0; i < lanes; i++) - values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]); - - err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values, - DIV_ROUND_UP(lanes, 2)); - if (err < 0) { - DRM_ERROR("failed to set post-cursor: %d\n", err); - return err; - } - } - - /* write link pattern */ - if (link->train.pattern != DP_TRAINING_PATTERN_DISABLE) - pattern |= DP_LINK_SCRAMBLING_DISABLE; - - pattern |= link->train.pattern; - - err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern); - if (err < 0) { - DRM_ERROR("failed to set training pattern: %d\n", err); - return err; - } - - return 0; -} - -static void drm_dp_link_train_wait(struct drm_dp_link *link) -{ - unsigned long min = 0; - - switch (link->train.pattern) { - case DP_TRAINING_PATTERN_1: - min = link->aux_rd_interval.cr; - break; - - case DP_TRAINING_PATTERN_2: - case DP_TRAINING_PATTERN_3: - min = link->aux_rd_interval.ce; - break; - - default: - break; - } - - if (min > 0) - usleep_range(min, 2 * min); -} - -static void drm_dp_link_get_adjustments(struct drm_dp_link *link, - u8 status[DP_LINK_STATUS_SIZE]) -{ - struct drm_dp_link_train_set *adjust = &link->train.adjust; - unsigned int i; - - for (i = 0; i < link->lanes; i++) { - adjust->voltage_swing[i] = - drm_dp_get_adjust_request_voltage(status, 
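The drm_dp_link_choose() helper being removed above picks the cheapest configuration whose capacity covers the mode's bandwidth requirement: the requirement is pixel clock (kHz) x bits-per-component x 3 components, and the capacity is lanes x link symbol rate x 10 (bit rate) x 8/10 for the ANSI 8b/10b coding, all in kbps. Below is a standalone version of that selection loop; the candidate rate and lane tables match the removed code, the example mode is mine, and the max_rate/max_lanes bounds of the original are omitted for brevity.

#include <stdio.h>

static const unsigned int rates[3] = { 162000, 270000, 540000 };	/* kHz */
static const unsigned int lanes[3] = { 1, 2, 4 };

int main(void)
{
	unsigned long clock = 148500;	/* 1920x1080@60, kHz (example) */
	unsigned int bpc = 8;
	unsigned long requirement = clock * bpc * 3;	/* kbps */
	unsigned int i, j;

	for (i = 0; i < 3; i++) {
		for (j = 0; j < 3; j++) {
			/* 8b/10b: only 8 of every 10 transmitted bits carry data */
			unsigned long capacity = lanes[i] * (rates[j] * 10UL) * 8 / 10;

			if (capacity >= requirement) {
				printf("using %u lane(s) at %u kHz (%lu/%lu kbps)\n",
				       lanes[i], rates[j], requirement, capacity);
				return 0;
			}
		}
	}

	printf("no configuration can carry %lu kbps\n", requirement);
	return 1;
}
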
i) >> - DP_TRAIN_VOLTAGE_SWING_SHIFT; - - adjust->pre_emphasis[i] = - drm_dp_get_adjust_request_pre_emphasis(status, i) >> - DP_TRAIN_PRE_EMPHASIS_SHIFT; - - adjust->post_cursor[i] = - drm_dp_get_adjust_request_post_cursor(status, i); - } -} - -static void drm_dp_link_train_adjust(struct drm_dp_link_train *train) -{ - struct drm_dp_link_train_set *request = &train->request; - struct drm_dp_link_train_set *adjust = &train->adjust; - unsigned int i; - - for (i = 0; i < 4; i++) - if (request->voltage_swing[i] != adjust->voltage_swing[i]) - request->voltage_swing[i] = adjust->voltage_swing[i]; - - for (i = 0; i < 4; i++) - if (request->pre_emphasis[i] != adjust->pre_emphasis[i]) - request->pre_emphasis[i] = adjust->pre_emphasis[i]; - - for (i = 0; i < 4; i++) - if (request->post_cursor[i] != adjust->post_cursor[i]) - request->post_cursor[i] = adjust->post_cursor[i]; -} - -static int drm_dp_link_recover_clock(struct drm_dp_link *link) -{ - u8 status[DP_LINK_STATUS_SIZE]; - int err; - - err = drm_dp_link_apply_training(link); - if (err < 0) - return err; - - drm_dp_link_train_wait(link); - - err = drm_dp_dpcd_read_link_status(link->aux, status); - if (err < 0) { - DRM_ERROR("failed to read link status: %d\n", err); - return err; - } - - if (!drm_dp_clock_recovery_ok(status, link->lanes)) - drm_dp_link_get_adjustments(link, status); - else - link->train.clock_recovered = true; - - return 0; -} - -static int drm_dp_link_clock_recovery(struct drm_dp_link *link) -{ - unsigned int repeat; - int err; - - /* start clock recovery using training pattern 1 */ - link->train.pattern = DP_TRAINING_PATTERN_1; - - for (repeat = 1; repeat < 5; repeat++) { - err = drm_dp_link_recover_clock(link); - if (err < 0) { - DRM_ERROR("failed to recover clock: %d\n", err); - return err; - } - - if (link->train.clock_recovered) - break; - - drm_dp_link_train_adjust(&link->train); - } - - return 0; -} - -static int drm_dp_link_equalize_channel(struct drm_dp_link *link) -{ - struct drm_dp_aux *aux = link->aux; - u8 status[DP_LINK_STATUS_SIZE]; - int err; - - err = drm_dp_link_apply_training(link); - if (err < 0) - return err; - - drm_dp_link_train_wait(link); - - err = drm_dp_dpcd_read_link_status(aux, status); - if (err < 0) { - DRM_ERROR("failed to read link status: %d\n", err); - return err; - } - - if (!drm_dp_clock_recovery_ok(status, link->lanes)) { - DRM_ERROR("clock recovery lost while equalizing channel\n"); - link->train.clock_recovered = false; - return 0; - } - - if (!drm_dp_channel_eq_ok(status, link->lanes)) - drm_dp_link_get_adjustments(link, status); - else - link->train.channel_equalized = true; - - return 0; -} - -static int drm_dp_link_channel_equalization(struct drm_dp_link *link) -{ - unsigned int repeat; - int err; - - /* start channel equalization using pattern 2 or 3 */ - if (link->caps.tps3_supported) - link->train.pattern = DP_TRAINING_PATTERN_3; - else - link->train.pattern = DP_TRAINING_PATTERN_2; - - for (repeat = 1; repeat < 5; repeat++) { - err = drm_dp_link_equalize_channel(link); - if (err < 0) { - DRM_ERROR("failed to equalize channel: %d\n", err); - return err; - } - - if (link->train.channel_equalized) - break; - - drm_dp_link_train_adjust(&link->train); - } - - return 0; -} - -static int drm_dp_link_downgrade(struct drm_dp_link *link) -{ - switch (link->rate) { - case 162000: - return -EINVAL; - - case 270000: - link->rate = 162000; - break; - - case 540000: - link->rate = 270000; - return 0; - } - - return 0; -} - -static void drm_dp_link_train_disable(struct drm_dp_link *link) -{ - int 
err; - - link->train.pattern = DP_TRAINING_PATTERN_DISABLE; - - err = drm_dp_link_apply_training(link); - if (err < 0) - DRM_ERROR("failed to disable link training: %d\n", err); -} - -static int drm_dp_link_train_full(struct drm_dp_link *link) -{ - int err; - -retry: - DRM_DEBUG_KMS("full-training link: %u lane%s at %u MHz\n", - link->lanes, (link->lanes > 1) ? "s" : "", - link->rate / 100); - - err = drm_dp_link_configure(link->aux, link); - if (err < 0) { - DRM_ERROR("failed to configure DP link: %d\n", err); - return err; - } - - err = drm_dp_link_clock_recovery(link); - if (err < 0) { - DRM_ERROR("clock recovery failed: %d\n", err); - goto out; - } - - if (!link->train.clock_recovered) { - DRM_ERROR("clock recovery failed, downgrading link\n"); - - err = drm_dp_link_downgrade(link); - if (err < 0) - goto out; - - goto retry; - } - - DRM_DEBUG_KMS("clock recovery succeeded\n"); - - err = drm_dp_link_channel_equalization(link); - if (err < 0) { - DRM_ERROR("channel equalization failed: %d\n", err); - goto out; - } - - if (!link->train.channel_equalized) { - DRM_ERROR("channel equalization failed, downgrading link\n"); - - err = drm_dp_link_downgrade(link); - if (err < 0) - goto out; - - goto retry; - } - - DRM_DEBUG_KMS("channel equalization succeeded\n"); - -out: - drm_dp_link_train_disable(link); - return err; -} - -static int drm_dp_link_train_fast(struct drm_dp_link *link) -{ - u8 status[DP_LINK_STATUS_SIZE]; - int err; - - DRM_DEBUG_KMS("fast-training link: %u lane%s at %u MHz\n", - link->lanes, (link->lanes > 1) ? "s" : "", - link->rate / 100); - - err = drm_dp_link_configure(link->aux, link); - if (err < 0) { - DRM_ERROR("failed to configure DP link: %d\n", err); - return err; - } - - /* transmit training pattern 1 for 500 microseconds */ - link->train.pattern = DP_TRAINING_PATTERN_1; - - err = drm_dp_link_apply_training(link); - if (err < 0) - goto out; - - usleep_range(500, 1000); - - /* transmit training pattern 2 or 3 for 500 microseconds */ - if (link->caps.tps3_supported) - link->train.pattern = DP_TRAINING_PATTERN_3; - else - link->train.pattern = DP_TRAINING_PATTERN_2; - - err = drm_dp_link_apply_training(link); - if (err < 0) - goto out; - - usleep_range(500, 1000); - - err = drm_dp_dpcd_read_link_status(link->aux, status); - if (err < 0) { - DRM_ERROR("failed to read link status: %d\n", err); - goto out; - } - - if (!drm_dp_clock_recovery_ok(status, link->lanes)) { - DRM_ERROR("clock recovery failed\n"); - err = -EIO; - } - - if (!drm_dp_channel_eq_ok(status, link->lanes)) { - DRM_ERROR("channel equalization failed\n"); - err = -EIO; - } - -out: - drm_dp_link_train_disable(link); - return err; -} - -/** - * drm_dp_link_train() - perform DisplayPort link training - * @link: a DP link object - * - * Uses the context stored in the DP link object to perform link training. It - * is expected that drivers will call drm_dp_link_probe() to obtain the link - * capabilities before performing link training. - * - * If the sink supports fast link training (no AUX CH handshake) and valid - * training settings are available, this function will try to perform fast - * link training and fall back to full link training on failure. - * - * Returns: 0 on success or a negative error code on failure. 
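The removed drm_dp_link_train_full() retries the whole clock-recovery/channel-equalization sequence at progressively lower link rates until it succeeds or runs out of options. The sketch below isolates that downgrade ladder; the ladder itself mirrors the removed drm_dp_link_downgrade(), while the try_train() predicate is purely a stand-in for "both training phases succeeded".

#include <errno.h>
#include <stdio.h>

static int downgrade(unsigned int *rate)
{
	switch (*rate) {
	case 162000:
		return -EINVAL;		/* nothing lower to fall back to */
	case 270000:
		*rate = 162000;
		return 0;
	case 540000:
		*rate = 270000;
		return 0;
	}
	return 0;
}

/* stand-in for "clock recovery and channel equalization both succeeded" */
static int try_train(unsigned int rate)
{
	return rate <= 270000;		/* pretend HBR2 fails on this sink */
}

int main(void)
{
	unsigned int rate = 540000;

	while (!try_train(rate)) {
		printf("training failed at %u kHz, downgrading\n", rate);
		if (downgrade(&rate) < 0) {
			printf("link training failed\n");
			return 1;
		}
	}

	printf("link trained at %u kHz\n", rate);
	return 0;
}
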
- */ -int drm_dp_link_train(struct drm_dp_link *link) -{ - int err; - - drm_dp_link_train_init(&link->train); - - if (link->caps.fast_training) { - if (drm_dp_link_train_valid(&link->train)) { - err = drm_dp_link_train_fast(link); - if (err < 0) - DRM_ERROR("fast link training failed: %d\n", - err); - else - return 0; - } else { - DRM_DEBUG_KMS("training parameters not available\n"); - } - } else { - DRM_DEBUG_KMS("fast link training not supported\n"); - } - - err = drm_dp_link_train_full(link); - if (err < 0) - DRM_ERROR("full link training failed: %d\n", err); - - return err; -} diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h index cb12ed0c54e7..88842fd25abf 100644 --- a/drivers/gpu/drm/tegra/dp.h +++ b/drivers/gpu/drm/tegra/dp.h @@ -7,171 +7,20 @@ #ifndef DRM_TEGRA_DP_H #define DRM_TEGRA_DP_H 1 -#include - -struct drm_display_info; -struct drm_display_mode; struct drm_dp_aux; -struct drm_dp_link; -/** - * struct drm_dp_link_caps - DP link capabilities - */ -struct drm_dp_link_caps { - /** - * @enhanced_framing: - * - * enhanced framing capability (mandatory as of DP 1.2) - */ - bool enhanced_framing; +#define DP_LINK_CAP_ENHANCED_FRAMING (1 << 0) - /** - * tps3_supported: - * - * training pattern sequence 3 supported for equalization - */ - bool tps3_supported; - - /** - * @fast_training: - * - * AUX CH handshake not required for link training - */ - bool fast_training; - - /** - * @channel_coding: - * - * ANSI 8B/10B channel coding capability - */ - bool channel_coding; - - /** - * @alternate_scrambler_reset: - * - * eDP alternate scrambler reset capability - */ - bool alternate_scrambler_reset; -}; - -void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest, - const struct drm_dp_link_caps *src); - -/** - * struct drm_dp_link_ops - DP link operations - */ -struct drm_dp_link_ops { - /** - * @apply_training: - */ - int (*apply_training)(struct drm_dp_link *link); - - /** - * @configure: - */ - int (*configure)(struct drm_dp_link *link); -}; - -#define DP_TRAIN_VOLTAGE_SWING_LEVEL(x) ((x) << 0) -#define DP_TRAIN_PRE_EMPHASIS_LEVEL(x) ((x) << 3) -#define DP_LANE_POST_CURSOR(i, x) (((x) & 0x3) << (((i) & 1) << 2)) - -/** - * struct drm_dp_link_train_set - link training settings - * @voltage_swing: per-lane voltage swing - * @pre_emphasis: per-lane pre-emphasis - * @post_cursor: per-lane post-cursor - */ -struct drm_dp_link_train_set { - unsigned int voltage_swing[4]; - unsigned int pre_emphasis[4]; - unsigned int post_cursor[4]; -}; - -/** - * struct drm_dp_link_train - link training state information - * @request: currently requested settings - * @adjust: adjustments requested by sink - * @pattern: currently requested training pattern - * @clock_recovered: flag to track if clock recovery has completed - * @channel_equalized: flag to track if channel equalization has completed - */ -struct drm_dp_link_train { - struct drm_dp_link_train_set request; - struct drm_dp_link_train_set adjust; - - unsigned int pattern; - - bool clock_recovered; - bool channel_equalized; -}; - -/** - * struct drm_dp_link - DP link capabilities and configuration - * @revision: DP specification revision supported on the link - * @max_rate: maximum clock rate supported on the link - * @max_lanes: maximum number of lanes supported on the link - * @caps: capabilities supported on the link (see &drm_dp_link_caps) - * @aux_rd_interval: AUX read interval to use for training (in microseconds) - * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...) 
- * @rate: currently configured link rate - * @lanes: currently configured number of lanes - * @rates: additional supported link rates in kHz (eDP 1.4) - * @num_rates: number of additional supported link rates (eDP 1.4) - */ struct drm_dp_link { unsigned char revision; - unsigned int max_rate; - unsigned int max_lanes; - - struct drm_dp_link_caps caps; - - /** - * @cr: clock recovery read interval - * @ce: channel equalization read interval - */ - struct { - unsigned int cr; - unsigned int ce; - } aux_rd_interval; - - unsigned char edp; - unsigned int rate; - unsigned int lanes; - - unsigned long rates[DP_MAX_SUPPORTED_RATES]; - unsigned int num_rates; - - /** - * @ops: DP link operations - */ - const struct drm_dp_link_ops *ops; - - /** - * @aux: DP AUX channel - */ - struct drm_dp_aux *aux; - - /** - * @train: DP link training state - */ - struct drm_dp_link_train train; + unsigned int num_lanes; + unsigned long capabilities; }; -int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate); -int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate); -void drm_dp_link_update_rates(struct drm_dp_link *link); - int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link); int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link); int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link); int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link); -int drm_dp_link_choose(struct drm_dp_link *link, - const struct drm_display_mode *mode, - const struct drm_display_info *info); - -void drm_dp_link_train_init(struct drm_dp_link_train *train); -int drm_dp_link_train(struct drm_dp_link *link); #endif diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 622cdf1ad246..1144605c9737 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -31,18 +30,10 @@ static DEFINE_MUTEX(dpaux_lock); static LIST_HEAD(dpaux_list); -struct tegra_dpaux_soc { - unsigned int cmh; - unsigned int drvz; - unsigned int drvi; -}; - struct tegra_dpaux { struct drm_dp_aux aux; struct device *dev; - const struct tegra_dpaux_soc *soc; - void __iomem *regs; int irq; @@ -130,7 +121,6 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux, struct tegra_dpaux *dpaux = to_dpaux(aux); unsigned long status; ssize_t ret = 0; - u8 reply = 0; u32 value; /* Tegra has 4x4 byte DP AUX transmit and receive FIFOs. */ @@ -225,23 +215,23 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux, switch ((value & DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK) >> 16) { case 0x00: - reply = DP_AUX_NATIVE_REPLY_ACK; + msg->reply = DP_AUX_NATIVE_REPLY_ACK; break; case 0x01: - reply = DP_AUX_NATIVE_REPLY_NACK; + msg->reply = DP_AUX_NATIVE_REPLY_NACK; break; case 0x02: - reply = DP_AUX_NATIVE_REPLY_DEFER; + msg->reply = DP_AUX_NATIVE_REPLY_DEFER; break; case 0x04: - reply = DP_AUX_I2C_REPLY_NACK; + msg->reply = DP_AUX_I2C_REPLY_NACK; break; case 0x08: - reply = DP_AUX_I2C_REPLY_DEFER; + msg->reply = DP_AUX_I2C_REPLY_DEFER; break; } @@ -249,24 +239,14 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux, if (msg->request & DP_AUX_I2C_READ) { size_t count = value & DPAUX_DP_AUXSTAT_REPLY_MASK; - /* - * There might be a smarter way to do this, but since - * the DP helpers will already retry transactions for - * an -EBUSY return value, simply reuse that instead. 
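The tegra_dpaux_transfer() hunk further down maps the hardware reply-type field in AUXSTAT onto the DRM AUX reply codes (0x00 native ACK, 0x01 native NACK, 0x02 native DEFER, 0x04 I2C NACK, 0x08 I2C DEFER). The mapping below is copied from that hunk; the mask/shift and the numeric values chosen for the reply constants are assumptions made so the example is self-contained.

#include <stdint.h>
#include <stdio.h>

#define AUXSTAT_REPLY_TYPE_MASK	(0xfUL << 16)	/* assumed register layout */

enum aux_reply {
	AUX_NATIVE_REPLY_ACK	= 0x0,
	AUX_NATIVE_REPLY_NACK	= 0x1,
	AUX_NATIVE_REPLY_DEFER	= 0x2,
	AUX_I2C_REPLY_NACK	= 0x4,
	AUX_I2C_REPLY_DEFER	= 0x8,
};

static int decode_reply(uint32_t auxstat)
{
	switch ((auxstat & AUXSTAT_REPLY_TYPE_MASK) >> 16) {
	case 0x00:
		return AUX_NATIVE_REPLY_ACK;
	case 0x01:
		return AUX_NATIVE_REPLY_NACK;
	case 0x02:
		return AUX_NATIVE_REPLY_DEFER;
	case 0x04:
		return AUX_I2C_REPLY_NACK;
	case 0x08:
		return AUX_I2C_REPLY_DEFER;
	default:
		return -1;	/* unknown reply type */
	}
}

int main(void)
{
	printf("reply = %d\n", decode_reply(0x2UL << 16));	/* native DEFER */
	return 0;
}
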
- */ - if (count != msg->size) { - ret = -EBUSY; - goto out; - } + if (WARN_ON(count != msg->size)) + count = min_t(size_t, count, msg->size); tegra_dpaux_read_fifo(dpaux, msg->buffer, count); ret = count; } } - msg->reply = reply; - -out: return ret; } @@ -331,9 +311,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function) switch (function) { case DPAUX_PADCTL_FUNC_AUX: - value = DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) | - DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) | - DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) | + value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) | + DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) | + DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) | DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV | DPAUX_HYBRID_PADCTL_MODE_AUX; break; @@ -341,9 +321,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function) case DPAUX_PADCTL_FUNC_I2C: value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV | DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV | - DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) | - DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) | - DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) | + DPAUX_HYBRID_PADCTL_AUX_CMH(2) | + DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) | + DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) | DPAUX_HYBRID_PADCTL_MODE_I2C; break; @@ -457,7 +437,6 @@ static int tegra_dpaux_probe(struct platform_device *pdev) if (!dpaux) return -ENOMEM; - dpaux->soc = of_device_get_match_data(&pdev->dev); INIT_WORK(&dpaux->work, tegra_dpaux_hotplug); init_completion(&dpaux->complete); INIT_LIST_HEAD(&dpaux->list); @@ -515,8 +494,6 @@ static int tegra_dpaux_probe(struct platform_device *pdev) return PTR_ERR(dpaux->vdd); } - - dpaux->vdd = NULL; } platform_set_drvdata(pdev, dpaux); @@ -665,29 +642,11 @@ static const struct dev_pm_ops tegra_dpaux_pm_ops = { SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL) }; -static const struct tegra_dpaux_soc tegra124_dpaux_soc = { - .cmh = 0x02, - .drvz = 0x04, - .drvi = 0x18, -}; - -static const struct tegra_dpaux_soc tegra210_dpaux_soc = { - .cmh = 0x02, - .drvz = 0x04, - .drvi = 0x30, -}; - -static const struct tegra_dpaux_soc tegra194_dpaux_soc = { - .cmh = 0x02, - .drvz = 0x04, - .drvi = 0x2c, -}; - static const struct of_device_id tegra_dpaux_of_match[] = { - { .compatible = "nvidia,tegra194-dpaux", .data = &tegra194_dpaux_soc }, - { .compatible = "nvidia,tegra186-dpaux", .data = &tegra210_dpaux_soc }, - { .compatible = "nvidia,tegra210-dpaux", .data = &tegra210_dpaux_soc }, - { .compatible = "nvidia,tegra124-dpaux", .data = &tegra124_dpaux_soc }, + { .compatible = "nvidia,tegra194-dpaux", }, + { .compatible = "nvidia,tegra186-dpaux", }, + { .compatible = "nvidia,tegra210-dpaux", }, + { .compatible = "nvidia,tegra124-dpaux", }, { }, }; MODULE_DEVICE_TABLE(of, tegra_dpaux_of_match); @@ -728,32 +687,25 @@ int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output) output->connector.polled = DRM_CONNECTOR_POLL_HPD; dpaux->output = output; - if (output->panel) { + err = regulator_enable(dpaux->vdd); + if (err < 0) + return err; + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { enum drm_connector_status status; - if (dpaux->vdd) { - err = regulator_enable(dpaux->vdd); - if (err < 0) - return err; + status = drm_dp_aux_detect(aux); + if (status == connector_status_connected) { + enable_irq(dpaux->irq); + return 0; } - timeout = jiffies + msecs_to_jiffies(250); - - while (time_before(jiffies, timeout)) { - status = drm_dp_aux_detect(aux); - - if (status == connector_status_connected) - 
break; - - usleep_range(1000, 2000); - } - - if (status != connector_status_connected) - return -ETIMEDOUT; + usleep_range(1000, 2000); } - enable_irq(dpaux->irq); - return 0; + return -ETIMEDOUT; } int drm_dp_aux_detach(struct drm_dp_aux *aux) @@ -764,33 +716,25 @@ int drm_dp_aux_detach(struct drm_dp_aux *aux) disable_irq(dpaux->irq); - if (dpaux->output->panel) { + err = regulator_disable(dpaux->vdd); + if (err < 0) + return err; + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { enum drm_connector_status status; - if (dpaux->vdd) { - err = regulator_disable(dpaux->vdd); - if (err < 0) - return err; + status = drm_dp_aux_detect(aux); + if (status == connector_status_disconnected) { + dpaux->output = NULL; + return 0; } - timeout = jiffies + msecs_to_jiffies(250); - - while (time_before(jiffies, timeout)) { - status = drm_dp_aux_detect(aux); - - if (status == connector_status_disconnected) - break; - - usleep_range(1000, 2000); - } - - if (status != connector_status_disconnected) - return -ETIMEDOUT; - - dpaux->output = NULL; + usleep_range(1000, 2000); } - return 0; + return -ETIMEDOUT; } enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux) @@ -821,3 +765,72 @@ int drm_dp_aux_disable(struct drm_dp_aux *aux) return 0; } + +int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding) +{ + int err; + + err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, + encoding); + if (err < 0) + return err; + + return 0; +} + +int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link, + u8 pattern) +{ + u8 tp = pattern & DP_TRAINING_PATTERN_MASK; + u8 status[DP_LINK_STATUS_SIZE], values[4]; + unsigned int i; + int err; + + err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern); + if (err < 0) + return err; + + if (tp == DP_TRAINING_PATTERN_DISABLE) + return 0; + + for (i = 0; i < link->num_lanes; i++) + values[i] = DP_TRAIN_MAX_PRE_EMPHASIS_REACHED | + DP_TRAIN_PRE_EMPH_LEVEL_0 | + DP_TRAIN_MAX_SWING_REACHED | + DP_TRAIN_VOLTAGE_SWING_LEVEL_0; + + err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, + link->num_lanes); + if (err < 0) + return err; + + usleep_range(500, 1000); + + err = drm_dp_dpcd_read_link_status(aux, status); + if (err < 0) + return err; + + switch (tp) { + case DP_TRAINING_PATTERN_1: + if (!drm_dp_clock_recovery_ok(status, link->num_lanes)) + return -EAGAIN; + + break; + + case DP_TRAINING_PATTERN_2: + if (!drm_dp_channel_eq_ok(status, link->num_lanes)) + return -EAGAIN; + + break; + + default: + dev_err(aux->dev, "unsupported training pattern %u\n", tp); + return -EINVAL; + } + + err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, 0); + if (err < 0) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 56e5e7a5c108..6fb7d74ff553 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -20,6 +20,10 @@ #include #include +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + #include "drm.h" #include "gem.h" @@ -82,6 +86,168 @@ tegra_drm_mode_config_helpers = { .atomic_commit_tail = tegra_atomic_commit_tail, }; +static int tegra_drm_load(struct drm_device *drm, unsigned long flags) +{ + struct host1x_device *device = to_host1x_device(drm->dev); + struct tegra_drm *tegra; + int err; + + tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); + if (!tegra) + return -ENOMEM; + + if (iommu_present(&platform_bus_type)) { + tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (!tegra->domain) { + err = -ENOMEM; + goto 
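The drm_dp_aux_train() helper added above decides success by checking the DPCD lane-status registers: training pattern 1 requires clock recovery on every active lane, pattern 2 additionally requires channel equalization and symbol lock. Below is a self-contained sketch of such per-lane status checks; the six-byte layout (one status nibble per lane, two lanes per byte) and the bit positions follow my reading of the DPCD and are assumptions, and the interlane-align status checked by the real drm_dp_channel_eq_ok() is omitted for brevity.

#include <stdint.h>
#include <stdio.h>

#define LANE_CR_DONE		(1 << 0)	/* assumed DPCD bit positions */
#define LANE_CHANNEL_EQ_DONE	(1 << 1)
#define LANE_SYMBOL_LOCKED	(1 << 2)

static uint8_t lane_status(const uint8_t status[6], unsigned int lane)
{
	return (status[lane / 2] >> ((lane & 1) * 4)) & 0xf;
}

static int clock_recovery_ok(const uint8_t status[6], unsigned int lanes)
{
	unsigned int lane;

	for (lane = 0; lane < lanes; lane++)
		if (!(lane_status(status, lane) & LANE_CR_DONE))
			return 0;
	return 1;
}

static int channel_eq_ok(const uint8_t status[6], unsigned int lanes)
{
	unsigned int lane;

	for (lane = 0; lane < lanes; lane++) {
		uint8_t s = lane_status(status, lane);

		if (!(s & LANE_CR_DONE) || !(s & LANE_CHANNEL_EQ_DONE) ||
		    !(s & LANE_SYMBOL_LOCKED))
			return 0;
	}
	return 1;
}

int main(void)
{
	const uint8_t status[6] = { 0x77, 0x77, 0x00, 0x00, 0x00, 0x00 };

	printf("CR ok: %d, EQ ok: %d\n",
	       clock_recovery_ok(status, 4), channel_eq_ok(status, 4));
	return 0;
}
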
free; + } + + err = iova_cache_get(); + if (err < 0) + goto domain; + } + + mutex_init(&tegra->clients_lock); + INIT_LIST_HEAD(&tegra->clients); + + drm->dev_private = tegra; + tegra->drm = drm; + + drm_mode_config_init(drm); + + drm->mode_config.min_width = 0; + drm->mode_config.min_height = 0; + + drm->mode_config.max_width = 4096; + drm->mode_config.max_height = 4096; + + drm->mode_config.allow_fb_modifiers = true; + + drm->mode_config.normalize_zpos = true; + + drm->mode_config.funcs = &tegra_drm_mode_config_funcs; + drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; + + err = tegra_drm_fb_prepare(drm); + if (err < 0) + goto config; + + drm_kms_helper_poll_init(drm); + + err = host1x_device_init(device); + if (err < 0) + goto fbdev; + + if (tegra->domain) { + u64 carveout_start, carveout_end, gem_start, gem_end; + u64 dma_mask = dma_get_mask(&device->dev); + dma_addr_t start, end; + unsigned long order; + + start = tegra->domain->geometry.aperture_start & dma_mask; + end = tegra->domain->geometry.aperture_end & dma_mask; + + gem_start = start; + gem_end = end - CARVEOUT_SZ; + carveout_start = gem_end + 1; + carveout_end = end; + + order = __ffs(tegra->domain->pgsize_bitmap); + init_iova_domain(&tegra->carveout.domain, 1UL << order, + carveout_start >> order); + + tegra->carveout.shift = iova_shift(&tegra->carveout.domain); + tegra->carveout.limit = carveout_end >> tegra->carveout.shift; + + drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); + mutex_init(&tegra->mm_lock); + + DRM_DEBUG("IOMMU apertures:\n"); + DRM_DEBUG(" GEM: %#llx-%#llx\n", gem_start, gem_end); + DRM_DEBUG(" Carveout: %#llx-%#llx\n", carveout_start, + carveout_end); + } + + if (tegra->hub) { + err = tegra_display_hub_prepare(tegra->hub); + if (err < 0) + goto device; + } + + /* + * We don't use the drm_irq_install() helpers provided by the DRM + * core, so we need to set this manually in order to allow the + * DRM_IOCTL_WAIT_VBLANK to operate correctly. 
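The tegra_drm_load() hunk above splits the IOMMU aperture into a GEM region and a firmware carveout at the top, and derives the IOVA granule from the smallest page size the IOMMU supports. The worked example below reproduces that arithmetic; CARVEOUT_SZ is defined elsewhere in the driver, so the 64 MiB used here is only an assumption for illustration, as are the 4 GiB aperture and the page-size bitmap.

#include <stdint.h>
#include <stdio.h>

#define CARVEOUT_SZ	(64UL << 20)	/* assumed for this example */

int main(void)
{
	uint64_t start = 0x00000000, end = 0xffffffff;	/* assumed aperture */
	uint64_t gem_start = start;
	uint64_t gem_end = end - CARVEOUT_SZ;
	uint64_t carveout_start = gem_end + 1;
	uint64_t carveout_end = end;
	unsigned long pgsize_bitmap = (1UL << 12) | (1UL << 21);	/* 4 KiB + 2 MiB */
	unsigned int order = __builtin_ctzl(pgsize_bitmap);	/* kernel: __ffs() */

	printf("GEM:      %#llx-%#llx\n",
	       (unsigned long long)gem_start, (unsigned long long)gem_end);
	printf("Carveout: %#llx-%#llx\n",
	       (unsigned long long)carveout_start, (unsigned long long)carveout_end);
	printf("IOVA granule: %lu bytes\n", 1UL << order);
	return 0;
}
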
+ */ + drm->irq_enabled = true; + + /* syncpoints are used for full 32-bit hardware VBLANK counters */ + drm->max_vblank_count = 0xffffffff; + + err = drm_vblank_init(drm, drm->mode_config.num_crtc); + if (err < 0) + goto hub; + + drm_mode_config_reset(drm); + + err = tegra_drm_fb_init(drm); + if (err < 0) + goto hub; + + return 0; + +hub: + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); +device: + host1x_device_exit(device); +fbdev: + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_free(drm); +config: + drm_mode_config_cleanup(drm); + + if (tegra->domain) { + mutex_destroy(&tegra->mm_lock); + drm_mm_takedown(&tegra->mm); + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + } +domain: + if (tegra->domain) + iommu_domain_free(tegra->domain); +free: + kfree(tegra); + return err; +} + +static void tegra_drm_unload(struct drm_device *drm) +{ + struct host1x_device *device = to_host1x_device(drm->dev); + struct tegra_drm *tegra = drm->dev_private; + int err; + + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_exit(drm); + drm_atomic_helper_shutdown(drm); + drm_mode_config_cleanup(drm); + + err = host1x_device_exit(device); + if (err < 0) + return; + + if (tegra->domain) { + mutex_destroy(&tegra->mm_lock); + drm_mm_takedown(&tegra->mm); + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + iommu_domain_free(tegra->domain); + } + + kfree(tegra); +} + static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) { struct tegra_drm_file *fpriv; @@ -145,8 +311,6 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, if (err < 0) return err; - dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; - dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf); if (!dest->cmdbuf.bo) return -ENOENT; @@ -850,6 +1014,8 @@ static int tegra_debugfs_init(struct drm_minor *minor) static struct drm_driver tegra_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_RENDER, + .load = tegra_drm_load, + .unload = tegra_drm_unload, .open = tegra_drm_open, .postclose = tegra_drm_postclose, .lastclose = drm_fb_helper_lastclose, @@ -902,63 +1068,57 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, return 0; } -int host1x_client_iommu_attach(struct host1x_client *client) +struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client, + bool shared) { - struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev); struct drm_device *drm = dev_get_drvdata(client->parent); struct tegra_drm *tegra = drm->dev_private; struct iommu_group *group = NULL; int err; - /* - * If the host1x client is already attached to an IOMMU domain that is - * not the shared IOMMU domain, don't try to attach it to a different - * domain. This allows using the IOMMU-backed DMA API. 
- */ - if (domain && domain != tegra->domain) - return 0; - if (tegra->domain) { group = iommu_group_get(client->dev); if (!group) { dev_err(client->dev, "failed to get IOMMU group\n"); - return -ENODEV; + return ERR_PTR(-ENODEV); } - if (domain != tegra->domain) { + if (!shared || (shared && (group != tegra->group))) { +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + } +#endif err = iommu_attach_group(tegra->domain, group); if (err < 0) { iommu_group_put(group); - return err; + return ERR_PTR(err); } - } - tegra->use_explicit_iommu = true; + if (shared && !tegra->group) + tegra->group = group; + } } - client->group = group; - - return 0; + return group; } -void host1x_client_iommu_detach(struct host1x_client *client) +void host1x_client_iommu_detach(struct host1x_client *client, + struct iommu_group *group) { struct drm_device *drm = dev_get_drvdata(client->parent); struct tegra_drm *tegra = drm->dev_private; - struct iommu_domain *domain; - if (client->group) { - /* - * Devices that are part of the same group may no longer be - * attached to a domain at this point because their group may - * have been detached by an earlier client. - */ - domain = iommu_get_domain_for_dev(client->dev); - if (domain) - iommu_detach_group(tegra->domain, client->group); + if (group) { + if (group == tegra->group) { + iommu_detach_group(tegra->domain, group); + tegra->group = NULL; + } - iommu_group_put(client->group); - client->group = NULL; + iommu_group_put(group); } } @@ -1042,8 +1202,6 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, static int host1x_drm_probe(struct host1x_device *dev) { struct drm_driver *driver = &tegra_drm_driver; - struct iommu_domain *domain; - struct tegra_drm *tegra; struct drm_device *drm; int err; @@ -1051,180 +1209,18 @@ static int host1x_drm_probe(struct host1x_device *dev) if (IS_ERR(drm)) return PTR_ERR(drm); - tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); - if (!tegra) { - err = -ENOMEM; - goto put; - } - - /* - * If the Tegra DRM clients are backed by an IOMMU, push buffers are - * likely to be allocated beyond the 32-bit boundary if sufficient - * system memory is available. This is problematic on earlier Tegra - * generations where host1x supports a maximum of 32 address bits in - * the GATHER opcode. In this case, unless host1x is behind an IOMMU - * as well it won't be able to process buffers allocated beyond the - * 32-bit boundary. - * - * The DMA API will use bounce buffers in this case, so that could - * perhaps still be made to work, even if less efficient, but there - * is another catch: in order to perform cache maintenance on pages - * allocated for discontiguous buffers we need to map and unmap the - * SG table representing these buffers. This is fine for something - * small like a push buffer, but it exhausts the bounce buffer pool - * (typically on the order of a few MiB) for framebuffers (many MiB - * for any modern resolution). - * - * Work around this by making sure that Tegra DRM clients only use - * an IOMMU if the parent host1x also uses an IOMMU. - * - * Note that there's still a small gap here that we don't cover: if - * the DMA API is backed by an IOMMU there's no way to control which - * device is attached to an IOMMU and which isn't, except via wiring - * up the device tree appropriately. 
This is considered an problem - * of integration, so care must be taken for the DT to be consistent. - */ - domain = iommu_get_domain_for_dev(drm->dev->parent); - - if (domain && iommu_present(&platform_bus_type)) { - tegra->domain = iommu_domain_alloc(&platform_bus_type); - if (!tegra->domain) { - err = -ENOMEM; - goto free; - } - - err = iova_cache_get(); - if (err < 0) - goto domain; - } - - mutex_init(&tegra->clients_lock); - INIT_LIST_HEAD(&tegra->clients); - dev_set_drvdata(&dev->dev, drm); - drm->dev_private = tegra; - tegra->drm = drm; - drm_mode_config_init(drm); - - drm->mode_config.min_width = 0; - drm->mode_config.min_height = 0; - - drm->mode_config.max_width = 4096; - drm->mode_config.max_height = 4096; - - drm->mode_config.allow_fb_modifiers = true; - - drm->mode_config.normalize_zpos = true; - - drm->mode_config.funcs = &tegra_drm_mode_config_funcs; - drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; - - err = tegra_drm_fb_prepare(drm); + err = drm_fb_helper_remove_conflicting_framebuffers(NULL, "tegradrmfb", false); if (err < 0) - goto config; - - drm_kms_helper_poll_init(drm); - - err = host1x_device_init(dev); - if (err < 0) - goto fbdev; - - if (tegra->use_explicit_iommu) { - u64 carveout_start, carveout_end, gem_start, gem_end; - u64 dma_mask = dma_get_mask(&dev->dev); - dma_addr_t start, end; - unsigned long order; - - start = tegra->domain->geometry.aperture_start & dma_mask; - end = tegra->domain->geometry.aperture_end & dma_mask; - - gem_start = start; - gem_end = end - CARVEOUT_SZ; - carveout_start = gem_end + 1; - carveout_end = end; - - order = __ffs(tegra->domain->pgsize_bitmap); - init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order); - - tegra->carveout.shift = iova_shift(&tegra->carveout.domain); - tegra->carveout.limit = carveout_end >> tegra->carveout.shift; - - drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); - mutex_init(&tegra->mm_lock); - - DRM_DEBUG_DRIVER("IOMMU apertures:\n"); - DRM_DEBUG_DRIVER(" GEM: %#llx-%#llx\n", gem_start, gem_end); - DRM_DEBUG_DRIVER(" Carveout: %#llx-%#llx\n", carveout_start, - carveout_end); - } else if (tegra->domain) { - iommu_domain_free(tegra->domain); - tegra->domain = NULL; - iova_cache_put(); - } - - if (tegra->hub) { - err = tegra_display_hub_prepare(tegra->hub); - if (err < 0) - goto device; - } - - /* - * We don't use the drm_irq_install() helpers provided by the DRM - * core, so we need to set this manually in order to allow the - * DRM_IOCTL_WAIT_VBLANK to operate correctly. 
- */ - drm->irq_enabled = true; - - /* syncpoints are used for full 32-bit hardware VBLANK counters */ - drm->max_vblank_count = 0xffffffff; - - err = drm_vblank_init(drm, drm->mode_config.num_crtc); - if (err < 0) - goto hub; - - drm_mode_config_reset(drm); - - err = drm_fb_helper_remove_conflicting_framebuffers(NULL, "tegradrmfb", - false); - if (err < 0) - goto hub; - - err = tegra_drm_fb_init(drm); - if (err < 0) - goto hub; + goto put; err = drm_dev_register(drm, 0); if (err < 0) - goto fb; + goto put; return 0; -fb: - tegra_drm_fb_exit(drm); -hub: - if (tegra->hub) - tegra_display_hub_cleanup(tegra->hub); -device: - if (tegra->domain) { - mutex_destroy(&tegra->mm_lock); - drm_mm_takedown(&tegra->mm); - put_iova_domain(&tegra->carveout.domain); - iova_cache_put(); - } - - host1x_device_exit(dev); -fbdev: - drm_kms_helper_poll_fini(drm); - tegra_drm_fb_free(drm); -config: - drm_mode_config_cleanup(drm); -domain: - if (tegra->domain) - iommu_domain_free(tegra->domain); -free: - kfree(tegra); put: drm_dev_put(drm); return err; @@ -1233,29 +1229,8 @@ put: static int host1x_drm_remove(struct host1x_device *dev) { struct drm_device *drm = dev_get_drvdata(&dev->dev); - struct tegra_drm *tegra = drm->dev_private; - int err; drm_dev_unregister(drm); - - drm_kms_helper_poll_fini(drm); - tegra_drm_fb_exit(drm); - drm_atomic_helper_shutdown(drm); - drm_mode_config_cleanup(drm); - - err = host1x_device_exit(dev); - if (err < 0) - dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); - - if (tegra->domain) { - mutex_destroy(&tegra->mm_lock); - drm_mm_takedown(&tegra->mm); - put_iova_domain(&tegra->carveout.domain); - iova_cache_put(); - iommu_domain_free(tegra->domain); - } - - kfree(tegra); drm_dev_put(drm); return 0; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index d941553f7a3d..29911eff9ceb 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -36,7 +36,7 @@ struct tegra_drm { struct drm_device *drm; struct iommu_domain *domain; - bool use_explicit_iommu; + struct iommu_group *group; struct mutex mm_lock; struct drm_mm mm; @@ -100,8 +100,10 @@ int tegra_drm_register_client(struct tegra_drm *tegra, struct tegra_drm_client *client); int tegra_drm_unregister_client(struct tegra_drm *tegra, struct tegra_drm_client *client); -int host1x_client_iommu_attach(struct host1x_client *client); -void host1x_client_iommu_detach(struct host1x_client *client); +struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client, + bool shared); +void host1x_client_iommu_detach(struct host1x_client *client, + struct iommu_group *group); int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm); int tegra_drm_exit(struct tegra_drm *tegra); @@ -153,12 +155,17 @@ void tegra_output_connector_destroy(struct drm_connector *connector); void tegra_output_encoder_destroy(struct drm_encoder *encoder); /* from dpaux.c */ +struct drm_dp_link; + struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np); enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux); int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output); int drm_dp_aux_detach(struct drm_dp_aux *aux); int drm_dp_aux_enable(struct drm_dp_aux *aux); int drm_dp_aux_disable(struct drm_dp_aux *aux); +int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding); +int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link, + u8 pattern); /* from fb.c */ struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer, diff --git 
a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c index 56edef06c48e..f49ad36e24db 100644 --- a/drivers/gpu/drm/tegra/falcon.c +++ b/drivers/gpu/drm/tegra/falcon.c @@ -58,17 +58,32 @@ static int falcon_copy_chunk(struct falcon *falcon, static void falcon_copy_firmware_image(struct falcon *falcon, const struct firmware *firmware) { - u32 *virt = falcon->firmware.virt; + u32 *firmware_vaddr = falcon->firmware.vaddr; + dma_addr_t daddr; size_t i; + int err; /* copy the whole thing taking into account endianness */ for (i = 0; i < firmware->size / sizeof(u32); i++) - virt[i] = le32_to_cpu(((u32 *)firmware->data)[i]); + firmware_vaddr[i] = le32_to_cpu(((u32 *)firmware->data)[i]); + + /* ensure that caches are flushed and falcon can see the firmware */ + daddr = dma_map_single(falcon->dev, firmware_vaddr, + falcon->firmware.size, DMA_TO_DEVICE); + err = dma_mapping_error(falcon->dev, daddr); + if (err) { + dev_err(falcon->dev, "failed to map firmware: %d\n", err); + return; + } + dma_sync_single_for_device(falcon->dev, daddr, + falcon->firmware.size, DMA_TO_DEVICE); + dma_unmap_single(falcon->dev, daddr, falcon->firmware.size, + DMA_TO_DEVICE); } static int falcon_parse_firmware_image(struct falcon *falcon) { - struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.virt; + struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.vaddr; struct falcon_fw_os_header_v1 *os; /* endian problems would show up right here */ @@ -89,7 +104,7 @@ static int falcon_parse_firmware_image(struct falcon *falcon) return -EINVAL; } - os = falcon->firmware.virt + bin->os_header_offset; + os = falcon->firmware.vaddr + bin->os_header_offset; falcon->firmware.bin_data.size = bin->os_size; falcon->firmware.bin_data.offset = bin->os_data_offset; @@ -110,8 +125,6 @@ int falcon_read_firmware(struct falcon *falcon, const char *name) if (err < 0) return err; - falcon->firmware.size = falcon->firmware.firmware->size; - return 0; } @@ -120,6 +133,16 @@ int falcon_load_firmware(struct falcon *falcon) const struct firmware *firmware = falcon->firmware.firmware; int err; + falcon->firmware.size = firmware->size; + + /* allocate iova space for the firmware */ + falcon->firmware.vaddr = falcon->ops->alloc(falcon, firmware->size, + &falcon->firmware.paddr); + if (IS_ERR(falcon->firmware.vaddr)) { + dev_err(falcon->dev, "DMA memory mapping failed\n"); + return PTR_ERR(falcon->firmware.vaddr); + } + /* copy firmware image into local area. 
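The falcon_copy_firmware_image() hunk above copies the firmware blob word by word, converting each little-endian 32-bit value to host endianness before handing the buffer to the device; the dma_map_single()/dma_sync_single_for_device() calls then make sure the result is visible past the CPU caches, which cannot be shown in userspace. The sketch below covers the endianness part only: le32toh() stands in for the kernel's le32_to_cpu(), and the sample data is made up.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void copy_firmware(uint32_t *dst, const uint8_t *data, size_t size)
{
	size_t i;

	for (i = 0; i < size / sizeof(uint32_t); i++) {
		uint32_t word;

		/* memcpy avoids unaligned access; the kernel code casts directly */
		memcpy(&word, data + i * sizeof(word), sizeof(word));
		dst[i] = le32toh(word);	/* kernel: le32_to_cpu() */
	}
}

int main(void)
{
	const uint8_t blob[8] = { 0x10, 0x00, 0x00, 0x00, 0xef, 0xbe, 0xad, 0xde };
	uint32_t words[2];

	copy_firmware(words, blob, sizeof(blob));
	printf("%#x %#x\n", words[0], words[1]);	/* 0x10 0xdeadbeef */
	return 0;
}
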
this also ensures endianness */ falcon_copy_firmware_image(falcon, firmware); @@ -127,26 +150,45 @@ int falcon_load_firmware(struct falcon *falcon) err = falcon_parse_firmware_image(falcon); if (err < 0) { dev_err(falcon->dev, "failed to parse firmware image\n"); - return err; + goto err_setup_firmware_image; } release_firmware(firmware); falcon->firmware.firmware = NULL; return 0; + +err_setup_firmware_image: + falcon->ops->free(falcon, falcon->firmware.size, + falcon->firmware.paddr, falcon->firmware.vaddr); + + return err; } int falcon_init(struct falcon *falcon) { - falcon->firmware.virt = NULL; + /* check mandatory ops */ + if (!falcon->ops || !falcon->ops->alloc || !falcon->ops->free) + return -EINVAL; + + falcon->firmware.vaddr = NULL; return 0; } void falcon_exit(struct falcon *falcon) { - if (falcon->firmware.firmware) + if (falcon->firmware.firmware) { release_firmware(falcon->firmware.firmware); + falcon->firmware.firmware = NULL; + } + + if (falcon->firmware.vaddr) { + falcon->ops->free(falcon, falcon->firmware.size, + falcon->firmware.paddr, + falcon->firmware.vaddr); + falcon->firmware.vaddr = NULL; + } } int falcon_boot(struct falcon *falcon) @@ -155,7 +197,7 @@ int falcon_boot(struct falcon *falcon) u32 value; int err; - if (!falcon->firmware.virt) + if (!falcon->firmware.vaddr) return -EINVAL; err = readl_poll_timeout(falcon->regs + FALCON_DMACTL, value, @@ -168,7 +210,7 @@ int falcon_boot(struct falcon *falcon) falcon_writel(falcon, 0, FALCON_DMACTL); /* setup the address of the binary data so Falcon can access it later */ - falcon_writel(falcon, (falcon->firmware.iova + + falcon_writel(falcon, (falcon->firmware.paddr + falcon->firmware.bin_data.offset) >> 8, FALCON_DMATRFBASE); diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h index c56ee32d92ee..3d1243217410 100644 --- a/drivers/gpu/drm/tegra/falcon.h +++ b/drivers/gpu/drm/tegra/falcon.h @@ -74,6 +74,15 @@ struct falcon_fw_os_header_v1 { u32 data_size; }; +struct falcon; + +struct falcon_ops { + void *(*alloc)(struct falcon *falcon, size_t size, + dma_addr_t *paddr); + void (*free)(struct falcon *falcon, size_t size, + dma_addr_t paddr, void *vaddr); +}; + struct falcon_firmware_section { unsigned long offset; size_t size; @@ -84,9 +93,8 @@ struct falcon_firmware { const struct firmware *firmware; /* Raw firmware data */ - dma_addr_t iova; - dma_addr_t phys; - void *virt; + dma_addr_t paddr; + void *vaddr; size_t size; /* Parsed firmware information */ @@ -99,6 +107,8 @@ struct falcon { /* Set by falcon client */ struct device *dev; void __iomem *regs; + const struct falcon_ops *ops; + void *data; struct falcon_firmware firmware; }; diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 7cea89f29a5c..e34325c83d28 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -269,10 +269,10 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper, } } - drm->mode_config.fb_base = (resource_size_t)bo->iova; + drm->mode_config.fb_base = (resource_size_t)bo->paddr; info->screen_base = (void __iomem *)bo->vaddr + offset; info->screen_size = size; - info->fix.smem_start = (unsigned long)(bo->iova + offset); + info->fix.smem_start = (unsigned long)(bo->paddr + offset); info->fix.smem_len = size; return 0; diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 746dae32c484..fb7667c8dd4c 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -27,55 +27,17 @@ static void tegra_bo_put(struct host1x_bo *bo) 
drm_gem_object_put_unlocked(&obj->gem); } -static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys) +static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); - struct sg_table *sgt; - int err; - /* - * If we've manually mapped the buffer object through the IOMMU, make - * sure to return the IOVA address of our mapping. - */ - if (phys && obj->mm) { - *phys = obj->iova; - return NULL; - } + *sgt = obj->sgt; - /* - * If we don't have a mapping for this buffer yet, return an SG table - * so that host1x can do the mapping for us via the DMA API. - */ - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - if (obj->pages) { - err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, - 0, obj->gem.size, GFP_KERNEL); - if (err < 0) - goto free; - } else { - err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, - obj->gem.size); - if (err < 0) - goto free; - } - - return sgt; - -free: - kfree(sgt); - return ERR_PTR(err); + return obj->paddr; } -static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt) +static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt) { - if (sgt) { - sg_free_table(sgt); - kfree(sgt); - } } static void *tegra_bo_mmap(struct host1x_bo *bo) @@ -171,9 +133,9 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo) goto unlock; } - bo->iova = bo->mm->start; + bo->paddr = bo->mm->start; - bo->size = iommu_map_sg(tegra->domain, bo->iova, bo->sgt->sgl, + bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl, bo->sgt->nents, prot); if (!bo->size) { dev_err(tegra->drm->dev, "failed to map buffer\n"); @@ -199,7 +161,7 @@ static int tegra_bo_iommu_unmap(struct tegra_drm *tegra, struct tegra_bo *bo) return 0; mutex_lock(&tegra->mm_lock); - iommu_unmap(tegra->domain, bo->iova, bo->size); + iommu_unmap(tegra->domain, bo->paddr, bo->size); drm_mm_remove_node(bo->mm); mutex_unlock(&tegra->mm_lock); @@ -247,7 +209,7 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo) sg_free_table(bo->sgt); kfree(bo->sgt); } else if (bo->vaddr) { - dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->iova); + dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr); } } @@ -302,7 +264,7 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo) } else { size_t size = bo->gem.size; - bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->iova, + bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr, GFP_KERNEL | __GFP_NOWARN); if (!bo->vaddr) { dev_err(drm->dev, @@ -403,7 +365,7 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm, goto detach; } - bo->iova = sg_dma_address(bo->sgt->sgl); + bo->paddr = sg_dma_address(bo->sgt->sgl); } bo->gem.import_attach = attach; @@ -499,7 +461,7 @@ int __tegra_gem_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma) vma->vm_flags &= ~VM_PFNMAP; vma->vm_pgoff = 0; - err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->iova, + err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr, gem->size); if (err < 0) { drm_gem_vm_close(vma); @@ -546,17 +508,24 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach, return NULL; if (bo->pages) { - if (sg_alloc_table_from_pages(sgt, bo->pages, bo->num_pages, - 0, gem->size, GFP_KERNEL) < 0) + struct scatterlist *sg; + unsigned int i; + + if (sg_alloc_table(sgt, bo->num_pages, GFP_KERNEL)) + goto free; + + for_each_sg(sgt->sgl, sg, bo->num_pages, i) + sg_set_page(sg, 
bo->pages[i], PAGE_SIZE, 0); + + if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0) goto free; } else { - if (dma_get_sgtable(attach->dev, sgt, bo->vaddr, bo->iova, - gem->size) < 0) + if (sg_alloc_table(sgt, 1, GFP_KERNEL)) goto free; - } - if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0) - goto free; + sg_dma_address(sgt->sgl) = bo->paddr; + sg_dma_len(sgt->sgl) = gem->size; + } return sgt; diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h index fafb5724499b..83ffb1e14ca3 100644 --- a/drivers/gpu/drm/tegra/gem.h +++ b/drivers/gpu/drm/tegra/gem.h @@ -31,7 +31,7 @@ struct tegra_bo { struct host1x_bo base; unsigned long flags; struct sg_table *sgt; - dma_addr_t iova; + dma_addr_t paddr; void *vaddr; struct drm_mm_node *mm; diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index 1fc4e56c7cc5..641299cc85b8 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -17,6 +17,7 @@ struct gr2d_soc { }; struct gr2d { + struct iommu_group *group; struct tegra_drm_client client; struct host1x_channel *channel; struct clk *clk; @@ -39,7 +40,7 @@ static int gr2d_init(struct host1x_client *client) struct gr2d *gr2d = to_gr2d(drm); int err; - gr2d->channel = host1x_channel_request(client); + gr2d->channel = host1x_channel_request(client->dev); if (!gr2d->channel) return -ENOMEM; @@ -50,8 +51,9 @@ static int gr2d_init(struct host1x_client *client) goto put; } - err = host1x_client_iommu_attach(client); - if (err < 0) { + gr2d->group = host1x_client_iommu_attach(client, false); + if (IS_ERR(gr2d->group)) { + err = PTR_ERR(gr2d->group); dev_err(client->dev, "failed to attach to domain: %d\n", err); goto free; } @@ -65,7 +67,7 @@ static int gr2d_init(struct host1x_client *client) return 0; detach: - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, gr2d->group); free: host1x_syncpt_free(client->syncpts[0]); put: @@ -85,7 +87,7 @@ static int gr2d_exit(struct host1x_client *client) if (err < 0) return err; - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, gr2d->group); host1x_syncpt_free(client->syncpts[0]); host1x_channel_put(gr2d->channel); diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 24fae0f64032..8b9a35b1cbb3 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -23,6 +23,7 @@ struct gr3d_soc { }; struct gr3d { + struct iommu_group *group; struct tegra_drm_client client; struct host1x_channel *channel; struct clk *clk_secondary; @@ -48,7 +49,7 @@ static int gr3d_init(struct host1x_client *client) struct gr3d *gr3d = to_gr3d(drm); int err; - gr3d->channel = host1x_channel_request(client); + gr3d->channel = host1x_channel_request(client->dev); if (!gr3d->channel) return -ENOMEM; @@ -59,8 +60,9 @@ static int gr3d_init(struct host1x_client *client) goto put; } - err = host1x_client_iommu_attach(client); - if (err < 0) { + gr3d->group = host1x_client_iommu_attach(client, false); + if (IS_ERR(gr3d->group)) { + err = PTR_ERR(gr3d->group); dev_err(client->dev, "failed to attach to domain: %d\n", err); goto free; } @@ -74,7 +76,7 @@ static int gr3d_init(struct host1x_client *client) return 0; detach: - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, gr3d->group); free: host1x_syncpt_free(client->syncpts[0]); put: @@ -93,7 +95,7 @@ static int gr3d_exit(struct host1x_client *client) if (err < 0) return err; - host1x_client_iommu_detach(client); + host1x_client_iommu_detach(client, gr3d->group); 
host1x_syncpt_free(client->syncpts[0]); host1x_channel_put(gr3d->channel); diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 2b4082d0bc9e..839b49c40e51 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -413,6 +413,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, unsigned int zpos = plane->state->normalized_zpos; struct drm_framebuffer *fb = plane->state->fb; struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_bo *bo; dma_addr_t base; u32 value; @@ -455,7 +456,8 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, /* disable compression */ tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL); - base = state->iova[0] + fb->offsets[0]; + bo = tegra_fb_get_plane(fb, 0); + base = bo->paddr; tegra_plane_writel(p, state->format, DC_WIN_COLOR_DEPTH); tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS); @@ -519,8 +521,6 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = { - .prepare_fb = tegra_plane_prepare_fb, - .cleanup_fb = tegra_plane_cleanup_fb, .atomic_check = tegra_shared_plane_atomic_check, .atomic_update = tegra_shared_plane_atomic_update, .atomic_disable = tegra_shared_plane_atomic_disable, diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 34373734ff68..bdcaa4c7168c 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -70,11 +70,6 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force) void tegra_output_connector_destroy(struct drm_connector *connector) { - struct tegra_output *output = connector_to_output(connector); - - if (output->cec) - cec_notifier_conn_unregister(output->cec); - drm_connector_unregister(connector); drm_connector_cleanup(connector); } @@ -168,11 +163,18 @@ int tegra_output_probe(struct tegra_output *output) disable_irq(output->hpd_irq); } + output->cec = cec_notifier_get(output->dev); + if (!output->cec) + return -ENOMEM; + return 0; } void tegra_output_remove(struct tegra_output *output) { + if (output->cec) + cec_notifier_put(output->cec); + if (output->hpd_gpio) free_irq(output->hpd_irq, output); @@ -182,7 +184,6 @@ void tegra_output_remove(struct tegra_output *output) int tegra_output_init(struct drm_device *drm, struct tegra_output *output) { - int connector_type; int err; if (output->panel) { @@ -198,21 +199,6 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output) if (output->hpd_gpio) enable_irq(output->hpd_irq); - connector_type = output->connector.connector_type; - /* - * Create a CEC notifier for HDMI connector. 
- */ - if (connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector_type == DRM_MODE_CONNECTOR_HDMIB) { - struct cec_connector_info conn_info; - - cec_fill_conn_info_from_drm(&conn_info, &output->connector); - output->cec = cec_notifier_conn_register(output->dev, NULL, - &conn_info); - if (!output->cec) - return -ENOMEM; - } - return 0; } diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 163b590be224..6bab71d6e81d 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "dc.h" @@ -24,7 +23,6 @@ static void tegra_plane_reset(struct drm_plane *plane) { struct tegra_plane *p = to_tegra_plane(plane); struct tegra_plane_state *state; - unsigned int i; if (plane->state) __drm_atomic_helper_plane_destroy_state(plane->state); @@ -38,9 +36,6 @@ static void tegra_plane_reset(struct drm_plane *plane) plane->state->plane = plane; plane->state->zpos = p->index; plane->state->normalized_zpos = p->index; - - for (i = 0; i < 3; i++) - state->iova[i] = DMA_MAPPING_ERROR; } } @@ -65,11 +60,6 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) for (i = 0; i < 2; i++) copy->blending[i] = state->blending[i]; - for (i = 0; i < 3; i++) { - copy->iova[i] = DMA_MAPPING_ERROR; - copy->sgt[i] = NULL; - } - return &copy->base; } @@ -105,100 +95,6 @@ const struct drm_plane_funcs tegra_plane_funcs = { .format_mod_supported = tegra_plane_format_mod_supported, }; -static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) -{ - unsigned int i; - int err; - - for (i = 0; i < state->base.fb->format->num_planes; i++) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - - if (!dc->client.group) { - struct sg_table *sgt; - - sgt = host1x_bo_pin(dc->dev, &bo->base, NULL); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } - - err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - if (err == 0) { - err = -ENOMEM; - goto unpin; - } - - state->iova[i] = sg_dma_address(sgt->sgl); - state->sgt[i] = sgt; - } else { - state->iova[i] = bo->iova; - } - } - - return 0; - -unpin: - dev_err(dc->dev, "failed to map plane %u: %d\n", i, err); - - while (i--) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - struct sg_table *sgt = state->sgt[i]; - - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); - - state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; - } - - return err; -} - -static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) -{ - unsigned int i; - - for (i = 0; i < state->base.fb->format->num_planes; i++) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - - if (!dc->client.group) { - struct sg_table *sgt = state->sgt[i]; - - if (sgt) { - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); - } - } - - state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; - } -} - -int tegra_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct tegra_dc *dc = to_tegra_dc(state->crtc); - - if (!state->fb) - return 0; - - drm_gem_fb_prepare_fb(plane, state); - - return tegra_dc_pin(dc, to_tegra_plane_state(state)); -} - -void tegra_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct tegra_dc *dc = to_tegra_dc(state->crtc); - - if (dc) - tegra_dc_unpin(dc, to_tegra_plane_state(state)); -} - int tegra_plane_state_add(struct
tegra_plane *plane, struct drm_plane_state *state) { diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index a158a915109a..510c394e6d9a 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -39,9 +39,6 @@ struct tegra_plane_legacy_blending_state { struct tegra_plane_state { struct drm_plane_state base; - struct sg_table *sgt[3]; - dma_addr_t iova[3]; - struct tegra_bo_tiling tiling; u32 format; u32 swap; @@ -64,11 +61,6 @@ to_tegra_plane_state(struct drm_plane_state *state) extern const struct drm_plane_funcs tegra_plane_funcs; -int tegra_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *state); -void tegra_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *state); - int tegra_plane_state_add(struct tegra_plane *plane, struct drm_plane_state *state); diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 615cb319fa8b..fbbb974c1e1a 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -371,11 +371,10 @@ struct tegra_sor_regs { }; struct tegra_sor_soc { + bool supports_edp; bool supports_lvds; bool supports_hdmi; bool supports_dp; - bool supports_audio; - bool supports_hdcp; const struct tegra_sor_regs *regs; bool has_nvdisplay; @@ -384,12 +383,6 @@ struct tegra_sor_soc { unsigned int num_settings; const u8 *xbar_cfg; - const u8 *lane_map; - - const u8 (*voltage_swing)[4][4]; - const u8 (*pre_emphasis)[4][4]; - const u8 (*post_cursor)[4][4]; - const u8 (*tx_pu)[4][4]; }; struct tegra_sor; @@ -398,8 +391,6 @@ struct tegra_sor_ops { const char *name; int (*probe)(struct tegra_sor *sor); int (*remove)(struct tegra_sor *sor); - void (*audio_enable)(struct tegra_sor *sor); - void (*audio_disable)(struct tegra_sor *sor); }; struct tegra_sor { @@ -422,7 +413,6 @@ struct tegra_sor { u8 xbar_cfg[5]; - struct drm_dp_link link; struct drm_dp_aux *aux; struct drm_info_list *debugfs_files; @@ -525,19 +515,10 @@ static inline struct tegra_clk_sor_pad *to_pad(struct clk_hw *hw) return container_of(hw, struct tegra_clk_sor_pad, hw); } -static const char * const tegra_clk_sor_pad_parents[2][2] = { - { "pll_d_out0", "pll_dp" }, - { "pll_d2_out0", "pll_dp" }, +static const char * const tegra_clk_sor_pad_parents[] = { + "pll_d2_out0", "pll_dp" }; -/* - * Implementing ->set_parent() here isn't really required because the parent - * will be explicitly selected in the driver code via the DP_CLK_SEL mux in - * the SOR_CLK_CNTRL register. This is primarily for compatibility with the - * Tegra186 and later SoC generations where the BPMP implements this clock - * and doesn't expose the mux via the common clock framework. 
- */ - static int tegra_clk_sor_pad_set_parent(struct clk_hw *hw, u8 index) { struct tegra_clk_sor_pad *pad = to_pad(hw); @@ -606,8 +587,8 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor, init.name = name; init.flags = 0; - init.parent_names = tegra_clk_sor_pad_parents[sor->index]; - init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents[sor->index]); + init.parent_names = tegra_clk_sor_pad_parents; + init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents); init.ops = &tegra_clk_sor_pad_ops; pad->hw.init = &init; @@ -617,340 +598,112 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor, return clk; } -static void tegra_sor_filter_rates(struct tegra_sor *sor) +static int tegra_sor_dp_train_fast(struct tegra_sor *sor, + struct drm_dp_link *link) { - struct drm_dp_link *link = &sor->link; unsigned int i; - - /* Tegra only supports RBR, HBR and HBR2 */ - for (i = 0; i < link->num_rates; i++) { - switch (link->rates[i]) { - case 1620000: - case 2700000: - case 5400000: - break; - - default: - DRM_DEBUG_KMS("link rate %lu kHz not supported\n", - link->rates[i]); - link->rates[i] = 0; - break; - } - } - - drm_dp_link_update_rates(link); -} - -static int tegra_sor_power_up_lanes(struct tegra_sor *sor, unsigned int lanes) -{ - unsigned long timeout; + u8 pattern; u32 value; + int err; + + /* setup lane parameters */ + value = SOR_LANE_DRIVE_CURRENT_LANE3(0x40) | + SOR_LANE_DRIVE_CURRENT_LANE2(0x40) | + SOR_LANE_DRIVE_CURRENT_LANE1(0x40) | + SOR_LANE_DRIVE_CURRENT_LANE0(0x40); + tegra_sor_writel(sor, value, SOR_LANE_DRIVE_CURRENT0); + + value = SOR_LANE_PREEMPHASIS_LANE3(0x0f) | + SOR_LANE_PREEMPHASIS_LANE2(0x0f) | + SOR_LANE_PREEMPHASIS_LANE1(0x0f) | + SOR_LANE_PREEMPHASIS_LANE0(0x0f); + tegra_sor_writel(sor, value, SOR_LANE_PREEMPHASIS0); + + value = SOR_LANE_POSTCURSOR_LANE3(0x00) | + SOR_LANE_POSTCURSOR_LANE2(0x00) | + SOR_LANE_POSTCURSOR_LANE1(0x00) | + SOR_LANE_POSTCURSOR_LANE0(0x00); + tegra_sor_writel(sor, value, SOR_LANE_POSTCURSOR0); + + /* disable LVDS mode */ + tegra_sor_writel(sor, 0, SOR_LVDS); - /* - * Clear or set the PD_TXD bit corresponding to each lane, depending - * on whether it is used or not. - */ value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - - if (lanes <= 2) - value &= ~(SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) | - SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2])); - else - value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) | - SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2]); - - if (lanes <= 1) - value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]); - else - value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]); - - if (lanes == 0) - value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]); - else - value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]); - + value |= SOR_DP_PADCTL_TX_PU_ENABLE; + value &= ~SOR_DP_PADCTL_TX_PU_MASK; + value |= SOR_DP_PADCTL_TX_PU(2); /* XXX: don't hardcode? 
*/ tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); - /* start lane sequencer */ - value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN | - SOR_LANE_SEQ_CTL_POWER_STATE_UP; - tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL); - - timeout = jiffies + msecs_to_jiffies(250); - - while (time_before(jiffies, timeout)) { - value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL); - if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0) - break; - - usleep_range(250, 1000); - } - - if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0) - return -ETIMEDOUT; - - return 0; -} - -static int tegra_sor_power_down_lanes(struct tegra_sor *sor) -{ - unsigned long timeout; - u32 value; - - /* power down all lanes */ value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 | - SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2); + value |= SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 | + SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0; tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); - /* start lane sequencer */ - value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP | - SOR_LANE_SEQ_CTL_POWER_STATE_DOWN; - tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL); - - timeout = jiffies + msecs_to_jiffies(250); - - while (time_before(jiffies, timeout)) { - value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL); - if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0) - break; - - usleep_range(25, 100); - } - - if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0) - return -ETIMEDOUT; - - return 0; -} - -static void tegra_sor_dp_precharge(struct tegra_sor *sor, unsigned int lanes) -{ - u32 value; - - /* pre-charge all used lanes */ - value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - - if (lanes <= 2) - value &= ~(SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) | - SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2])); - else - value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) | - SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2]); - - if (lanes <= 1) - value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]); - else - value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]); - - if (lanes == 0) - value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]); - else - value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]); - - tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); - - usleep_range(15, 100); + usleep_range(10, 100); value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); value &= ~(SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 | SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0); tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); -} -static void tegra_sor_dp_term_calibrate(struct tegra_sor *sor) -{ - u32 mask = 0x08, adj = 0, value; + err = drm_dp_aux_prepare(sor->aux, DP_SET_ANSI_8B10B); + if (err < 0) + return err; - /* enable pad calibration logic */ - value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - value &= ~SOR_DP_PADCTL_PAD_CAL_PD; - tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); - - value = tegra_sor_readl(sor, sor->soc->regs->pll1); - value |= SOR_PLL1_TMDS_TERM; - tegra_sor_writel(sor, value, sor->soc->regs->pll1); - - while (mask) { - adj |= mask; - - value = tegra_sor_readl(sor, sor->soc->regs->pll1); - value &= ~SOR_PLL1_TMDS_TERMADJ_MASK; - value |= SOR_PLL1_TMDS_TERMADJ(adj); - tegra_sor_writel(sor, value, sor->soc->regs->pll1); - - usleep_range(100, 200); - - value = tegra_sor_readl(sor, sor->soc->regs->pll1); - if (value & SOR_PLL1_TERM_COMPOUT) - adj &= ~mask; - - mask >>= 1; + for (i = 0, value = 0; i < link->num_lanes; i++) { + 
unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_NONE | + SOR_DP_TPG_PATTERN_TRAIN1; + value = (value << 8) | lane; } - value = tegra_sor_readl(sor, sor->soc->regs->pll1); - value &= ~SOR_PLL1_TMDS_TERMADJ_MASK; - value |= SOR_PLL1_TMDS_TERMADJ(adj); - tegra_sor_writel(sor, value, sor->soc->regs->pll1); + tegra_sor_writel(sor, value, SOR_DP_TPG); - /* disable pad calibration logic */ - value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - value |= SOR_DP_PADCTL_PAD_CAL_PD; - tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); -} + pattern = DP_TRAINING_PATTERN_1; -static int tegra_sor_dp_link_apply_training(struct drm_dp_link *link) -{ - struct tegra_sor *sor = container_of(link, struct tegra_sor, link); - u32 voltage_swing = 0, pre_emphasis = 0, post_cursor = 0; - const struct tegra_sor_soc *soc = sor->soc; - u32 pattern = 0, tx_pu = 0, value; - unsigned int i; + err = drm_dp_aux_train(sor->aux, link, pattern); + if (err < 0) + return err; - for (value = 0, i = 0; i < link->lanes; i++) { - u8 vs = link->train.request.voltage_swing[i]; - u8 pe = link->train.request.pre_emphasis[i]; - u8 pc = link->train.request.post_cursor[i]; - u8 shift = sor->soc->lane_map[i] << 3; - - voltage_swing |= soc->voltage_swing[pc][vs][pe] << shift; - pre_emphasis |= soc->pre_emphasis[pc][vs][pe] << shift; - post_cursor |= soc->post_cursor[pc][vs][pe] << shift; - - if (sor->soc->tx_pu[pc][vs][pe] > tx_pu) - tx_pu = sor->soc->tx_pu[pc][vs][pe]; - - switch (link->train.pattern) { - case DP_TRAINING_PATTERN_DISABLE: - value = SOR_DP_TPG_SCRAMBLER_GALIOS | - SOR_DP_TPG_PATTERN_NONE; - break; - - case DP_TRAINING_PATTERN_1: - value = SOR_DP_TPG_SCRAMBLER_NONE | - SOR_DP_TPG_PATTERN_TRAIN1; - break; - - case DP_TRAINING_PATTERN_2: - value = SOR_DP_TPG_SCRAMBLER_NONE | - SOR_DP_TPG_PATTERN_TRAIN2; - break; - - case DP_TRAINING_PATTERN_3: - value = SOR_DP_TPG_SCRAMBLER_NONE | - SOR_DP_TPG_PATTERN_TRAIN3; - break; - - default: - return -EINVAL; - } - - if (link->caps.channel_coding) - value |= SOR_DP_TPG_CHANNEL_CODING; - - pattern = pattern << 8 | value; - } - - tegra_sor_writel(sor, voltage_swing, SOR_LANE_DRIVE_CURRENT0); - tegra_sor_writel(sor, pre_emphasis, SOR_LANE_PREEMPHASIS0); - - if (link->caps.tps3_supported) - tegra_sor_writel(sor, post_cursor, SOR_LANE_POSTCURSOR0); - - tegra_sor_writel(sor, pattern, SOR_DP_TPG); - - value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); - value &= ~SOR_DP_PADCTL_TX_PU_MASK; - value |= SOR_DP_PADCTL_TX_PU_ENABLE; - value |= SOR_DP_PADCTL_TX_PU(tx_pu); - tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); - - usleep_range(20, 100); - - return 0; -} - -static int tegra_sor_dp_link_configure(struct drm_dp_link *link) -{ - struct tegra_sor *sor = container_of(link, struct tegra_sor, link); - unsigned int rate, lanes; - u32 value; - int err; - - rate = drm_dp_link_rate_to_bw_code(link->rate); - lanes = link->lanes; - - /* configure link speed and lane count */ - value = tegra_sor_readl(sor, SOR_CLK_CNTRL); - value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; - value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate); - tegra_sor_writel(sor, value, SOR_CLK_CNTRL); - - value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); - value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK; - value |= SOR_DP_LINKCTL_LANE_COUNT(lanes); - - if (link->caps.enhanced_framing) - value |= SOR_DP_LINKCTL_ENHANCED_FRAME; - - tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); - - usleep_range(400, 1000); - - /* configure load pulse position adjustment */ - value = tegra_sor_readl(sor, 
sor->soc->regs->pll1); - value &= ~SOR_PLL1_LOADADJ_MASK; - - switch (rate) { - case DP_LINK_BW_1_62: - value |= SOR_PLL1_LOADADJ(0x3); - break; - - case DP_LINK_BW_2_7: - value |= SOR_PLL1_LOADADJ(0x4); - break; - - case DP_LINK_BW_5_4: - value |= SOR_PLL1_LOADADJ(0x6); - break; - } - - tegra_sor_writel(sor, value, sor->soc->regs->pll1); - - /* use alternate scrambler reset for eDP */ value = tegra_sor_readl(sor, SOR_DP_SPARE0); - - if (link->edp == 0) - value &= ~SOR_DP_SPARE_PANEL_INTERNAL; - else - value |= SOR_DP_SPARE_PANEL_INTERNAL; - + value |= SOR_DP_SPARE_SEQ_ENABLE; + value &= ~SOR_DP_SPARE_PANEL_INTERNAL; + value |= SOR_DP_SPARE_MACRO_SOR_CLK; tegra_sor_writel(sor, value, SOR_DP_SPARE0); - err = tegra_sor_power_down_lanes(sor); - if (err < 0) { - dev_err(sor->dev, "failed to power down lanes: %d\n", err); - return err; + for (i = 0, value = 0; i < link->num_lanes; i++) { + unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_NONE | + SOR_DP_TPG_PATTERN_TRAIN2; + value = (value << 8) | lane; } - /* power up and pre-charge lanes */ - err = tegra_sor_power_up_lanes(sor, lanes); - if (err < 0) { - dev_err(sor->dev, "failed to power up %u lane%s: %d\n", - lanes, (lanes != 1) ? "s" : "", err); + tegra_sor_writel(sor, value, SOR_DP_TPG); + + pattern = DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_2; + + err = drm_dp_aux_train(sor->aux, link, pattern); + if (err < 0) return err; + + for (i = 0, value = 0; i < link->num_lanes; i++) { + unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_GALIOS | + SOR_DP_TPG_PATTERN_NONE; + value = (value << 8) | lane; } - tegra_sor_dp_precharge(sor, lanes); + tegra_sor_writel(sor, value, SOR_DP_TPG); + + pattern = DP_TRAINING_PATTERN_DISABLE; + + err = drm_dp_aux_train(sor->aux, link, pattern); + if (err < 0) + return err; return 0; } -static const struct drm_dp_link_ops tegra_sor_dp_link_ops = { - .apply_training = tegra_sor_dp_link_apply_training, - .configure = tegra_sor_dp_link_configure, -}; - static void tegra_sor_super_update(struct tegra_sor *sor) { tegra_sor_writel(sor, 0, SOR_SUPER_STATE0); @@ -1160,11 +913,11 @@ static int tegra_sor_compute_config(struct tegra_sor *sor, u32 num_syms_per_line; unsigned int i; - if (!link_rate || !link->lanes || !pclk || !config->bits_per_pixel) + if (!link_rate || !link->num_lanes || !pclk || !config->bits_per_pixel) return -EINVAL; + output = link_rate * 8 * link->num_lanes; input = pclk * config->bits_per_pixel; - output = link_rate * 8 * link->lanes; if (input >= output) return -ERANGE; @@ -1207,7 +960,7 @@ static int tegra_sor_compute_config(struct tegra_sor *sor, watermark = div_u64(watermark + params.error, f); config->watermark = watermark + (config->bits_per_pixel / 8) + 2; num_syms_per_line = (mode->hdisplay * config->bits_per_pixel) * - (link->lanes * 8); + (link->num_lanes * 8); if (config->watermark > 30) { config->watermark = 30; @@ -1224,15 +977,15 @@ static int tegra_sor_compute_config(struct tegra_sor *sor, num = ((mode->htotal - mode->hdisplay) - 7) * link_rate; config->hblank_symbols = div_u64(num, pclk); - if (link->caps.enhanced_framing) + if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING) config->hblank_symbols -= 3; - config->hblank_symbols -= 12 / link->lanes; + config->hblank_symbols -= 12 / link->num_lanes; /* compute the number of symbols per vertical blanking interval */ num = (mode->hdisplay - 25) * link_rate; config->vblank_symbols = div_u64(num, pclk); - config->vblank_symbols -= 36 / link->lanes + 4; + config->vblank_symbols -= 36 / 
link->num_lanes + 4; dev_dbg(sor->dev, "blank symbols: H:%u V:%u\n", config->hblank_symbols, config->vblank_symbols); @@ -1448,6 +1201,29 @@ static int tegra_sor_power_down(struct tegra_sor *sor) return err; } + value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); + value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 | + SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2); + tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); + + /* stop lane sequencer */ + value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP | + SOR_LANE_SEQ_CTL_POWER_STATE_DOWN; + tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL); + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { + value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL); + if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0) + break; + + usleep_range(25, 100); + } + + if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0) + return -ETIMEDOUT; + value = tegra_sor_readl(sor, sor->soc->regs->pll2); value |= SOR_PLL2_PORT_POWERDOWN; tegra_sor_writel(sor, value, sor->soc->regs->pll2); @@ -1809,6 +1585,403 @@ static const struct drm_encoder_funcs tegra_sor_encoder_funcs = { .destroy = tegra_output_encoder_destroy, }; +static void tegra_sor_edp_disable(struct drm_encoder *encoder) +{ + struct tegra_output *output = encoder_to_output(encoder); + struct tegra_dc *dc = to_tegra_dc(encoder->crtc); + struct tegra_sor *sor = to_sor(output); + u32 value; + int err; + + if (output->panel) + drm_panel_disable(output->panel); + + err = tegra_sor_detach(sor); + if (err < 0) + dev_err(sor->dev, "failed to detach SOR: %d\n", err); + + tegra_sor_writel(sor, 0, SOR_STATE1); + tegra_sor_update(sor); + + /* + * The following accesses registers of the display controller, so make + * sure it's only executed when the output is attached to one. 
+ */ + if (dc) { + value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); + value &= ~SOR_ENABLE(0); + tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); + + tegra_dc_commit(dc); + } + + err = tegra_sor_power_down(sor); + if (err < 0) + dev_err(sor->dev, "failed to power down SOR: %d\n", err); + + if (sor->aux) { + err = drm_dp_aux_disable(sor->aux); + if (err < 0) + dev_err(sor->dev, "failed to disable DP: %d\n", err); + } + + err = tegra_io_pad_power_disable(sor->pad); + if (err < 0) + dev_err(sor->dev, "failed to power off I/O pad: %d\n", err); + + if (output->panel) + drm_panel_unprepare(output->panel); + + pm_runtime_put(sor->dev); +} + +#if 0 +static int calc_h_ref_to_sync(const struct drm_display_mode *mode, + unsigned int *value) +{ + unsigned int hfp, hsw, hbp, a = 0, b; + + hfp = mode->hsync_start - mode->hdisplay; + hsw = mode->hsync_end - mode->hsync_start; + hbp = mode->htotal - mode->hsync_end; + + pr_info("hfp: %u, hsw: %u, hbp: %u\n", hfp, hsw, hbp); + + b = hfp - 1; + + pr_info("a: %u, b: %u\n", a, b); + pr_info("a + hsw + hbp = %u\n", a + hsw + hbp); + + if (a + hsw + hbp <= 11) { + a = 1 + 11 - hsw - hbp; + pr_info("a: %u\n", a); + } + + if (a > b) + return -EINVAL; + + if (hsw < 1) + return -EINVAL; + + if (mode->hdisplay < 16) + return -EINVAL; + + if (value) { + if (b > a && a % 2) + *value = a + 1; + else + *value = a; + } + + return 0; +} +#endif + +static void tegra_sor_edp_enable(struct drm_encoder *encoder) +{ + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct tegra_output *output = encoder_to_output(encoder); + struct tegra_dc *dc = to_tegra_dc(encoder->crtc); + struct tegra_sor *sor = to_sor(output); + struct tegra_sor_config config; + struct tegra_sor_state *state; + struct drm_dp_link link; + u8 rate, lanes; + unsigned int i; + int err = 0; + u32 value; + + state = to_sor_state(output->connector.state); + + pm_runtime_get_sync(sor->dev); + + if (output->panel) + drm_panel_prepare(output->panel); + + err = drm_dp_aux_enable(sor->aux); + if (err < 0) + dev_err(sor->dev, "failed to enable DP: %d\n", err); + + err = drm_dp_link_probe(sor->aux, &link); + if (err < 0) { + dev_err(sor->dev, "failed to probe eDP link: %d\n", err); + return; + } + + /* switch to safe parent clock */ + err = tegra_sor_set_parent_clock(sor, sor->clk_safe); + if (err < 0) + dev_err(sor->dev, "failed to set safe parent clock: %d\n", err); + + memset(&config, 0, sizeof(config)); + config.bits_per_pixel = state->bpc * 3; + + err = tegra_sor_compute_config(sor, mode, &config, &link); + if (err < 0) + dev_err(sor->dev, "failed to compute configuration: %d\n", err); + + value = tegra_sor_readl(sor, SOR_CLK_CNTRL); + value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK; + value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK; + tegra_sor_writel(sor, value, SOR_CLK_CNTRL); + + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value &= ~SOR_PLL2_BANDGAP_POWERDOWN; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + usleep_range(20, 100); + + value = tegra_sor_readl(sor, sor->soc->regs->pll3); + value |= SOR_PLL3_PLL_VDD_MODE_3V3; + tegra_sor_writel(sor, value, sor->soc->regs->pll3); + + value = SOR_PLL0_ICHPMP(0xf) | SOR_PLL0_VCOCAP_RST | + SOR_PLL0_PLLREG_LEVEL_V45 | SOR_PLL0_RESISTOR_EXT; + tegra_sor_writel(sor, value, sor->soc->regs->pll0); + + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value |= SOR_PLL2_SEQ_PLLCAPPD; + value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE; + value |= SOR_PLL2_LVDS_ENABLE; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + value = 
SOR_PLL1_TERM_COMPOUT | SOR_PLL1_TMDS_TERM; + tegra_sor_writel(sor, value, sor->soc->regs->pll1); + + while (true) { + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + if ((value & SOR_PLL2_SEQ_PLLCAPPD_ENFORCE) == 0) + break; + + usleep_range(250, 1000); + } + + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value &= ~SOR_PLL2_POWERDOWN_OVERRIDE; + value &= ~SOR_PLL2_PORT_POWERDOWN; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + /* + * power up + */ + + /* set safe link bandwidth (1.62 Gbps) */ + value = tegra_sor_readl(sor, SOR_CLK_CNTRL); + value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; + value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G1_62; + tegra_sor_writel(sor, value, SOR_CLK_CNTRL); + + /* step 1 */ + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value |= SOR_PLL2_SEQ_PLLCAPPD_ENFORCE | SOR_PLL2_PORT_POWERDOWN | + SOR_PLL2_BANDGAP_POWERDOWN; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + value = tegra_sor_readl(sor, sor->soc->regs->pll0); + value |= SOR_PLL0_VCOPD | SOR_PLL0_PWR; + tegra_sor_writel(sor, value, sor->soc->regs->pll0); + + value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); + value &= ~SOR_DP_PADCTL_PAD_CAL_PD; + tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); + + /* step 2 */ + err = tegra_io_pad_power_enable(sor->pad); + if (err < 0) + dev_err(sor->dev, "failed to power on I/O pad: %d\n", err); + + usleep_range(5, 100); + + /* step 3 */ + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value &= ~SOR_PLL2_BANDGAP_POWERDOWN; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + usleep_range(20, 100); + + /* step 4 */ + value = tegra_sor_readl(sor, sor->soc->regs->pll0); + value &= ~SOR_PLL0_VCOPD; + value &= ~SOR_PLL0_PWR; + tegra_sor_writel(sor, value, sor->soc->regs->pll0); + + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + usleep_range(200, 1000); + + /* step 5 */ + value = tegra_sor_readl(sor, sor->soc->regs->pll2); + value &= ~SOR_PLL2_PORT_POWERDOWN; + tegra_sor_writel(sor, value, sor->soc->regs->pll2); + + /* XXX not in TRM */ + for (value = 0, i = 0; i < 5; i++) + value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) | + SOR_XBAR_CTRL_LINK1_XSEL(i, i); + + tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL); + tegra_sor_writel(sor, value, SOR_XBAR_CTRL); + + /* switch to DP parent clock */ + err = tegra_sor_set_parent_clock(sor, sor->clk_dp); + if (err < 0) + dev_err(sor->dev, "failed to set parent clock: %d\n", err); + + /* power DP lanes */ + value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); + + if (link.num_lanes <= 2) + value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2); + else + value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2; + + if (link.num_lanes <= 1) + value &= ~SOR_DP_PADCTL_PD_TXD_1; + else + value |= SOR_DP_PADCTL_PD_TXD_1; + + if (link.num_lanes == 0) + value &= ~SOR_DP_PADCTL_PD_TXD_0; + else + value |= SOR_DP_PADCTL_PD_TXD_0; + + tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); + + value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); + value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK; + value |= SOR_DP_LINKCTL_LANE_COUNT(link.num_lanes); + tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); + + /* start lane sequencer */ + value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN | + SOR_LANE_SEQ_CTL_POWER_STATE_UP; + tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL); + + while (true) { + value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL); + if ((value & 
SOR_LANE_SEQ_CTL_TRIGGER) == 0) + break; + + usleep_range(250, 1000); + } + + /* set link bandwidth */ + value = tegra_sor_readl(sor, SOR_CLK_CNTRL); + value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; + value |= drm_dp_link_rate_to_bw_code(link.rate) << 2; + tegra_sor_writel(sor, value, SOR_CLK_CNTRL); + + tegra_sor_apply_config(sor, &config); + + /* enable link */ + value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); + value |= SOR_DP_LINKCTL_ENABLE; + value |= SOR_DP_LINKCTL_ENHANCED_FRAME; + tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); + + for (i = 0, value = 0; i < 4; i++) { + unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_GALIOS | + SOR_DP_TPG_PATTERN_NONE; + value = (value << 8) | lane; + } + + tegra_sor_writel(sor, value, SOR_DP_TPG); + + /* enable pad calibration logic */ + value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0); + value |= SOR_DP_PADCTL_PAD_CAL_PD; + tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0); + + err = drm_dp_link_probe(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to probe eDP link: %d\n", err); + + err = drm_dp_link_power_up(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to power up eDP link: %d\n", err); + + err = drm_dp_link_configure(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to configure eDP link: %d\n", err); + + rate = drm_dp_link_rate_to_bw_code(link.rate); + lanes = link.num_lanes; + + value = tegra_sor_readl(sor, SOR_CLK_CNTRL); + value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; + value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate); + tegra_sor_writel(sor, value, SOR_CLK_CNTRL); + + value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); + value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK; + value |= SOR_DP_LINKCTL_LANE_COUNT(lanes); + + if (link.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) + value |= SOR_DP_LINKCTL_ENHANCED_FRAME; + + tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); + + /* disable training pattern generator */ + + for (i = 0; i < link.num_lanes; i++) { + unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_GALIOS | + SOR_DP_TPG_PATTERN_NONE; + value = (value << 8) | lane; + } + + tegra_sor_writel(sor, value, SOR_DP_TPG); + + err = tegra_sor_dp_train_fast(sor, &link); + if (err < 0) + dev_err(sor->dev, "DP fast link training failed: %d\n", err); + + dev_dbg(sor->dev, "fast link training succeeded\n"); + + err = tegra_sor_power_up(sor, 250); + if (err < 0) + dev_err(sor->dev, "failed to power up SOR: %d\n", err); + + /* CSTM (LVDS, link A/B, upper) */ + value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B | + SOR_CSTM_UPPER; + tegra_sor_writel(sor, value, SOR_CSTM); + + /* use DP-A protocol */ + value = tegra_sor_readl(sor, SOR_STATE1); + value &= ~SOR_STATE_ASY_PROTOCOL_MASK; + value |= SOR_STATE_ASY_PROTOCOL_DP_A; + tegra_sor_writel(sor, value, SOR_STATE1); + + tegra_sor_mode_set(sor, mode, state); + + /* PWM setup */ + err = tegra_sor_setup_pwm(sor, 250); + if (err < 0) + dev_err(sor->dev, "failed to setup PWM: %d\n", err); + + tegra_sor_update(sor); + + value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); + value |= SOR_ENABLE(0); + tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); + + tegra_dc_commit(dc); + + err = tegra_sor_attach(sor); + if (err < 0) + dev_err(sor->dev, "failed to attach SOR: %d\n", err); + + err = tegra_sor_wakeup(sor); + if (err < 0) + dev_err(sor->dev, "failed to enable DC: %d\n", err); + + if (output->panel) + drm_panel_enable(output->panel); +} + static int tegra_sor_encoder_atomic_check(struct drm_encoder *encoder, struct 
drm_crtc_state *crtc_state, @@ -1858,6 +2031,12 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder, return 0; } +static const struct drm_encoder_helper_funcs tegra_sor_edp_helpers = { + .disable = tegra_sor_edp_disable, + .enable = tegra_sor_edp_enable, + .atomic_check = tegra_sor_encoder_atomic_check, +}; + static inline u32 tegra_sor_hdmi_subpack(const u8 *ptr, size_t size) { u32 value = 0; @@ -1982,15 +2161,6 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor) { u32 value; - /* - * Enable and unmask the HDA codec SCRATCH0 register interrupt. This - * is used for interoperability between the HDA codec driver and the - * HDMI/DP driver. - */ - value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0; - tegra_sor_writel(sor, value, SOR_INT_ENABLE); - tegra_sor_writel(sor, value, SOR_INT_MASK); - tegra_sor_write_eld(sor); value = SOR_AUDIO_HDA_PRESENSE_ELDV | SOR_AUDIO_HDA_PRESENSE_PD; @@ -2000,32 +2170,6 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor) static void tegra_sor_audio_unprepare(struct tegra_sor *sor) { tegra_sor_writel(sor, 0, SOR_AUDIO_HDA_PRESENSE); - tegra_sor_writel(sor, 0, SOR_INT_MASK); - tegra_sor_writel(sor, 0, SOR_INT_ENABLE); -} - -static void tegra_sor_audio_enable(struct tegra_sor *sor) -{ - u32 value; - - value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL); - - /* select HDA audio input */ - value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK); - value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA); - - /* inject null samples */ - if (sor->format.channels != 2) - value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL; - else - value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL; - - value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH; - - tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL); - - /* enable advertising HBR capability */ - tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE); } static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor) @@ -2063,7 +2207,24 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor) { u32 value; - tegra_sor_audio_enable(sor); + value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL); + + /* select HDA audio input */ + value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK); + value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA); + + /* inject null samples */ + if (sor->format.channels != 2) + value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL; + else + value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL; + + value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH; + + tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL); + + /* enable advertising HBR capability */ + tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE); tegra_sor_writel(sor, 0, SOR_HDMI_ACR_CTRL); @@ -2239,9 +2400,9 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder) value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); if (!sor->soc->has_nvdisplay) - value &= ~SOR1_TIMING_CYA; - - value &= ~SOR_ENABLE(sor->index); + value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1)); + else + value &= ~SOR_ENABLE(sor->index); tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); @@ -2399,34 +2560,16 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL); tegra_sor_writel(sor, value, SOR_XBAR_CTRL); - /* - * Switch the pad clock to the DP clock. Note that we cannot actually - * do this because Tegra186 and later don't support clk_set_parent() - * on the sorX_pad_clkout clocks. We already do the equivalent above - * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register. 
- */ -#if 0 - err = clk_set_parent(sor->clk_pad, sor->clk_dp); - if (err < 0) { - dev_err(sor->dev, "failed to select pad parent clock: %d\n", - err); - return; - } -#endif - - /* switch the SOR clock to the pad clock */ - err = tegra_sor_set_parent_clock(sor, sor->clk_pad); - if (err < 0) { - dev_err(sor->dev, "failed to select SOR parent clock: %d\n", - err); - return; - } - - /* switch the output clock to the parent pixel clock */ + /* switch to parent clock */ err = clk_set_parent(sor->clk, sor->clk_parent); if (err < 0) { - dev_err(sor->dev, "failed to select output parent clock: %d\n", - err); + dev_err(sor->dev, "failed to set parent clock: %d\n", err); + return; + } + + err = tegra_sor_set_parent_clock(sor, sor->clk_pad); + if (err < 0) { + dev_err(sor->dev, "failed to set pad clock: %d\n", err); return; } @@ -2632,9 +2775,9 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); if (!sor->soc->has_nvdisplay) - value |= SOR1_TIMING_CYA; - - value |= SOR_ENABLE(sor->index); + value |= SOR_ENABLE(1) | SOR1_TIMING_CYA; + else + value |= SOR_ENABLE(sor->index); tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); @@ -2661,396 +2804,6 @@ static const struct drm_encoder_helper_funcs tegra_sor_hdmi_helpers = { .atomic_check = tegra_sor_encoder_atomic_check, }; -static void tegra_sor_dp_disable(struct drm_encoder *encoder) -{ - struct tegra_output *output = encoder_to_output(encoder); - struct tegra_dc *dc = to_tegra_dc(encoder->crtc); - struct tegra_sor *sor = to_sor(output); - u32 value; - int err; - - if (output->panel) - drm_panel_disable(output->panel); - - /* - * Do not attempt to power down a DP link if we're not connected since - * the AUX transactions would just be timing out. 
- */ - if (output->connector.status != connector_status_disconnected) { - err = drm_dp_link_power_down(sor->aux, &sor->link); - if (err < 0) - dev_err(sor->dev, "failed to power down link: %d\n", - err); - } - - err = tegra_sor_detach(sor); - if (err < 0) - dev_err(sor->dev, "failed to detach SOR: %d\n", err); - - tegra_sor_writel(sor, 0, SOR_STATE1); - tegra_sor_update(sor); - - value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); - value &= ~SOR_ENABLE(sor->index); - tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); - tegra_dc_commit(dc); - - value = tegra_sor_readl(sor, SOR_STATE1); - value &= ~SOR_STATE_ASY_PROTOCOL_MASK; - value &= ~SOR_STATE_ASY_SUBOWNER_MASK; - value &= ~SOR_STATE_ASY_OWNER_MASK; - tegra_sor_writel(sor, value, SOR_STATE1); - tegra_sor_update(sor); - - /* switch to safe parent clock */ - err = tegra_sor_set_parent_clock(sor, sor->clk_safe); - if (err < 0) - dev_err(sor->dev, "failed to set safe clock: %d\n", err); - - err = tegra_sor_power_down(sor); - if (err < 0) - dev_err(sor->dev, "failed to power down SOR: %d\n", err); - - err = tegra_io_pad_power_disable(sor->pad); - if (err < 0) - dev_err(sor->dev, "failed to power off I/O pad: %d\n", err); - - err = drm_dp_aux_disable(sor->aux); - if (err < 0) - dev_err(sor->dev, "failed disable DPAUX: %d\n", err); - - if (output->panel) - drm_panel_unprepare(output->panel); - - pm_runtime_put(sor->dev); -} - -static void tegra_sor_dp_enable(struct drm_encoder *encoder) -{ - struct tegra_output *output = encoder_to_output(encoder); - struct tegra_dc *dc = to_tegra_dc(encoder->crtc); - struct tegra_sor *sor = to_sor(output); - struct tegra_sor_config config; - struct tegra_sor_state *state; - struct drm_display_mode *mode; - struct drm_display_info *info; - unsigned int i; - u32 value; - int err; - - state = to_sor_state(output->connector.state); - mode = &encoder->crtc->state->adjusted_mode; - info = &output->connector.display_info; - - pm_runtime_get_sync(sor->dev); - - /* switch to safe parent clock */ - err = tegra_sor_set_parent_clock(sor, sor->clk_safe); - if (err < 0) - dev_err(sor->dev, "failed to set safe parent clock: %d\n", err); - - err = tegra_io_pad_power_enable(sor->pad); - if (err < 0) - dev_err(sor->dev, "failed to power on LVDS rail: %d\n", err); - - usleep_range(20, 100); - - err = drm_dp_aux_enable(sor->aux); - if (err < 0) - dev_err(sor->dev, "failed to enable DPAUX: %d\n", err); - - err = drm_dp_link_probe(sor->aux, &sor->link); - if (err < 0) - dev_err(sor->dev, "failed to probe DP link: %d\n", err); - - tegra_sor_filter_rates(sor); - - err = drm_dp_link_choose(&sor->link, mode, info); - if (err < 0) - dev_err(sor->dev, "failed to choose link: %d\n", err); - - if (output->panel) - drm_panel_prepare(output->panel); - - value = tegra_sor_readl(sor, sor->soc->regs->pll2); - value &= ~SOR_PLL2_BANDGAP_POWERDOWN; - tegra_sor_writel(sor, value, sor->soc->regs->pll2); - - usleep_range(20, 40); - - value = tegra_sor_readl(sor, sor->soc->regs->pll3); - value |= SOR_PLL3_PLL_VDD_MODE_3V3; - tegra_sor_writel(sor, value, sor->soc->regs->pll3); - - value = tegra_sor_readl(sor, sor->soc->regs->pll0); - value &= ~(SOR_PLL0_VCOPD | SOR_PLL0_PWR); - tegra_sor_writel(sor, value, sor->soc->regs->pll0); - - value = tegra_sor_readl(sor, sor->soc->regs->pll2); - value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE; - value |= SOR_PLL2_SEQ_PLLCAPPD; - tegra_sor_writel(sor, value, sor->soc->regs->pll2); - - usleep_range(200, 400); - - value = tegra_sor_readl(sor, sor->soc->regs->pll2); - value &= ~SOR_PLL2_POWERDOWN_OVERRIDE; - value 
&= ~SOR_PLL2_PORT_POWERDOWN; - tegra_sor_writel(sor, value, sor->soc->regs->pll2); - - value = tegra_sor_readl(sor, SOR_CLK_CNTRL); - value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK; - - if (output->panel) - value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK; - else - value |= SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK; - - tegra_sor_writel(sor, value, SOR_CLK_CNTRL); - - usleep_range(200, 400); - - value = tegra_sor_readl(sor, SOR_DP_SPARE0); - /* XXX not in TRM */ - if (output->panel) - value |= SOR_DP_SPARE_PANEL_INTERNAL; - else - value &= ~SOR_DP_SPARE_PANEL_INTERNAL; - - value |= SOR_DP_SPARE_SEQ_ENABLE; - tegra_sor_writel(sor, value, SOR_DP_SPARE0); - - /* XXX not in TRM */ - tegra_sor_writel(sor, 0, SOR_LVDS); - - value = tegra_sor_readl(sor, sor->soc->regs->pll0); - value &= ~SOR_PLL0_ICHPMP_MASK; - value &= ~SOR_PLL0_VCOCAP_MASK; - value |= SOR_PLL0_ICHPMP(0x1); - value |= SOR_PLL0_VCOCAP(0x3); - value |= SOR_PLL0_RESISTOR_EXT; - tegra_sor_writel(sor, value, sor->soc->regs->pll0); - - /* XXX not in TRM */ - for (value = 0, i = 0; i < 5; i++) - value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) | - SOR_XBAR_CTRL_LINK1_XSEL(i, i); - - tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL); - tegra_sor_writel(sor, value, SOR_XBAR_CTRL); - - /* - * Switch the pad clock to the DP clock. Note that we cannot actually - * do this because Tegra186 and later don't support clk_set_parent() - * on the sorX_pad_clkout clocks. We already do the equivalent above - * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register. - */ -#if 0 - err = clk_set_parent(sor->clk_pad, sor->clk_parent); - if (err < 0) { - dev_err(sor->dev, "failed to select pad parent clock: %d\n", - err); - return; - } -#endif - - /* switch the SOR clock to the pad clock */ - err = tegra_sor_set_parent_clock(sor, sor->clk_pad); - if (err < 0) { - dev_err(sor->dev, "failed to select SOR parent clock: %d\n", - err); - return; - } - - /* switch the output clock to the parent pixel clock */ - err = clk_set_parent(sor->clk, sor->clk_parent); - if (err < 0) { - dev_err(sor->dev, "failed to select output parent clock: %d\n", - err); - return; - } - - /* use DP-A protocol */ - value = tegra_sor_readl(sor, SOR_STATE1); - value &= ~SOR_STATE_ASY_PROTOCOL_MASK; - value |= SOR_STATE_ASY_PROTOCOL_DP_A; - tegra_sor_writel(sor, value, SOR_STATE1); - - /* enable port */ - value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); - value |= SOR_DP_LINKCTL_ENABLE; - tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); - - tegra_sor_dp_term_calibrate(sor); - - err = drm_dp_link_train(&sor->link); - if (err < 0) - dev_err(sor->dev, "link training failed: %d\n", err); - else - dev_dbg(sor->dev, "link training succeeded\n"); - - err = drm_dp_link_power_up(sor->aux, &sor->link); - if (err < 0) - dev_err(sor->dev, "failed to power up DP link: %d\n", err); - - /* compute configuration */ - memset(&config, 0, sizeof(config)); - config.bits_per_pixel = state->bpc * 3; - - err = tegra_sor_compute_config(sor, mode, &config, &sor->link); - if (err < 0) - dev_err(sor->dev, "failed to compute configuration: %d\n", err); - - tegra_sor_apply_config(sor, &config); - tegra_sor_mode_set(sor, mode, state); - - if (output->panel) { - /* CSTM (LVDS, link A/B, upper) */ - value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B | - SOR_CSTM_UPPER; - tegra_sor_writel(sor, value, SOR_CSTM); - - /* PWM setup */ - err = tegra_sor_setup_pwm(sor, 250); - if (err < 0) - dev_err(sor->dev, "failed to setup PWM: %d\n", err); - } - - tegra_sor_update(sor); - - err = tegra_sor_power_up(sor, 250); - if 
(err < 0) - dev_err(sor->dev, "failed to power up SOR: %d\n", err); - - /* attach and wake up */ - err = tegra_sor_attach(sor); - if (err < 0) - dev_err(sor->dev, "failed to attach SOR: %d\n", err); - - value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); - value |= SOR_ENABLE(sor->index); - tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); - - tegra_dc_commit(dc); - - err = tegra_sor_wakeup(sor); - if (err < 0) - dev_err(sor->dev, "failed to wakeup SOR: %d\n", err); - - if (output->panel) - drm_panel_enable(output->panel); -} - -static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = { - .disable = tegra_sor_dp_disable, - .enable = tegra_sor_dp_enable, - .atomic_check = tegra_sor_encoder_atomic_check, -}; - -static int tegra_sor_hdmi_probe(struct tegra_sor *sor) -{ - int err; - - sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io"); - if (IS_ERR(sor->avdd_io_supply)) { - dev_err(sor->dev, "cannot get AVDD I/O supply: %ld\n", - PTR_ERR(sor->avdd_io_supply)); - return PTR_ERR(sor->avdd_io_supply); - } - - err = regulator_enable(sor->avdd_io_supply); - if (err < 0) { - dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n", - err); - return err; - } - - sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-pll"); - if (IS_ERR(sor->vdd_pll_supply)) { - dev_err(sor->dev, "cannot get VDD PLL supply: %ld\n", - PTR_ERR(sor->vdd_pll_supply)); - return PTR_ERR(sor->vdd_pll_supply); - } - - err = regulator_enable(sor->vdd_pll_supply); - if (err < 0) { - dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n", - err); - return err; - } - - sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi"); - if (IS_ERR(sor->hdmi_supply)) { - dev_err(sor->dev, "cannot get HDMI supply: %ld\n", - PTR_ERR(sor->hdmi_supply)); - return PTR_ERR(sor->hdmi_supply); - } - - err = regulator_enable(sor->hdmi_supply); - if (err < 0) { - dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err); - return err; - } - - INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work); - - return 0; -} - -static int tegra_sor_hdmi_remove(struct tegra_sor *sor) -{ - regulator_disable(sor->hdmi_supply); - regulator_disable(sor->vdd_pll_supply); - regulator_disable(sor->avdd_io_supply); - - return 0; -} - -static const struct tegra_sor_ops tegra_sor_hdmi_ops = { - .name = "HDMI", - .probe = tegra_sor_hdmi_probe, - .remove = tegra_sor_hdmi_remove, - .audio_enable = tegra_sor_hdmi_audio_enable, - .audio_disable = tegra_sor_hdmi_audio_disable, -}; - -static int tegra_sor_dp_probe(struct tegra_sor *sor) -{ - int err; - - sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp"); - if (IS_ERR(sor->avdd_io_supply)) - return PTR_ERR(sor->avdd_io_supply); - - err = regulator_enable(sor->avdd_io_supply); - if (err < 0) - return err; - - sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll"); - if (IS_ERR(sor->vdd_pll_supply)) - return PTR_ERR(sor->vdd_pll_supply); - - err = regulator_enable(sor->vdd_pll_supply); - if (err < 0) - return err; - - return 0; -} - -static int tegra_sor_dp_remove(struct tegra_sor *sor) -{ - regulator_disable(sor->vdd_pll_supply); - regulator_disable(sor->avdd_io_supply); - - return 0; -} - -static const struct tegra_sor_ops tegra_sor_dp_ops = { - .name = "DP", - .probe = tegra_sor_dp_probe, - .remove = tegra_sor_dp_remove, -}; - static int tegra_sor_init(struct host1x_client *client) { struct drm_device *drm = dev_get_drvdata(client->parent); @@ -3058,10 +2811,11 @@ static int tegra_sor_init(struct host1x_client *client) struct tegra_sor *sor = 
host1x_client_to_sor(client); int connector = DRM_MODE_CONNECTOR_Unknown; int encoder = DRM_MODE_ENCODER_NONE; + u32 value; int err; if (!sor->aux) { - if (sor->ops == &tegra_sor_hdmi_ops) { + if (sor->soc->supports_hdmi) { connector = DRM_MODE_CONNECTOR_HDMIA; encoder = DRM_MODE_ENCODER_TMDS; helpers = &tegra_sor_hdmi_helpers; @@ -3070,18 +2824,14 @@ static int tegra_sor_init(struct host1x_client *client) encoder = DRM_MODE_ENCODER_LVDS; } } else { - if (sor->output.panel) { + if (sor->soc->supports_edp) { connector = DRM_MODE_CONNECTOR_eDP; encoder = DRM_MODE_ENCODER_TMDS; - helpers = &tegra_sor_dp_helpers; - } else { + helpers = &tegra_sor_edp_helpers; + } else if (sor->soc->supports_dp) { connector = DRM_MODE_CONNECTOR_DisplayPort; encoder = DRM_MODE_ENCODER_TMDS; - helpers = &tegra_sor_dp_helpers; } - - sor->link.ops = &tegra_sor_dp_link_ops; - sor->link.aux = sor->aux; } sor->output.dev = sor->dev; @@ -3164,6 +2914,15 @@ static int tegra_sor_init(struct host1x_client *client) if (err < 0) return err; + /* + * Enable and unmask the HDA codec SCRATCH0 register interrupt. This + * is used for interoperability between the HDA codec driver and the + * HDMI/DP driver. + */ + value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0; + tegra_sor_writel(sor, value, SOR_INT_ENABLE); + tegra_sor_writel(sor, value, SOR_INT_MASK); + return 0; } @@ -3172,6 +2931,9 @@ static int tegra_sor_exit(struct host1x_client *client) struct tegra_sor *sor = host1x_client_to_sor(client); int err; + tegra_sor_writel(sor, 0, SOR_INT_MASK); + tegra_sor_writel(sor, 0, SOR_INT_ENABLE); + tegra_output_exit(&sor->output); if (sor->aux) { @@ -3194,6 +2956,75 @@ static const struct host1x_client_ops sor_client_ops = { .exit = tegra_sor_exit, }; +static const struct tegra_sor_ops tegra_sor_edp_ops = { + .name = "eDP", +}; + +static int tegra_sor_hdmi_probe(struct tegra_sor *sor) +{ + int err; + + sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io"); + if (IS_ERR(sor->avdd_io_supply)) { + dev_err(sor->dev, "cannot get AVDD I/O supply: %ld\n", + PTR_ERR(sor->avdd_io_supply)); + return PTR_ERR(sor->avdd_io_supply); + } + + err = regulator_enable(sor->avdd_io_supply); + if (err < 0) { + dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n", + err); + return err; + } + + sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-pll"); + if (IS_ERR(sor->vdd_pll_supply)) { + dev_err(sor->dev, "cannot get VDD PLL supply: %ld\n", + PTR_ERR(sor->vdd_pll_supply)); + return PTR_ERR(sor->vdd_pll_supply); + } + + err = regulator_enable(sor->vdd_pll_supply); + if (err < 0) { + dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n", + err); + return err; + } + + sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi"); + if (IS_ERR(sor->hdmi_supply)) { + dev_err(sor->dev, "cannot get HDMI supply: %ld\n", + PTR_ERR(sor->hdmi_supply)); + return PTR_ERR(sor->hdmi_supply); + } + + err = regulator_enable(sor->hdmi_supply); + if (err < 0) { + dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err); + return err; + } + + INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work); + + return 0; +} + +static int tegra_sor_hdmi_remove(struct tegra_sor *sor) +{ + regulator_disable(sor->hdmi_supply); + regulator_disable(sor->vdd_pll_supply); + regulator_disable(sor->avdd_io_supply); + + return 0; +} + +static const struct tegra_sor_ops tegra_sor_hdmi_ops = { + .name = "HDMI", + .probe = tegra_sor_hdmi_probe, + .remove = tegra_sor_hdmi_remove, +}; + static const u8 tegra124_sor_xbar_cfg[5] = { 0, 1, 2, 3, 4 }; @@ -3213,161 +3044,14 
@@ static const struct tegra_sor_regs tegra124_sor_regs = { .dp_padctl2 = 0x73, }; -/* Tegra124 and Tegra132 have lanes 0 and 2 swapped. */ -static const u8 tegra124_sor_lane_map[4] = { - 2, 1, 0, 3, -}; - -static const u8 tegra124_sor_voltage_swing[4][4][4] = { - { - { 0x13, 0x19, 0x1e, 0x28 }, - { 0x1e, 0x25, 0x2d, }, - { 0x28, 0x32, }, - { 0x3c, }, - }, { - { 0x12, 0x17, 0x1b, 0x25 }, - { 0x1c, 0x23, 0x2a, }, - { 0x25, 0x2f, }, - { 0x39, } - }, { - { 0x12, 0x16, 0x1a, 0x22 }, - { 0x1b, 0x20, 0x27, }, - { 0x24, 0x2d, }, - { 0x36, }, - }, { - { 0x11, 0x14, 0x17, 0x1f }, - { 0x19, 0x1e, 0x24, }, - { 0x22, 0x2a, }, - { 0x32, }, - }, -}; - -static const u8 tegra124_sor_pre_emphasis[4][4][4] = { - { - { 0x00, 0x09, 0x13, 0x25 }, - { 0x00, 0x0f, 0x1e, }, - { 0x00, 0x14, }, - { 0x00, }, - }, { - { 0x00, 0x0a, 0x14, 0x28 }, - { 0x00, 0x0f, 0x1e, }, - { 0x00, 0x14, }, - { 0x00 }, - }, { - { 0x00, 0x0a, 0x14, 0x28 }, - { 0x00, 0x0f, 0x1e, }, - { 0x00, 0x14, }, - { 0x00, }, - }, { - { 0x00, 0x0a, 0x14, 0x28 }, - { 0x00, 0x0f, 0x1e, }, - { 0x00, 0x14, }, - { 0x00, }, - }, -}; - -static const u8 tegra124_sor_post_cursor[4][4][4] = { - { - { 0x00, 0x00, 0x00, 0x00 }, - { 0x00, 0x00, 0x00, }, - { 0x00, 0x00, }, - { 0x00, }, - }, { - { 0x02, 0x02, 0x04, 0x05 }, - { 0x02, 0x04, 0x05, }, - { 0x04, 0x05, }, - { 0x05, }, - }, { - { 0x04, 0x05, 0x08, 0x0b }, - { 0x05, 0x09, 0x0b, }, - { 0x08, 0x0a, }, - { 0x0b, }, - }, { - { 0x05, 0x09, 0x0b, 0x12 }, - { 0x09, 0x0d, 0x12, }, - { 0x0b, 0x0f, }, - { 0x12, }, - }, -}; - -static const u8 tegra124_sor_tx_pu[4][4][4] = { - { - { 0x20, 0x30, 0x40, 0x60 }, - { 0x30, 0x40, 0x60, }, - { 0x40, 0x60, }, - { 0x60, }, - }, { - { 0x20, 0x20, 0x30, 0x50 }, - { 0x30, 0x40, 0x50, }, - { 0x40, 0x50, }, - { 0x60, }, - }, { - { 0x20, 0x20, 0x30, 0x40, }, - { 0x30, 0x30, 0x40, }, - { 0x40, 0x50, }, - { 0x60, }, - }, { - { 0x20, 0x20, 0x20, 0x40, }, - { 0x30, 0x30, 0x40, }, - { 0x40, 0x40, }, - { 0x60, }, - }, -}; - static const struct tegra_sor_soc tegra124_sor = { + .supports_edp = true, .supports_lvds = true, .supports_hdmi = false, - .supports_dp = true, - .supports_audio = false, - .supports_hdcp = false, + .supports_dp = false, .regs = &tegra124_sor_regs, .has_nvdisplay = false, .xbar_cfg = tegra124_sor_xbar_cfg, - .lane_map = tegra124_sor_lane_map, - .voltage_swing = tegra124_sor_voltage_swing, - .pre_emphasis = tegra124_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, -}; - -static const u8 tegra132_sor_pre_emphasis[4][4][4] = { - { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x01, 0x0e, 0x1d, }, - { 0x01, 0x13, }, - { 0x00, }, - }, { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00 }, - }, { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00, }, - }, { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00, }, - }, -}; - -static const struct tegra_sor_soc tegra132_sor = { - .supports_lvds = true, - .supports_hdmi = false, - .supports_dp = true, - .supports_audio = false, - .supports_hdcp = false, - .regs = &tegra124_sor_regs, - .has_nvdisplay = false, - .xbar_cfg = tegra124_sor_xbar_cfg, - .lane_map = tegra124_sor_lane_map, - .voltage_swing = tegra124_sor_voltage_swing, - .pre_emphasis = tegra132_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, }; static const struct tegra_sor_regs tegra210_sor_regs = { @@ -3385,50 +3069,33 @@ static const struct tegra_sor_regs tegra210_sor_regs = { .dp_padctl2 = 0x73, }; 
+static const struct tegra_sor_soc tegra210_sor = { + .supports_edp = true, + .supports_lvds = false, + .supports_hdmi = false, + .supports_dp = false, + .regs = &tegra210_sor_regs, + .has_nvdisplay = false, + .xbar_cfg = tegra124_sor_xbar_cfg, +}; + static const u8 tegra210_sor_xbar_cfg[5] = { 2, 1, 0, 3, 4 }; -static const u8 tegra210_sor_lane_map[4] = { - 0, 1, 2, 3, -}; - -static const struct tegra_sor_soc tegra210_sor = { - .supports_lvds = false, - .supports_hdmi = false, - .supports_dp = true, - .supports_audio = false, - .supports_hdcp = false, - - .regs = &tegra210_sor_regs, - .has_nvdisplay = false, - - .xbar_cfg = tegra210_sor_xbar_cfg, - .lane_map = tegra210_sor_lane_map, - .voltage_swing = tegra124_sor_voltage_swing, - .pre_emphasis = tegra124_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, -}; - static const struct tegra_sor_soc tegra210_sor1 = { + .supports_edp = false, .supports_lvds = false, .supports_hdmi = true, .supports_dp = true, - .supports_audio = true, - .supports_hdcp = true, .regs = &tegra210_sor_regs, .has_nvdisplay = false, .num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults), .settings = tegra210_sor_hdmi_defaults, + .xbar_cfg = tegra210_sor_xbar_cfg, - .lane_map = tegra210_sor_lane_map, - .voltage_swing = tegra124_sor_voltage_swing, - .pre_emphasis = tegra124_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, }; static const struct tegra_sor_regs tegra186_sor_regs = { @@ -3446,72 +3113,31 @@ static const struct tegra_sor_regs tegra186_sor_regs = { .dp_padctl2 = 0x16a, }; -static const u8 tegra186_sor_voltage_swing[4][4][4] = { - { - { 0x13, 0x19, 0x1e, 0x28 }, - { 0x1e, 0x25, 0x2d, }, - { 0x28, 0x32, }, - { 0x39, }, - }, { - { 0x12, 0x16, 0x1b, 0x25 }, - { 0x1c, 0x23, 0x2a, }, - { 0x25, 0x2f, }, - { 0x37, } - }, { - { 0x12, 0x16, 0x1a, 0x22 }, - { 0x1b, 0x20, 0x27, }, - { 0x24, 0x2d, }, - { 0x35, }, - }, { - { 0x11, 0x14, 0x17, 0x1f }, - { 0x19, 0x1e, 0x24, }, - { 0x22, 0x2a, }, - { 0x32, }, - }, -}; - -static const u8 tegra186_sor_pre_emphasis[4][4][4] = { - { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x01, 0x0e, 0x1d, }, - { 0x01, 0x13, }, - { 0x00, }, - }, { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00 }, - }, { - { 0x00, 0x08, 0x14, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00, }, - }, { - { 0x00, 0x08, 0x12, 0x24 }, - { 0x00, 0x0e, 0x1d, }, - { 0x00, 0x13, }, - { 0x00, }, - }, -}; - static const struct tegra_sor_soc tegra186_sor = { + .supports_edp = false, + .supports_lvds = false, + .supports_hdmi = false, + .supports_dp = true, + + .regs = &tegra186_sor_regs, + .has_nvdisplay = true, + + .xbar_cfg = tegra124_sor_xbar_cfg, +}; + +static const struct tegra_sor_soc tegra186_sor1 = { + .supports_edp = false, .supports_lvds = false, .supports_hdmi = true, .supports_dp = true, - .supports_audio = true, - .supports_hdcp = true, .regs = &tegra186_sor_regs, .has_nvdisplay = true, .num_settings = ARRAY_SIZE(tegra186_sor_hdmi_defaults), .settings = tegra186_sor_hdmi_defaults, + .xbar_cfg = tegra124_sor_xbar_cfg, - .lane_map = tegra124_sor_lane_map, - .voltage_swing = tegra186_sor_voltage_swing, - .pre_emphasis = tegra186_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, }; static const struct tegra_sor_regs tegra194_sor_regs = { @@ -3530,11 +3156,10 @@ static const struct tegra_sor_regs tegra194_sor_regs = { }; static const struct tegra_sor_soc tegra194_sor = { + .supports_edp = true, 
.supports_lvds = false, .supports_hdmi = true, .supports_dp = true, - .supports_audio = true, - .supports_hdcp = true, .regs = &tegra194_sor_regs, .has_nvdisplay = true, @@ -3543,19 +3168,14 @@ static const struct tegra_sor_soc tegra194_sor = { .settings = tegra194_sor_hdmi_defaults, .xbar_cfg = tegra210_sor_xbar_cfg, - .lane_map = tegra124_sor_lane_map, - .voltage_swing = tegra186_sor_voltage_swing, - .pre_emphasis = tegra186_sor_pre_emphasis, - .post_cursor = tegra124_sor_post_cursor, - .tx_pu = tegra124_sor_tx_pu, }; static const struct of_device_id tegra_sor_of_match[] = { { .compatible = "nvidia,tegra194-sor", .data = &tegra194_sor }, + { .compatible = "nvidia,tegra186-sor1", .data = &tegra186_sor1 }, { .compatible = "nvidia,tegra186-sor", .data = &tegra186_sor }, { .compatible = "nvidia,tegra210-sor1", .data = &tegra210_sor1 }, { .compatible = "nvidia,tegra210-sor", .data = &tegra210_sor }, - { .compatible = "nvidia,tegra132-sor", .data = &tegra132_sor }, { .compatible = "nvidia,tegra124-sor", .data = &tegra124_sor }, { }, }; @@ -3581,11 +3201,6 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor) * earlier */ sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index; - } else { - if (!sor->soc->supports_audio) - sor->index = 0; - else - sor->index = 1; } err = of_property_read_u32_array(np, "nvidia,xbar-cfg", xbar_cfg, 5); @@ -3620,11 +3235,9 @@ static irqreturn_t tegra_sor_irq(int irq, void *data) tegra_hda_parse_format(format, &sor->format); - if (sor->ops->audio_enable) - sor->ops->audio_enable(sor); + tegra_sor_hdmi_audio_enable(sor); } else { - if (sor->ops->audio_disable) - sor->ops->audio_disable(sor); + tegra_sor_hdmi_audio_disable(sor); } } @@ -3661,8 +3274,6 @@ static int tegra_sor_probe(struct platform_device *pdev) if (!sor->aux) return -EPROBE_DEFER; - - sor->output.ddc = &sor->aux->ddc; } if (!sor->aux) { @@ -3677,15 +3288,16 @@ static int tegra_sor_probe(struct platform_device *pdev) return -ENODEV; } } else { - np = of_parse_phandle(pdev->dev.of_node, "nvidia,panel", 0); - /* - * No need to keep this around since we only use it as a check - * to see if a panel is connected (eDP) or not (DP). - */ - of_node_put(np); - - sor->ops = &tegra_sor_dp_ops; - sor->pad = TEGRA_IO_PAD_LVDS; + if (sor->soc->supports_edp) { + sor->ops = &tegra_sor_edp_ops; + sor->pad = TEGRA_IO_PAD_LVDS; + } else if (sor->soc->supports_dp) { + dev_err(&pdev->dev, "DisplayPort not supported yet\n"); + return -ENODEV; + } else { + dev_err(&pdev->dev, "unknown (DP) support\n"); + return -ENODEV; + } } err = tegra_sor_parse_dt(sor); @@ -3840,8 +3452,6 @@ static int tegra_sor_probe(struct platform_device *pdev) * pad output clock. 
*/ if (!sor->clk_pad) { - char *name; - err = pm_runtime_get_sync(&pdev->dev); if (err < 0) { dev_err(&pdev->dev, "failed to get runtime PM: %d\n", @@ -3849,13 +3459,8 @@ static int tegra_sor_probe(struct platform_device *pdev) goto remove; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "sor%u_pad_clkout", sor->index); - if (!name) { - err = -ENOMEM; - goto remove; - } - - sor->clk_pad = tegra_clk_sor_pad_register(sor, name); + sor->clk_pad = tegra_clk_sor_pad_register(sor, + "sor1_pad_clkout"); pm_runtime_put(&pdev->dev); } diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h index 00e09d5dca30..f8efd8be4b7c 100644 --- a/drivers/gpu/drm/tegra/sor.h +++ b/drivers/gpu/drm/tegra/sor.h @@ -39,7 +39,6 @@ #define SOR_STATE_ASY_CRC_MODE_NON_ACTIVE (0x2 << 6) #define SOR_STATE_ASY_CRC_MODE_COMPLETE (0x1 << 6) #define SOR_STATE_ASY_CRC_MODE_ACTIVE (0x0 << 6) -#define SOR_STATE_ASY_SUBOWNER_MASK (0x3 << 4) #define SOR_STATE_ASY_OWNER_MASK 0xf #define SOR_STATE_ASY_OWNER(x) (((x) & 0xf) << 0) @@ -284,12 +283,10 @@ #define SOR_DP_PADCTL_CM_TXD_2 (1 << 6) #define SOR_DP_PADCTL_CM_TXD_1 (1 << 5) #define SOR_DP_PADCTL_CM_TXD_0 (1 << 4) -#define SOR_DP_PADCTL_CM_TXD(x) (1 << (4 + (x))) #define SOR_DP_PADCTL_PD_TXD_3 (1 << 3) #define SOR_DP_PADCTL_PD_TXD_0 (1 << 2) #define SOR_DP_PADCTL_PD_TXD_1 (1 << 1) #define SOR_DP_PADCTL_PD_TXD_2 (1 << 0) -#define SOR_DP_PADCTL_PD_TXD(x) (1 << (0 + (x))) #define SOR_DP_PADCTL1 0x5d diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 9444ba183990..cd0399fd8c63 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -34,6 +34,7 @@ struct vic { void __iomem *regs; struct tegra_drm_client client; struct host1x_channel *channel; + struct iommu_domain *domain; struct device *dev; struct clk *clk; struct reset_control *rst; @@ -96,9 +97,6 @@ static int vic_runtime_suspend(struct device *dev) static int vic_boot(struct vic *vic) { -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); -#endif u32 fce_ucode_size, fce_bin_data_offset; void *hdr; int err = 0; @@ -107,14 +105,15 @@ static int vic_boot(struct vic *vic) return 0; #ifdef CONFIG_IOMMU_API - if (vic->config->supports_sid && spec) { + if (vic->config->supports_sid) { + struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); u32 value; value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW); vic_writel(vic, value, VIC_TFBIF_TRANSCFG); - if (spec->num_ids > 0) { + if (spec && spec->num_ids > 0) { value = spec->ids[0] & 0xffff; vic_writel(vic, value, VIC_THI_STREAMID0); @@ -133,9 +132,9 @@ static int vic_boot(struct vic *vic) if (err < 0) return err; - hdr = vic->falcon.firmware.virt; + hdr = vic->falcon.firmware.vaddr; fce_bin_data_offset = *(u32 *)(hdr + VIC_UCODE_FCE_DATA_OFFSET); - hdr = vic->falcon.firmware.virt + + hdr = vic->falcon.firmware.vaddr + *(u32 *)(hdr + VIC_UCODE_FCE_HEADER_OFFSET); fce_ucode_size = *(u32 *)(hdr + FCE_UCODE_SIZE_OFFSET); @@ -143,7 +142,7 @@ static int vic_boot(struct vic *vic) falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_SIZE, fce_ucode_size); falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_OFFSET, - (vic->falcon.firmware.iova + fce_bin_data_offset) + (vic->falcon.firmware.paddr + fce_bin_data_offset) >> 8); err = falcon_wait_idle(&vic->falcon); @@ -158,21 +157,48 @@ static int vic_boot(struct vic *vic) return 0; } +static void *vic_falcon_alloc(struct falcon *falcon, size_t size, + dma_addr_t *iova) +{ + struct tegra_drm *tegra = falcon->data; + 
+ return tegra_drm_alloc(tegra, size, iova); +} + +static void vic_falcon_free(struct falcon *falcon, size_t size, + dma_addr_t iova, void *va) +{ + struct tegra_drm *tegra = falcon->data; + + return tegra_drm_free(tegra, size, va, iova); +} + +static const struct falcon_ops vic_falcon_ops = { + .alloc = vic_falcon_alloc, + .free = vic_falcon_free +}; + static int vic_init(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct iommu_group *group = iommu_group_get(client->dev); struct drm_device *dev = dev_get_drvdata(client->parent); struct tegra_drm *tegra = dev->dev_private; struct vic *vic = to_vic(drm); int err; - err = host1x_client_iommu_attach(client); - if (err < 0) { - dev_err(vic->dev, "failed to attach to domain: %d\n", err); - return err; + if (group && tegra->domain) { + err = iommu_attach_group(tegra->domain, group); + if (err < 0) { + dev_err(vic->dev, "failed to attach to domain: %d\n", + err); + return err; + } + + vic->domain = tegra->domain; } - vic->channel = host1x_channel_request(client); + vic->channel = host1x_channel_request(client->dev); if (!vic->channel) { err = -ENOMEM; goto detach; @@ -188,12 +214,6 @@ static int vic_init(struct host1x_client *client) if (err < 0) goto free_syncpt; - /* - * Inherit the DMA parameters (such as maximum segment size) from the - * parent device. - */ - client->dev->dma_parms = client->parent->dma_parms; - return 0; free_syncpt: @@ -201,7 +221,8 @@ free_syncpt: free_channel: host1x_channel_put(vic->channel); detach: - host1x_client_iommu_detach(client); + if (group && tegra->domain) + iommu_detach_group(tegra->domain, group); return err; } @@ -209,32 +230,22 @@ detach: static int vic_exit(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct iommu_group *group = iommu_group_get(client->dev); struct drm_device *dev = dev_get_drvdata(client->parent); struct tegra_drm *tegra = dev->dev_private; struct vic *vic = to_vic(drm); int err; - /* avoid a dangling pointer just in case this disappears */ - client->dev->dma_parms = NULL; - err = tegra_drm_unregister_client(tegra, drm); if (err < 0) return err; host1x_syncpt_free(client->syncpts[0]); host1x_channel_put(vic->channel); - host1x_client_iommu_detach(client); - if (client->group) { - dma_unmap_single(vic->dev, vic->falcon.firmware.phys, - vic->falcon.firmware.size, DMA_TO_DEVICE); - tegra_drm_free(tegra, vic->falcon.firmware.size, - vic->falcon.firmware.virt, - vic->falcon.firmware.iova); - } else { - dma_free_coherent(vic->dev, vic->falcon.firmware.size, - vic->falcon.firmware.virt, - vic->falcon.firmware.iova); + if (vic->domain) { + iommu_detach_group(vic->domain, group); + vic->domain = NULL; } return 0; @@ -247,64 +258,25 @@ static const struct host1x_client_ops vic_client_ops = { static int vic_load_firmware(struct vic *vic) { - struct host1x_client *client = &vic->client.base; - struct tegra_drm *tegra = vic->client.drm; - dma_addr_t iova; - size_t size; - void *virt; int err; - if (vic->falcon.firmware.virt) + if (vic->falcon.data) return 0; + vic->falcon.data = vic->client.drm; + err = falcon_read_firmware(&vic->falcon, vic->config->firmware); if (err < 0) - return err; - - size = vic->falcon.firmware.size; - - if (!client->group) { - virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(vic->dev, iova); - if (err < 0) - return err; - } else { - virt = tegra_drm_alloc(tegra, size, &iova); - } - - vic->falcon.firmware.virt = virt; - 
vic->falcon.firmware.iova = iova; + goto cleanup; err = falcon_load_firmware(&vic->falcon); if (err < 0) goto cleanup; - /* - * In this case we have received an IOVA from the shared domain, so we - * need to make sure to get the physical address so that the DMA API - * knows what memory pages to flush the cache for. - */ - if (client->group) { - dma_addr_t phys; - - phys = dma_map_single(vic->dev, virt, size, DMA_TO_DEVICE); - - err = dma_mapping_error(vic->dev, phys); - if (err < 0) - goto cleanup; - - vic->falcon.firmware.phys = phys; - } - return 0; cleanup: - if (!client->group) - dma_free_coherent(vic->dev, size, virt, iova); - else - tegra_drm_free(tegra, size, virt, iova); - + vic->falcon.data = NULL; return err; } @@ -402,13 +374,6 @@ static int vic_probe(struct platform_device *pdev) struct vic *vic; int err; - /* inherit DMA mask from host1x parent */ - err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask); - if (err < 0) { - dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); - return err; - } - vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL); if (!vic) return -ENOMEM; @@ -445,6 +410,7 @@ static int vic_probe(struct platform_device *pdev) vic->falcon.dev = dev; vic->falcon.regs = vic->regs; + vic->falcon.ops = &vic_falcon_ops; err = falcon_init(&vic->falcon); if (err < 0) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8d91b0428af1..d52fc16266ce 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -900,8 +900,7 @@ EXPORT_SYMBOL(ttm_bo_mem_put); */ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, struct ttm_mem_type_manager *man, - struct ttm_mem_reg *mem, - bool no_wait_gpu) + struct ttm_mem_reg *mem) { struct dma_fence *fence; int ret; @@ -910,22 +909,19 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, fence = dma_fence_get(man->move); spin_unlock(&man->move_lock); - if (!fence) - return 0; + if (fence) { + dma_resv_add_shared_fence(bo->base.resv, fence); - if (no_wait_gpu) - return -EBUSY; + ret = dma_resv_reserve_shared(bo->base.resv, 1); + if (unlikely(ret)) { + dma_fence_put(fence); + return ret; + } - dma_resv_add_shared_fence(bo->base.resv, fence); - - ret = dma_resv_reserve_shared(bo->base.resv, 1); - if (unlikely(ret)) { - dma_fence_put(fence); - return ret; + dma_fence_put(bo->moving); + bo->moving = fence; } - dma_fence_put(bo->moving); - bo->moving = fence; return 0; } @@ -956,7 +952,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, return ret; } while (1); - return ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu); + return ttm_bo_add_move_fence(bo, man, mem); } static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man, @@ -1096,18 +1092,14 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (unlikely(ret)) goto error; - if (!mem->mm_node) - continue; - - ret = ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu); - if (unlikely(ret)) { - (*man->func->put_node)(man, mem); - if (ret == -EBUSY) - continue; - - goto error; + if (mem->mm_node) { + ret = ttm_bo_add_move_fence(bo, man, mem); + if (unlikely(ret)) { + (*man->func->put_node)(man, mem); + goto error; + } + return 0; } - return 0; } for (i = 0; i < placement->num_busy_placement; ++i) { diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index 6dab94adf25e..cf987a317a55 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -2,7 +2,7 @@ config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && 
COMPILE_TEST) - select IOMMU_IOVA + select IOMMU_IOVA if IOMMU_SUPPORT help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 2c8559ff3481..742aa9ff21b8 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -445,7 +445,7 @@ static int host1x_device_add(struct host1x *host1x, of_dma_configure(&device->dev, host1x->dev->of_node, true); device->dev.dma_parms = &device->dma_parms; - dma_set_max_seg_size(&device->dev, UINT_MAX); + dma_set_max_seg_size(&device->dev, SZ_4M); err = host1x_device_parse_dt(device, driver); if (err < 0) { diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index e8d3fda91d8a..48c84c48299c 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -232,9 +232,9 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, * * Must be called with the cdma lock held. */ -static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, - struct host1x_cdma *cdma, - unsigned int needed) +int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) { while (true) { struct push_buffer *pb = &cdma->push_buffer; diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 4cd212bb570d..1436295aa450 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -115,14 +115,14 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host) /** * host1x_channel_request() - Allocate a channel - * @client: Host1x client this channel will be used to send commands to + * @device: Host1x unit this channel will be used to send commands to * - * Allocates a new host1x channel for @client. May return NULL if CDMA + * Allocates a new host1x channel for @device. May return NULL if CDMA * initialization fails. 
*/ -struct host1x_channel *host1x_channel_request(struct host1x_client *client) +struct host1x_channel *host1x_channel_request(struct device *dev) { - struct host1x *host = dev_get_drvdata(client->dev->parent); + struct host1x *host = dev_get_drvdata(dev->parent); struct host1x_channel_list *chlist = &host->channel_list; struct host1x_channel *channel; int err; @@ -133,8 +133,7 @@ struct host1x_channel *host1x_channel_request(struct host1x_client *client) kref_init(&channel->refcount); mutex_init(&channel->submitlock); - channel->client = client; - channel->dev = client->dev; + channel->dev = dev; err = host1x_hw_channel_init(host, channel, channel->id); if (err < 0) @@ -149,7 +148,7 @@ struct host1x_channel *host1x_channel_request(struct host1x_client *client) fail: clear_bit(channel->id, chlist->allocated_channels); - dev_err(client->dev, "failed to initialize channel\n"); + dev_err(dev, "failed to initialize channel\n"); return NULL; } diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h index 39044ff6c3aa..4fd694834f74 100644 --- a/drivers/gpu/host1x/channel.h +++ b/drivers/gpu/host1x/channel.h @@ -26,7 +26,6 @@ struct host1x_channel { unsigned int id; struct mutex submitlock; void __iomem *regs; - struct host1x_client *client; struct device *dev; struct host1x_cdma cdma; }; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index a738ea55e407..5a3f797240d4 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -18,6 +18,10 @@ #include #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + #include "bus.h" #include "channel.h" #include "debug.h" @@ -73,10 +77,6 @@ static const struct host1x_info host1x01_info = { .init = host1x01_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), - .has_wide_gather = false, - .has_hypervisor = false, - .num_sid_entries = 0, - .sid_table = NULL, }; static const struct host1x_info host1x02_info = { @@ -87,10 +87,6 @@ static const struct host1x_info host1x02_info = { .init = host1x02_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), - .has_wide_gather = false, - .has_hypervisor = false, - .num_sid_entries = 0, - .sid_table = NULL, }; static const struct host1x_info host1x04_info = { @@ -101,10 +97,6 @@ static const struct host1x_info host1x04_info = { .init = host1x04_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), - .has_wide_gather = false, - .has_hypervisor = false, - .num_sid_entries = 0, - .sid_table = NULL, }; static const struct host1x_info host1x05_info = { @@ -115,10 +107,6 @@ static const struct host1x_info host1x05_info = { .init = host1x05_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), - .has_wide_gather = false, - .has_hypervisor = false, - .num_sid_entries = 0, - .sid_table = NULL, }; static const struct host1x_sid_entry tegra186_sid_table[] = { @@ -138,7 +126,6 @@ static const struct host1x_info host1x06_info = { .init = host1x06_init, .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(40), - .has_wide_gather = true, .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, @@ -161,7 +148,6 @@ static const struct host1x_info host1x07_info = { .init = host1x07_init, .sync_offset = 0x0, .dma_mask = DMA_BIT_MASK(40), - .has_wide_gather = true, .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), .sid_table = tegra194_sid_table, @@ -192,117 +178,6 @@ static void host1x_setup_sid_table(struct host1x *host) } } -static struct iommu_domain 
*host1x_iommu_attach(struct host1x *host) -{ - struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); - int err; - - /* - * If the host1x firewall is enabled, there's no need to enable IOMMU - * support. Similarly, if host1x is already attached to an IOMMU (via - * the DMA API), don't try to attach again. - */ - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain) - return domain; - - host->group = iommu_group_get(host->dev); - if (host->group) { - struct iommu_domain_geometry *geometry; - dma_addr_t start, end; - unsigned long order; - - err = iova_cache_get(); - if (err < 0) - goto put_group; - - host->domain = iommu_domain_alloc(&platform_bus_type); - if (!host->domain) { - err = -ENOMEM; - goto put_cache; - } - - err = iommu_attach_group(host->domain, host->group); - if (err) { - if (err == -ENODEV) - err = 0; - - goto free_domain; - } - - geometry = &host->domain->geometry; - start = geometry->aperture_start & host->info->dma_mask; - end = geometry->aperture_end & host->info->dma_mask; - - order = __ffs(host->domain->pgsize_bitmap); - init_iova_domain(&host->iova, 1UL << order, start >> order); - host->iova_end = end; - - domain = host->domain; - } - - return domain; - -free_domain: - iommu_domain_free(host->domain); - host->domain = NULL; -put_cache: - iova_cache_put(); -put_group: - iommu_group_put(host->group); - host->group = NULL; - - return ERR_PTR(err); -} - -static int host1x_iommu_init(struct host1x *host) -{ - u64 mask = host->info->dma_mask; - struct iommu_domain *domain; - int err; - - domain = host1x_iommu_attach(host); - if (IS_ERR(domain)) { - err = PTR_ERR(domain); - dev_err(host->dev, "failed to attach to IOMMU: %d\n", err); - return err; - } - - /* - * If we're not behind an IOMMU make sure we don't get push buffers - * that are allocated outside of the range addressable by the GATHER - * opcode. - * - * Newer generations of Tegra (Tegra186 and later) support a wide - * variant of the GATHER opcode that allows addressing more bits. 
- */ - if (!domain && !host->info->has_wide_gather) - mask = DMA_BIT_MASK(32); - - err = dma_coerce_mask_and_coherent(host->dev, mask); - if (err < 0) { - dev_err(host->dev, "failed to set DMA mask: %d\n", err); - return err; - } - - return 0; -} - -static void host1x_iommu_exit(struct host1x *host) -{ - if (host->domain) { - put_iova_domain(&host->iova); - iommu_detach_group(host->domain, host->group); - - iommu_domain_free(host->domain); - host->domain = NULL; - - iova_cache_put(); - - iommu_group_put(host->group); - host->group = NULL; - } -} - static int host1x_probe(struct platform_device *pdev) { struct host1x *host; @@ -362,8 +237,7 @@ static int host1x_probe(struct platform_device *pdev) return PTR_ERR(host->hv_regs); } - host->dev->dma_parms = &host->dma_parms; - dma_set_max_seg_size(host->dev, UINT_MAX); + dma_set_mask_and_coherent(host->dev, host->info->dma_mask); if (host->info->init) { err = host->info->init(host); @@ -387,42 +261,87 @@ static int host1x_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to get reset: %d\n", err); return err; } +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + } +#endif + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + goto skip_iommu; - err = host1x_iommu_init(host); - if (err < 0) { - dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); - return err; + host->group = iommu_group_get(&pdev->dev); + if (host->group) { + struct iommu_domain_geometry *geometry; + u64 mask = dma_get_mask(host->dev); + dma_addr_t start, end; + unsigned long order; + + err = iova_cache_get(); + if (err < 0) + goto put_group; + + host->domain = iommu_domain_alloc(&platform_bus_type); + if (!host->domain) { + err = -ENOMEM; + goto put_cache; + } + + err = iommu_attach_group(host->domain, host->group); + if (err) { + if (err == -ENODEV) { + iommu_domain_free(host->domain); + host->domain = NULL; + iova_cache_put(); + iommu_group_put(host->group); + host->group = NULL; + goto skip_iommu; + } + + goto fail_free_domain; + } + + geometry = &host->domain->geometry; + start = geometry->aperture_start & mask; + end = geometry->aperture_end & mask; + + order = __ffs(host->domain->pgsize_bitmap); + init_iova_domain(&host->iova, 1UL << order, start >> order); + host->iova_end = end; } +skip_iommu: err = host1x_channel_list_init(&host->channel_list, host->info->nb_channels); if (err) { dev_err(&pdev->dev, "failed to initialize channel list\n"); - goto iommu_exit; + goto fail_detach_device; } err = clk_prepare_enable(host->clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable clock\n"); - goto free_channels; + goto fail_free_channels; } err = reset_control_deassert(host->rst); if (err < 0) { dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto unprepare_disable; + goto fail_unprepare_disable; } err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto reset_assert; + goto fail_reset_assert; } err = host1x_intr_init(host, syncpt_irq); if (err) { dev_err(&pdev->dev, "failed to initialize interrupts\n"); - goto deinit_syncpt; + goto fail_deinit_syncpt; } host1x_debug_init(host); @@ -432,22 +351,33 @@ static int host1x_probe(struct platform_device *pdev) err = host1x_register(host); if (err < 0) - goto deinit_intr; + goto fail_deinit_intr; return 0; -deinit_intr: +fail_deinit_intr: host1x_intr_deinit(host); -deinit_syncpt: 
+fail_deinit_syncpt: host1x_syncpt_deinit(host); -reset_assert: +fail_reset_assert: reset_control_assert(host->rst); -unprepare_disable: +fail_unprepare_disable: clk_disable_unprepare(host->clk); -free_channels: +fail_free_channels: host1x_channel_list_free(&host->channel_list); -iommu_exit: - host1x_iommu_exit(host); +fail_detach_device: + if (host->group && host->domain) { + put_iova_domain(&host->iova); + iommu_detach_group(host->domain, host->group); + } +fail_free_domain: + if (host->domain) + iommu_domain_free(host->domain); +put_cache: + if (host->group) + iova_cache_put(); +put_group: + iommu_group_put(host->group); return err; } @@ -457,12 +387,18 @@ static int host1x_remove(struct platform_device *pdev) struct host1x *host = platform_get_drvdata(pdev); host1x_unregister(host); - host1x_debug_deinit(host); host1x_intr_deinit(host); host1x_syncpt_deinit(host); reset_control_assert(host->rst); clk_disable_unprepare(host->clk); - host1x_iommu_exit(host); + + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_group(host->domain, host->group); + iommu_domain_free(host->domain); + iova_cache_put(); + iommu_group_put(host->group); + } return 0; } diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index f781a9b0f39d..ff56f5e23a02 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -97,7 +97,6 @@ struct host1x_info { int (*init)(struct host1x *host1x); /* initialize per SoC ops */ unsigned int sync_offset; /* offset of syncpoint registers */ u64 dma_mask; /* mask of addressable memory */ - bool has_wide_gather; /* supports GATHER_W opcode */ bool has_hypervisor; /* has hypervisor registers */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; @@ -141,8 +140,6 @@ struct host1x { struct list_head devices; struct list_head list; - - struct device_dma_parameters dma_parms; }; void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 9245add23b5d..26f3c741d085 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -105,6 +105,7 @@ static void action_submit_complete(struct host1x_waitlist *waiter) /* Add nr_completed to trace */ trace_host1x_channel_submit_complete(dev_name(channel->dev), waiter->count, waiter->thresh); + } static void action_wakeup(struct host1x_waitlist *waiter) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 25ca54de8fc5..eaa5c3352c13 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -99,8 +99,6 @@ EXPORT_SYMBOL(host1x_job_add_gather); static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { - struct host1x_client *client = job->client; - struct device *dev = client->dev; unsigned int i; int err; @@ -108,8 +106,8 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; - dma_addr_t phys_addr, *phys; struct sg_table *sgt; + dma_addr_t phys_addr; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -117,50 +115,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - if (client->group) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(dev, reloc->target.bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } - - if (sgt) { - unsigned long mask = HOST1X_RELOC_READ | - HOST1X_RELOC_WRITE; - enum dma_data_direction dir; - - switch (reloc->flags & 
mask) { - case HOST1X_RELOC_READ: - dir = DMA_TO_DEVICE; - break; - - case HOST1X_RELOC_WRITE: - dir = DMA_FROM_DEVICE; - break; - - case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: - dir = DMA_BIDIRECTIONAL; - break; - - default: - err = -EINVAL; - goto unpin; - } - - err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir); - if (!err) { - err = -ENOMEM; - goto unpin; - } - - job->unpins[job->num_unpins].dev = dev; - job->unpins[job->num_unpins].dir = dir; - phys_addr = sg_dma_address(sgt->sgl); - } + phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); job->addr_phys[job->num_unpins] = phys_addr; job->unpins[job->num_unpins].bo = reloc->target.bo; @@ -184,11 +139,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - sgt = host1x_bo_pin(host->dev, g->bo, NULL); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + phys_addr = host1x_bo_pin(g->bo, &sgt); if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { for_each_sg(sgt->sgl, sg, sgt->nents, j) @@ -212,24 +163,15 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } + job->addr_phys[job->num_unpins] = + iova_dma_addr(&host->iova, alloc); job->unpins[job->num_unpins].size = gather_size; - phys_addr = iova_dma_addr(&host->iova, alloc); } else { - err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - if (!err) { - err = -ENOMEM; - goto unpin; - } - - job->unpins[job->num_unpins].dev = host->dev; - phys_addr = sg_dma_address(sgt->sgl); + job->addr_phys[job->num_unpins] = phys_addr; } - job->addr_phys[job->num_unpins] = phys_addr; - job->gather_addr_phys[i] = phys_addr; + job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; @@ -494,8 +436,7 @@ out: return err; } -static inline int copy_gathers(struct device *host, struct host1x_job *job, - struct device *dev) +static inline int copy_gathers(struct host1x_job *job, struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -518,12 +459,12 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. 
*/ - job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(host, size, + job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -571,7 +512,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) goto out; if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(host->dev, job, dev); + err = copy_gathers(job, dev); if (err) goto out; } @@ -616,8 +557,6 @@ void host1x_job_unpin(struct host1x_job *job) for (i = 0; i < job->num_unpins; i++) { struct host1x_job_unpin_data *unpin = &job->unpins[i]; - struct device *dev = unpin->dev ?: host->dev; - struct sg_table *sgt = unpin->sgt; if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && unpin->size && host->domain) { @@ -627,18 +566,14 @@ void host1x_job_unpin(struct host1x_job *job) iova_pfn(&host->iova, job->addr_phys[i])); } - if (unpin->dev && sgt) - dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents, - unpin->dir); - - host1x_bo_unpin(dev, unpin->bo, sgt); + host1x_bo_unpin(unpin->bo, unpin->sgt); host1x_bo_put(unpin->bo); } job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(host->dev, job->gather_copy_size, + dma_free_wc(job->channel->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 94bc2e4ae241..62b8805e6b35 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -8,8 +8,6 @@ #ifndef __HOST1X_JOB_H #define __HOST1X_JOB_H -#include - struct host1x_job_gather { unsigned int words; dma_addr_t base; @@ -21,9 +19,7 @@ struct host1x_job_gather { struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; - struct device *dev; size_t size; - enum dma_data_direction dir; }; /* diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index ce4103208619..e94b19782c92 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -25,6 +25,13 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) } #endif /* __arch_get_clock_mode */ +#ifndef __arch_use_vsyscall +static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata) +{ + return 1; +} +#endif /* __arch_use_vsyscall */ + #ifndef __arch_update_vsyscall static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index b1230e33d506..296aab724677 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -27,36 +27,34 @@ */ enum amd_asic_type { CHIP_TAHITI = 0, - CHIP_PITCAIRN, /* 1 */ - CHIP_VERDE, /* 2 */ - CHIP_OLAND, /* 3 */ - CHIP_HAINAN, /* 4 */ - CHIP_BONAIRE, /* 5 */ - CHIP_KAVERI, /* 6 */ - CHIP_KABINI, /* 7 */ - CHIP_HAWAII, /* 8 */ - CHIP_MULLINS, /* 9 */ - CHIP_TOPAZ, /* 10 */ - CHIP_TONGA, /* 11 */ - CHIP_FIJI, /* 12 */ - CHIP_CARRIZO, /* 13 */ - CHIP_STONEY, /* 14 */ - CHIP_POLARIS10, /* 15 */ - CHIP_POLARIS11, /* 16 */ - CHIP_POLARIS12, /* 17 */ - CHIP_VEGAM, /* 18 */ - CHIP_VEGA10, /* 19 */ - CHIP_VEGA12, /* 20 */ - CHIP_VEGA20, /* 21 */ - CHIP_RAVEN, /* 22 */ - CHIP_ARCTURUS, /* 23 */ - CHIP_RENOIR, /* 24 */ - CHIP_NAVI10, /* 25 */ - CHIP_NAVI14, /* 26 */ - CHIP_NAVI12, /* 27 */ + 
CHIP_PITCAIRN, + CHIP_VERDE, + CHIP_OLAND, + CHIP_HAINAN, + CHIP_BONAIRE, + CHIP_KAVERI, + CHIP_KABINI, + CHIP_HAWAII, + CHIP_MULLINS, + CHIP_TOPAZ, + CHIP_TONGA, + CHIP_FIJI, + CHIP_CARRIZO, + CHIP_STONEY, + CHIP_POLARIS10, + CHIP_POLARIS11, + CHIP_POLARIS12, + CHIP_VEGAM, + CHIP_VEGA10, + CHIP_VEGA12, + CHIP_VEGA20, + CHIP_RAVEN, + CHIP_ARCTURUS, + CHIP_RENOIR, + CHIP_NAVI10, + CHIP_NAVI14, + CHIP_NAVI12, CHIP_LAST, }; -extern const char *amdgpu_asic_name[]; - #endif /*__AMD_ASIC_TYPE_H__ */ diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index d5fc90b30487..4a25e0577ae0 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -26,26 +26,6 @@ #include #include -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) -#include -#include - -enum drm_dp_mst_topology_ref_type { - DRM_DP_MST_TOPOLOGY_REF_GET, - DRM_DP_MST_TOPOLOGY_REF_PUT, -}; - -struct drm_dp_mst_topology_ref_history { - struct drm_dp_mst_topology_ref_entry { - enum drm_dp_mst_topology_ref_type type; - int count; - ktime_t ts_nsec; - depot_stack_handle_t backtrace; - } *entries; - int len; -}; -#endif /* IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) */ - struct drm_dp_mst_branch; /** @@ -65,31 +45,21 @@ struct drm_dp_vcpi { /** * struct drm_dp_mst_port - MST port * @port_num: port number - * @input: if this port is an input port. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @mcs: message capability status - DP 1.2 spec. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @ddps: DisplayPort Device Plug Status - DP 1.2. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @pdt: Peer Device Type. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @ldps: Legacy Device Plug Status. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @dpcd_rev: DPCD revision of device on this port. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @num_sdp_streams: Number of simultaneous streams. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @num_sdp_stream_sinks: Number of stream sinks. Protected by - * &drm_dp_mst_topology_mgr.base.lock. - * @available_pbn: Available bandwidth for this port. Protected by - * &drm_dp_mst_topology_mgr.base.lock. + * @input: if this port is an input port. + * @mcs: message capability status - DP 1.2 spec. + * @ddps: DisplayPort Device Plug Status - DP 1.2 + * @pdt: Peer Device Type + * @ldps: Legacy Device Plug Status + * @dpcd_rev: DPCD revision of device on this port + * @num_sdp_streams: Number of simultaneous streams + * @num_sdp_stream_sinks: Number of stream sinks + * @available_pbn: Available bandwidth for this port. * @next: link to next port on this branch device - * @aux: i2c aux transport to talk to device connected to this port, protected - * by &drm_dp_mst_topology_mgr.base.lock. + * @mstb: branch device attach below this port + * @aux: i2c aux transport to talk to device connected to this port. * @parent: branch device parent of this port * @vcpi: Virtual Channel Payload info for this port. - * @connector: DRM connector this port is connected to. Protected by - * &drm_dp_mst_topology_mgr.base.lock. + * @connector: DRM connector this port is connected to. * @mgr: topology manager this port lives under. * * This structure represents an MST port endpoint on a device somewhere @@ -109,14 +79,6 @@ struct drm_dp_mst_port { */ struct kref malloc_kref; -#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) - /** - * @topology_ref_history: A history of each topology - * reference/dereference. 
See CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS.
-	 */
-	struct drm_dp_mst_topology_ref_history topology_ref_history;
-#endif
-
 	u8 port_num;
 	bool input;
 	bool mcs;
@@ -128,17 +90,7 @@ struct drm_dp_mst_port {
 	u8 num_sdp_stream_sinks;
 	uint16_t available_pbn;
 	struct list_head next;
-	/**
-	 * @mstb: the branch device connected to this port, if there is one.
-	 * This should be considered protected for reading by
-	 * &drm_dp_mst_topology_mgr.lock. There are two exceptions to this:
-	 * &drm_dp_mst_topology_mgr.up_req_work and
-	 * &drm_dp_mst_topology_mgr.work, which do not grab
-	 * &drm_dp_mst_topology_mgr.lock during reads but are the only
-	 * updaters of this list and are protected from writing concurrently
-	 * by &drm_dp_mst_topology_mgr.probe_lock.
-	 */
-	struct drm_dp_mst_branch *mstb;
+	struct drm_dp_mst_branch *mstb; /* pointer to an mstb if this port has one */
 	struct drm_dp_aux aux; /* i2c bus for this port? */
 	struct drm_dp_mst_branch *parent;
 
@@ -164,6 +116,7 @@ struct drm_dp_mst_port {
  * @lct: Link count total to talk to this branch device.
  * @num_ports: number of ports on the branch.
  * @msg_slots: one bit per transmitted msg slot.
+ * @ports: linked list of ports on this branch.
  * @port_parent: pointer to the port parent, NULL if toplevel.
  * @mgr: topology manager for this branch device.
  * @tx_slots: transmission slots for this device.
@@ -190,35 +143,11 @@ struct drm_dp_mst_branch {
 	 */
 	struct kref malloc_kref;
 
-#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
-	/**
-	 * @topology_ref_history: A history of each topology
-	 * reference/dereference. See CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS.
-	 */
-	struct drm_dp_mst_topology_ref_history topology_ref_history;
-#endif
-
-	/**
-	 * @destroy_next: linked-list entry used by
-	 * drm_dp_delayed_destroy_work()
-	 */
-	struct list_head destroy_next;
-
 	u8 rad[8];
 	u8 lct;
 	int num_ports;
 
 	int msg_slots;
-	/**
-	 * @ports: the list of ports on this branch device. This should be
-	 * considered protected for reading by &drm_dp_mst_topology_mgr.lock.
-	 * There are two exceptions to this:
-	 * &drm_dp_mst_topology_mgr.up_req_work and
-	 * &drm_dp_mst_topology_mgr.work, which do not grab
-	 * &drm_dp_mst_topology_mgr.lock during reads but are the only
-	 * updaters of this list and are protected from updating the list
-	 * concurrently by @drm_dp_mst_topology_mgr.probe_lock
-	 */
 	struct list_head ports;
 
 	/* list of tx ops queue for this port */
@@ -565,13 +494,6 @@ struct drm_dp_mst_topology_mgr {
 	 */
 	struct mutex lock;
 
-	/**
-	 * @probe_lock: Prevents @work and @up_req_work, the only writers of
-	 * &drm_dp_mst_port.mstb and &drm_dp_mst_branch.ports, from racing
-	 * while they update the topology.
-	 */
-	struct mutex probe_lock;
-
 	/**
 	 * @mst_state: If this manager is enabled for an MST capable port. False
 	 * if no MST sink/branch devices is connected.
@@ -649,49 +571,18 @@ struct drm_dp_mst_topology_mgr {
 	struct work_struct tx_work;
 
 	/**
-	 * @destroy_port_list: List of to be destroyed connectors.
+	 * @destroy_connector_list: List of to be destroyed connectors.
 	 */
-	struct list_head destroy_port_list;
+	struct list_head destroy_connector_list;
 	/**
-	 * @destroy_branch_device_list: List of to be destroyed branch
-	 * devices.
+	 * @destroy_connector_lock: Protects @connector_list.
 	 */
-	struct list_head destroy_branch_device_list;
+	struct mutex destroy_connector_lock;
 	/**
-	 * @delayed_destroy_lock: Protects @destroy_port_list and
-	 * @destroy_branch_device_list.
+	 * @destroy_connector_work: Work item to destroy connectors. Needed to
+	 * avoid locking inversion.
 	 */
-	struct mutex delayed_destroy_lock;
-	/**
-	 * @delayed_destroy_work: Work item to destroy MST port and branch
-	 * devices, needed to avoid locking inversion.
-	 */
-	struct work_struct delayed_destroy_work;
-
-	/**
-	 * @up_req_list: List of pending up requests from the topology that
-	 * need to be processed, in chronological order.
-	 */
-	struct list_head up_req_list;
-	/**
-	 * @up_req_lock: Protects @up_req_list
-	 */
-	struct mutex up_req_lock;
-	/**
-	 * @up_req_work: Work item to process up requests received from the
-	 * topology. Needed to avoid blocking hotplug handling and sideband
-	 * transmissions.
-	 */
-	struct work_struct up_req_work;
-
-#if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS)
-	/**
-	 * @topology_ref_history_lock: protects
-	 * &drm_dp_mst_port.topology_ref_history and
-	 * &drm_dp_mst_branch.topology_ref_history.
-	 */
-	struct mutex topology_ref_history_lock;
-#endif
+	struct work_struct destroy_connector_work;
 };
 
 int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
@@ -708,11 +599,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
 
 int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled);
 
-int
-drm_dp_mst_detect_port(struct drm_connector *connector,
-		       struct drm_modeset_acquire_ctx *ctx,
-		       struct drm_dp_mst_topology_mgr *mgr,
-		       struct drm_dp_mst_port *port);
+enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector, struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port);
 
 bool drm_dp_mst_port_has_audio(struct drm_dp_mst_topology_mgr *mgr,
 				struct drm_dp_mst_port *port);
@@ -751,8 +638,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
 
 void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr);
 int __must_check
-drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
-			       bool sync);
+drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr);
 
 ssize_t drm_dp_mst_dpcd_read(struct drm_dp_aux *aux,
 			     unsigned int offset, void *buffer, size_t size);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 6f8d772591ba..e6eea45e1154 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -18,7 +18,6 @@ enum host1x_class {
 };
 
 struct host1x_client;
-struct iommu_group;
 
 /**
  * struct host1x_client_ops - host1x client operations
@@ -35,7 +34,6 @@ struct host1x_client_ops {
  * @list: list node for the host1x client
  * @parent: pointer to struct device representing the host1x controller
  * @dev: pointer to struct device backing this host1x client
- * @group: IOMMU group that this client is a member of
  * @ops: host1x client operations
  * @class: host1x class represented by this client
  * @channel: host1x channel associated with this client
@@ -46,7 +44,6 @@ struct host1x_client {
 	struct list_head list;
 	struct device *parent;
 	struct device *dev;
-	struct iommu_group *group;
 
 	const struct host1x_client_ops *ops;
 
@@ -67,9 +64,8 @@ struct sg_table;
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
-				dma_addr_t *phys);
-	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
+	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
@@ -96,17 +92,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
-					     struct host1x_bo *bo,
-					     dma_addr_t *phys)
+static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
+				       struct sg_table **sgt)
 {
-	return bo->ops->pin(dev, bo, phys);
+	return bo->ops->pin(bo, sgt);
 }
 
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
-				   struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
 {
-	bo->ops->unpin(dev, sgt);
+	bo->ops->unpin(bo, sgt);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
@@ -164,7 +158,7 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
 struct host1x_channel;
 struct host1x_job;
 
-struct host1x_channel *host1x_channel_request(struct host1x_client *client);
+struct host1x_channel *host1x_channel_request(struct device *dev);
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
 void host1x_channel_put(struct host1x_channel *channel);
 int host1x_job_submit(struct host1x_job *job);
@@ -173,9 +167,6 @@ int host1x_job_submit(struct host1x_job *job);
  * host1x job
  */
 
-#define HOST1X_RELOC_READ	(1 << 0)
-#define HOST1X_RELOC_WRITE	(1 << 1)
-
 struct host1x_reloc {
 	struct {
 		struct host1x_bo *bo;
@@ -186,7 +177,6 @@ struct host1x_reloc {
 		unsigned long offset;
 	} target;
 	unsigned long shift;
-	unsigned long flags;
 };
 
 struct host1x_job {
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index cf97f6339acb..2bc9960a31aa 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 		      sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n",
 		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
 		  __entry->saddr_v6, __entry->daddr_v6,
 		  show_tcp_state_name(__entry->state))
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index bbdad866e3fe..4fe35d600ab8 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -500,8 +500,6 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VM_MTYPE_CC		(3 << 5)
 /* Use UC MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_UC		(4 << 5)
-/* Use RW MTYPE instead of default MTYPE */
-#define AMDGPU_VM_MTYPE_RW		(5 << 5)
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5400d7e057f1..63d40cba97e0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1572,21 +1572,6 @@ struct drm_i915_gem_context_param {
  * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
-
-/*
- * I915_CONTEXT_PARAM_PERSISTENCE:
- *
- * Allow the context and active rendering to survive the process until
- * completion. Persistence allows fire-and-forget clients to queue up a
- * bunch of work, hand the output over to a display server and then quit.
- * If the context is marked as not persistent, upon closing (either via
- * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
- * or process termination), the context and any outstanding requests will be
- * cancelled (and exported fences for cancelled requests marked as -EIO).
- *
- * By default, new contexts allow persistence.
- */
-#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;