Merge tag 'drm-amdkfd-next-fixes-2015-06-10' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Make the reset wavefronts action be per process per device instead of
  per process, because one device can be stuck but the other one won't be

- Add some missing properties to the CZ device_info structure

- Rename symbols to not have CONFIG_ prefix

- Some more cleanups and debug prints

* tag 'drm-amdkfd-next-fixes-2015-06-10' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: remove not used defines from cik_regs.h
  drm/amdkfd: Add missing properties to CZ device info
  drm/amdkfd: make reset wavefronts per process per device
  drm/amdkfd: add debug print to kfd_events.c
  drm/amdkfd: avoid CONFIG_ prefix for non-Kconfig symbols
This commit is contained in:
Dave Airlie 2015-06-12 10:14:20 +10:00
commit 933ea180a4
8 changed files with 54 additions and 186 deletions

View file

@ -23,33 +23,11 @@
#ifndef CIK_REGS_H
#define CIK_REGS_H
#define IH_VMID_0_LUT 0x3D40u
#define BIF_DOORBELL_CNTL 0x530Cu
#define SRBM_GFX_CNTL 0xE44
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
#define VMID(x) ((x) << 4)
#define QUEUEID(x) ((x) << 8)
#define SQ_CONFIG 0x8C00
#define SH_MEM_BASES 0x8C28
/* if PTR32, these are the bases for scratch and lds */
#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
#define SHARED_BASE(x) ((x) << 16) /* LDS */
#define SH_MEM_APE1_BASE 0x8C2C
/* if PTR32, this is the base location of GPUVM */
#define SH_MEM_APE1_LIMIT 0x8C30
/* if PTR32, this is the upper limit of GPUVM */
#define SH_MEM_CONFIG 0x8C34
#define PTR32 (1 << 0)
#define PRIVATE_ATC (1 << 1)
#define ALIGNMENT_MODE(x) ((x) << 2)
#define SH_MEM_ALIGNMENT_MODE_DWORD 0
#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1
#define SH_MEM_ALIGNMENT_MODE_STRICT 2
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3
#define DEFAULT_MTYPE(x) ((x) << 4)
#define APE1_MTYPE(x) ((x) << 7)
@ -58,138 +36,35 @@
#define MTYPE_CACHED 0
#define MTYPE_NONCACHED 3
#define SH_STATIC_MEM_CONFIG 0x9604u
#define TC_CFG_L1_LOAD_POLICY0 0xAC68
#define TC_CFG_L1_LOAD_POLICY1 0xAC6C
#define TC_CFG_L1_STORE_POLICY 0xAC70
#define TC_CFG_L2_LOAD_POLICY0 0xAC74
#define TC_CFG_L2_LOAD_POLICY1 0xAC78
#define TC_CFG_L2_STORE_POLICY0 0xAC7C
#define TC_CFG_L2_STORE_POLICY1 0xAC80
#define TC_CFG_L2_ATOMIC_POLICY 0xAC84
#define TC_CFG_L1_VOLATILE 0xAC88
#define TC_CFG_L2_VOLATILE 0xAC8C
#define CP_PQ_WPTR_POLL_CNTL 0xC20C
#define WPTR_POLL_EN (1 << 31)
#define CPC_INT_CNTL 0xC2D0
#define CP_ME1_PIPE0_INT_CNTL 0xC214
#define CP_ME1_PIPE1_INT_CNTL 0xC218
#define CP_ME1_PIPE2_INT_CNTL 0xC21C
#define CP_ME1_PIPE3_INT_CNTL 0xC220
#define CP_ME2_PIPE0_INT_CNTL 0xC224
#define CP_ME2_PIPE1_INT_CNTL 0xC228
#define CP_ME2_PIPE2_INT_CNTL 0xC22C
#define CP_ME2_PIPE3_INT_CNTL 0xC230
#define DEQUEUE_REQUEST_INT_ENABLE (1 << 13)
#define WRM_POLL_TIMEOUT_INT_ENABLE (1 << 17)
#define PRIV_REG_INT_ENABLE (1 << 23)
#define TIME_STAMP_INT_ENABLE (1 << 26)
#define GENERIC2_INT_ENABLE (1 << 29)
#define GENERIC1_INT_ENABLE (1 << 30)
#define GENERIC0_INT_ENABLE (1 << 31)
#define CP_ME1_PIPE0_INT_STATUS 0xC214
#define CP_ME1_PIPE1_INT_STATUS 0xC218
#define CP_ME1_PIPE2_INT_STATUS 0xC21C
#define CP_ME1_PIPE3_INT_STATUS 0xC220
#define CP_ME2_PIPE0_INT_STATUS 0xC224
#define CP_ME2_PIPE1_INT_STATUS 0xC228
#define CP_ME2_PIPE2_INT_STATUS 0xC22C
#define CP_ME2_PIPE3_INT_STATUS 0xC230
#define DEQUEUE_REQUEST_INT_STATUS (1 << 13)
#define WRM_POLL_TIMEOUT_INT_STATUS (1 << 17)
#define PRIV_REG_INT_STATUS (1 << 23)
#define TIME_STAMP_INT_STATUS (1 << 26)
#define GENERIC2_INT_STATUS (1 << 29)
#define GENERIC1_INT_STATUS (1 << 30)
#define GENERIC0_INT_STATUS (1 << 31)
#define CP_HPD_EOP_BASE_ADDR 0xC904
#define CP_HPD_EOP_BASE_ADDR_HI 0xC908
#define CP_HPD_EOP_VMID 0xC90C
#define CP_HPD_EOP_CONTROL 0xC910
#define EOP_SIZE(x) ((x) << 0)
#define EOP_SIZE_MASK (0x3f << 0)
#define CP_MQD_BASE_ADDR 0xC914
#define CP_MQD_BASE_ADDR_HI 0xC918
#define CP_HQD_ACTIVE 0xC91C
#define CP_HQD_VMID 0xC920
#define CP_HQD_PERSISTENT_STATE 0xC924u
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
#define PRELOAD_REQ (1 << 0)
#define CP_HQD_PIPE_PRIORITY 0xC928u
#define CP_HQD_QUEUE_PRIORITY 0xC92Cu
#define CP_HQD_QUANTUM 0xC930u
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
#define IB_ATC_EN (1U << 23)
#define QUANTUM_EN 1U
#define QUANTUM_SCALE_1MS (1U << 4)
#define QUANTUM_DURATION(x) ((x) << 8)
#define CP_HQD_PQ_BASE 0xC934
#define CP_HQD_PQ_BASE_HI 0xC938
#define CP_HQD_PQ_RPTR 0xC93C
#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940
#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944
#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948
#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C
#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950
#define DOORBELL_OFFSET(x) ((x) << 2)
#define DOORBELL_OFFSET_MASK (0x1fffff << 2)
#define DOORBELL_SOURCE (1 << 28)
#define DOORBELL_SCHD_HIT (1 << 29)
#define DOORBELL_EN (1 << 30)
#define DOORBELL_HIT (1 << 31)
#define CP_HQD_PQ_WPTR 0xC954
#define CP_HQD_PQ_CONTROL 0xC958
#define QUEUE_SIZE(x) ((x) << 0)
#define QUEUE_SIZE_MASK (0x3f << 0)
#define RPTR_BLOCK_SIZE(x) ((x) << 8)
#define RPTR_BLOCK_SIZE_MASK (0x3f << 8)
#define MIN_AVAIL_SIZE(x) ((x) << 20)
#define PQ_ATC_EN (1 << 23)
#define PQ_VOLATILE (1 << 26)
#define NO_UPDATE_RPTR (1 << 27)
#define UNORD_DISPATCH (1 << 28)
#define ROQ_PQ_IB_FLIP (1 << 29)
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)
#define DEFAULT_RPTR_BLOCK_SIZE RPTR_BLOCK_SIZE(5)
#define DEFAULT_MIN_AVAIL_SIZE MIN_AVAIL_SIZE(3)
#define CP_HQD_IB_BASE_ADDR 0xC95Cu
#define CP_HQD_IB_BASE_ADDR_HI 0xC960u
#define CP_HQD_IB_RPTR 0xC964u
#define CP_HQD_IB_CONTROL 0xC968u
#define IB_ATC_EN (1U << 23)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
#define PQ_ATC_EN (1 << 23)
#define NO_UPDATE_RPTR (1 << 27)
#define DOORBELL_OFFSET(x) ((x) << 2)
#define DOORBELL_EN (1 << 30)
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)
#define AQL_ENABLE 1
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
#define DEQUEUE_REQUEST_RESET 2
#define DEQUEUE_INT (1U << 8)
#define CP_HQD_SEMA_CMD 0xC97Cu
#define CP_HQD_MSG_TYPE 0xC980u
#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u
#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u
#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu
#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u
#define CP_HQD_HQ_SCHEDULER0 0xC994u
#define CP_HQD_HQ_SCHEDULER1 0xC998u
#define CP_MQD_CONTROL 0xC99C
#define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0)
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
#define SDMA_RB_VMID(x) (x << 24)
#define SDMA_RB_ENABLE (1 << 0)
#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
@ -202,33 +77,7 @@
#define SDMA_VA_SHARED_BASE(x) (x << 8)
#define GRBM_GFX_INDEX 0x30800
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
#define SE_INDEX(x) ((x) << 16)
#define SH_BROADCAST_WRITES (1 << 29)
#define INSTANCE_BROADCAST_WRITES (1 << 30)
#define SE_BROADCAST_WRITES (1 << 31)
#define SQC_CACHES 0x30d20
#define SQC_POLICY 0x8C38u
#define SQC_VOLATILE 0x8C3Cu
#define CP_PERFMON_CNTL 0x36020
#define ATC_VMID0_PASID_MAPPING 0x339Cu
#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
#define ATC_VM_APERTURE0_CNTL 0x3310u
#define ATS_ACCESS_MODE_NEVER 0
#define ATS_ACCESS_MODE_ALWAYS 1
#define ATC_VM_APERTURE0_CNTL2 0x3318u
#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u
#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u
#define ATC_VM_APERTURE1_CNTL 0x3314u
#define ATC_VM_APERTURE1_CNTL2 0x331Cu
#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu
#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u
#endif

View file

@ -445,7 +445,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[0].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[0].reg_data[0] = cntl.u32All;
@ -458,7 +458,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[1].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[1].reg_data[0] = addrHi.u32All;
aw_reg_add_dword =
@ -470,7 +470,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[2].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[2].reg_data[0] = addrLo.u32All;
/* enable watch flag if address is not zero*/
@ -488,7 +488,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[3].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[3].reg_data[0] = cntl.u32All;
status = dbgdev_diq_submit_ib(
@ -690,7 +690,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
packets_vec[1].header.type = PM4_TYPE_3;
packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
CONFIG_REG_BASE;
AMD_CONFIG_REG_BASE;
packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
packets_vec[1].bitfields2.insert_vmid = 1;

View file

@ -48,9 +48,9 @@ enum {
/* CONFIG reg space definition */
enum {
CONFIG_REG_BASE = 0x2000, /* in dwords */
CONFIG_REG_END = 0x2B00,
CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */
AMD_CONFIG_REG_END = 0x2B00,
AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE
};
/* SH reg space definition */

View file

@ -44,7 +44,10 @@ static const struct kfd_device_info kaveri_device_info = {
static const struct kfd_device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED
};

View file

@ -946,7 +946,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
{
int retval;
enum kfd_preempt_type_filter preempt_type;
struct kfd_process *p;
struct kfd_process_device *pdd;
BUG_ON(!dqm);
@ -981,8 +981,9 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
if (retval != 0) {
p = kfd_get_process(current);
p->reset_wavefronts = true;
pdd = kfd_get_process_device_data(dqm->dev,
kfd_get_process(current));
pdd->reset_wavefronts = true;
goto out;
}
pm_release_ib(&dqm->packets);

View file

@ -313,6 +313,10 @@ static int create_signal_event(struct file *devkfd,
p->signal_event_count, ev->event_id,
ev->user_signal_address);
pr_debug("signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id,
ev->user_signal_address);
return 0;
}

View file

@ -463,6 +463,11 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
/* This flag tells if we should reset all
* wavefronts on process termination
*/
bool reset_wavefronts;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@ -519,11 +524,6 @@ struct kfd_process {
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
/*
* This flag tells if we should reset all wavefronts on
* process termination
*/
bool reset_wavefronts;
};
/**

View file

@ -173,7 +173,7 @@ static void kfd_process_wq_release(struct work_struct *work)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
pdd->dev->id, p->pasid);
if (p->reset_wavefronts)
if (pdd->reset_wavefronts)
dbgdev_wave_reset_wavefronts(pdd->dev, p);
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
@ -222,6 +222,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kfd_process *p;
struct kfd_process_device *pdd = NULL;
/*
* The kfd_process structure can not be free because the
@ -240,6 +241,15 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
/* In case our notifier is called before IOMMU notifier */
pqm_uninit(&p->pqm);
/* Iterate over all process device data structure and check
* if we should reset all wavefronts */
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->reset_wavefronts) {
pr_warn("amdkfd: Resetting all wave fronts\n");
dbgdev_wave_reset_wavefronts(pdd->dev, p);
pdd->reset_wavefronts = false;
}
mutex_unlock(&p->mutex);
/*
@ -305,8 +315,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (kfd_init_apertures(process) != 0)
goto err_init_apretures;
process->reset_wavefronts = false;
return process;
err_init_apretures:
@ -348,6 +356,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
pdd->qpd.dqm = dev->dqm;
pdd->reset_wavefronts = false;
list_add(&pdd->per_device_list, &p->per_device_data);
}
@ -409,10 +418,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
kfd_dbgmgr_destroy(dev->dbgmgr);
pqm_uninit(&p->pqm);
if (p->reset_wavefronts)
dbgdev_wave_reset_wavefronts(dev, p);
pdd = kfd_get_process_device_data(dev, p);
if (pdd->reset_wavefronts) {
dbgdev_wave_reset_wavefronts(pdd->dev, p);
pdd->reset_wavefronts = false;
}
/*
* Just mark pdd as unbound, because we still need it to call