drm/radeon: add support for MC/VM setup on CIK (v6)

The vm callbacks are the same as the SI ones right now
(same regs and bits). We could share the SI variants, and
I may yet do that, but I figured I would add CIK specific
ones for now in case we need to change anything.

V2: add documentation, minor fixes.
V3: integrate vram offset fixes for APUs
V4: enable 2 level VM PTs
V5: index SH_MEM_* regs properly
V6: add ib_parse()

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2013-04-09 12:45:26 -04:00
parent 6f2043ce15
commit 1c49165d0a
3 changed files with 488 additions and 2 deletions

View file

@ -33,6 +33,7 @@
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
/*
* Core functions
@ -1387,3 +1388,363 @@ int cik_asic_reset(struct radeon_device *rdev)
return cik_gfx_gpu_soft_reset(rdev);
}
/* MC */
/**
* cik_mc_program - program the GPU memory controller
*
* @rdev: radeon_device pointer
*
* Set the location of vram, gart, and AGP in the GPU's
* physical address space (CIK).
*/
static void cik_mc_program(struct radeon_device *rdev)
{
struct evergreen_mc_save save;
u32 tmp;
int i, j;
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x18) {
WREG32((0x2c14 + j), 0x00000000);
WREG32((0x2c18 + j), 0x00000000);
WREG32((0x2c1c + j), 0x00000000);
WREG32((0x2c20 + j), 0x00000000);
WREG32((0x2c24 + j), 0x00000000);
}
WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
evergreen_mc_stop(rdev, &save);
if (radeon_mc_wait_for_idle(rdev)) {
dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
}
/* Lockout access through VGA aperture*/
WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
/* Update configuration */
WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
rdev->mc.vram_start >> 12);
WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
rdev->mc.vram_end >> 12);
WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
rdev->vram_scratch.gpu_addr >> 12);
tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
WREG32(MC_VM_FB_LOCATION, tmp);
/* XXX double check these! */
WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
WREG32(MC_VM_AGP_BASE, 0);
WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
if (radeon_mc_wait_for_idle(rdev)) {
dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
}
evergreen_mc_resume(rdev, &save);
/* we need to own VRAM, so turn off the VGA renderer here
* to stop it overwriting our objects */
rv515_vga_render_disable(rdev);
}
/**
* cik_mc_init - initialize the memory controller driver params
*
* @rdev: radeon_device pointer
*
* Look up the amount of vram, vram width, and decide how to place
* vram and gart within the GPU's physical address space (CIK).
* Returns 0 for success.
*/
static int cik_mc_init(struct radeon_device *rdev)
{
u32 tmp;
int chansize, numchan;
/* Get VRAM informations */
rdev->mc.vram_is_ddr = true;
tmp = RREG32(MC_ARB_RAMCFG);
if (tmp & CHANSIZE_MASK) {
chansize = 64;
} else {
chansize = 32;
}
tmp = RREG32(MC_SHARED_CHMAP);
switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
case 0:
default:
numchan = 1;
break;
case 1:
numchan = 2;
break;
case 2:
numchan = 4;
break;
case 3:
numchan = 8;
break;
case 4:
numchan = 3;
break;
case 5:
numchan = 6;
break;
case 6:
numchan = 10;
break;
case 7:
numchan = 12;
break;
case 8:
numchan = 16;
break;
}
rdev->mc.vram_width = numchan * chansize;
/* Could aper size report 0 ? */
rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
/* size in MB on si */
rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev->mc.visible_vram_size = rdev->mc.aper_size;
si_vram_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
return 0;
}
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the radeon vm/hsa code.
*/
/**
* cik_pcie_gart_tlb_flush - gart tlb flush callback
*
* @rdev: radeon_device pointer
*
* Flush the TLB for the VMID 0 page table (CIK).
*/
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
/* flush hdp cache */
WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
/* bits 0-15 are the VM contexts0-15 */
WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
/**
* cik_pcie_gart_enable - gart enable
*
* @rdev: radeon_device pointer
*
* This sets up the TLBs, programs the page tables for VMID0,
* sets up the hw for VMIDs 1-15 which are allocated on
* demand, and sets up the global locations for the LDS, GDS,
* and GPUVM for FSA64 clients (CIK).
* Returns 0 for success, errors for failure.
*/
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
int r, i;
if (rdev->gart.robj == NULL) {
dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
radeon_gart_restore(rdev);
/* Setup TLB control */
WREG32(MC_VM_MX_L1_TLB_CNTL,
(0xA << 7) |
ENABLE_L1_TLB |
SYSTEM_ACCESS_MODE_NOT_IN_SYS |
ENABLE_ADVANCED_DRIVER_MODEL |
SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
/* Setup L2 cache */
WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
ENABLE_L2_FRAGMENT_PROCESSING |
ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
EFFECTIVE_L2_QUEUE_SIZE(7) |
CONTEXT1_IDENTITY_ACCESS_MODE(1));
WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* setup context0 */
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
WREG32(VM_CONTEXT0_CNTL2, 0);
WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
WREG32(0x15D4, 0);
WREG32(0x15D8, 0);
WREG32(0x15DC, 0);
/* empty context1-15 */
/* FIXME start with 4G, once using 2 level pt switch to full
* vm size space
*/
/* set vm size, must be a multiple of 4 */
WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
rdev->gart.table_addr >> 12);
else
WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
rdev->gart.table_addr >> 12);
}
/* enable context1-15 */
WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
WREG32(VM_CONTEXT1_CNTL2, 0);
WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
/* TC cache setup ??? */
WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
WREG32(TC_CFG_L1_STORE_POLICY, 0);
WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
WREG32(TC_CFG_L2_STORE_POLICY0, 0);
WREG32(TC_CFG_L2_STORE_POLICY1, 0);
WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
WREG32(TC_CFG_L1_VOLATILE, 0);
WREG32(TC_CFG_L2_VOLATILE, 0);
if (rdev->family == CHIP_KAVERI) {
u32 tmp = RREG32(CHUB_CONTROL);
tmp &= ~BYPASS_VM;
WREG32(CHUB_CONTROL, tmp);
}
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
for (i = 0; i < 16; i++) {
WREG32(SRBM_GFX_CNTL, VMID(i));
WREG32(SH_MEM_CONFIG, 0);
WREG32(SH_MEM_APE1_BASE, 1);
WREG32(SH_MEM_APE1_LIMIT, 0);
WREG32(SH_MEM_BASES, 0);
}
WREG32(SRBM_GFX_CNTL, 0);
cik_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(rdev->mc.gtt_size >> 20),
(unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
/**
* cik_pcie_gart_disable - gart disable
*
* @rdev: radeon_device pointer
*
* This disables all VM page table (CIK).
*/
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
/* Disable all tables */
WREG32(VM_CONTEXT0_CNTL, 0);
WREG32(VM_CONTEXT1_CNTL, 0);
/* Setup TLB control */
WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
/* Setup L2 cache */
WREG32(VM_L2_CNTL,
ENABLE_L2_FRAGMENT_PROCESSING |
ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
EFFECTIVE_L2_QUEUE_SIZE(7) |
CONTEXT1_IDENTITY_ACCESS_MODE(1));
WREG32(VM_L2_CNTL2, 0);
WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
L2_CACHE_BIGK_FRAGMENT_SIZE(6));
radeon_gart_table_vram_unpin(rdev);
}
/**
* cik_pcie_gart_fini - vm fini callback
*
* @rdev: radeon_device pointer
*
* Tears down the driver GART/VM setup (CIK).
*/
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
cik_pcie_gart_disable(rdev);
radeon_gart_table_vram_free(rdev);
radeon_gart_fini(rdev);
}
/* vm parser */
/**
* cik_ib_parse - vm ib_parse callback
*
* @rdev: radeon_device pointer
* @ib: indirect buffer pointer
*
* CIK uses hw IB checking so this is a nop (CIK).
*/
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
return 0;
}
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
* VMIDs 1-15 are used for userspace clients and are handled
* by the radeon vm/hsa code.
*/
/**
* cik_vm_init - cik vm init callback
*
* @rdev: radeon_device pointer
*
* Inits cik specific vm parameters (number of VMs, base of vram for
* VMIDs 1-15) (CIK).
* Returns 0 for success.
*/
int cik_vm_init(struct radeon_device *rdev)
{
/* number of VMs */
rdev->vm_manager.nvm = 16;
/* base offset of vram pages */
if (rdev->flags & RADEON_IS_IGP) {
u64 tmp = RREG32(MC_VM_FB_OFFSET);
tmp <<= 22;
rdev->vm_manager.vram_base_offset = tmp;
} else
rdev->vm_manager.vram_base_offset = 0;
return 0;
}
/**
* cik_vm_fini - cik vm fini callback
*
* @rdev: radeon_device pointer
*
* Tear down any asic specific VM setup (CIK).
*/
void cik_vm_fini(struct radeon_device *rdev)
{
}

View file

@ -28,16 +28,106 @@
#define CIK_RB_BITMAP_WIDTH_PER_SH 2
#define VGA_HDP_CONTROL 0x328
#define VGA_MEMORY_DISABLE (1 << 4)
#define DMIF_ADDR_CALC 0xC00
#define SRBM_GFX_CNTL 0xE44
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
#define VMID(x) ((x) << 4)
#define QUEUEID(x) ((x) << 8)
#define SRBM_STATUS2 0xE4C
#define SRBM_STATUS 0xE50
#define VM_L2_CNTL 0x1400
#define ENABLE_L2_CACHE (1 << 0)
#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
#define VM_L2_CNTL2 0x1404
#define INVALIDATE_ALL_L1_TLBS (1 << 0)
#define INVALIDATE_L2_CACHE (1 << 1)
#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
#define INVALIDATE_PTE_AND_PDE_CACHES 0
#define INVALIDATE_ONLY_PTE_CACHES 1
#define INVALIDATE_ONLY_PDE_CACHES 2
#define VM_L2_CNTL3 0x1408
#define BANK_SELECT(x) ((x) << 0)
#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
#define VM_L2_STATUS 0x140C
#define L2_BUSY (1 << 0)
#define VM_CONTEXT0_CNTL 0x1410
#define ENABLE_CONTEXT (1 << 0)
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
#define VM_CONTEXT1_CNTL 0x1414
#define VM_CONTEXT0_CNTL2 0x1430
#define VM_CONTEXT1_CNTL2 0x1434
#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x1438
#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x143c
#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x1440
#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x1444
#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x1448
#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x144c
#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450
#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454
#define VM_INVALIDATE_REQUEST 0x1478
#define VM_INVALIDATE_RESPONSE 0x147c
#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518
#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x151c
#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153c
#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x1540
#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x1544
#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x1548
#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x154c
#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x1550
#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x1554
#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x1558
#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155c
#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x1560
#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x1580
#define MC_SHARED_CHMAP 0x2004
#define NOOFCHAN_SHIFT 12
#define NOOFCHAN_MASK 0x0000f000
#define MC_SHARED_CHREMAP 0x2008
#define CHUB_CONTROL 0x1864
#define BYPASS_VM (1 << 0)
#define MC_VM_FB_LOCATION 0x2024
#define MC_VM_AGP_TOP 0x2028
#define MC_VM_AGP_BOT 0x202C
#define MC_VM_AGP_BASE 0x2030
#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034
#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038
#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C
#define MC_VM_MX_L1_TLB_CNTL 0x2064
#define ENABLE_L1_TLB (1 << 0)
#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3)
#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3)
#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3)
#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3)
#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5)
#define ENABLE_ADVANCED_DRIVER_MODEL (1 << 6)
#define MC_VM_FB_OFFSET 0x2068
#define MC_ARB_RAMCFG 0x2760
#define NOOFBANK_SHIFT 0
#define NOOFBANK_MASK 0x00000003
@ -61,10 +151,16 @@
#define HDP_MISC_CNTL 0x2F4C
#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
#define CONFIG_MEMSIZE 0x5428
#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x5480
#define BIF_FB_EN 0x5490
#define FB_READ_EN (1 << 0)
#define FB_WRITE_EN (1 << 1)
#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0
#define GRBM_CNTL 0x8000
#define GRBM_READ_TIMEOUT(x) ((x) << 0)
@ -189,6 +285,24 @@
#define SQ_CONFIG 0x8C00
#define SH_MEM_BASES 0x8C28
/* if PTR32, these are the bases for scratch and lds */
#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
#define SHARED_BASE(x) ((x) << 16) /* LDS */
#define SH_MEM_APE1_BASE 0x8C2C
/* if PTR32, this is the base location of GPUVM */
#define SH_MEM_APE1_LIMIT 0x8C30
/* if PTR32, this is the upper limit of GPUVM */
#define SH_MEM_CONFIG 0x8C34
#define PTR32 (1 << 0)
#define ALIGNMENT_MODE(x) ((x) << 2)
#define SH_MEM_ALIGNMENT_MODE_DWORD 0
#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1
#define SH_MEM_ALIGNMENT_MODE_STRICT 2
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3
#define DEFAULT_MTYPE(x) ((x) << 4)
#define APE1_MTYPE(x) ((x) << 7)
#define SX_DEBUG_1 0x9060
#define SPI_CONFIG_CNTL 0x9100
@ -293,6 +407,17 @@
#define TCP_CHAN_STEER_LO 0xac0c
#define TCP_CHAN_STEER_HI 0xac10
#define TC_CFG_L1_LOAD_POLICY0 0xAC68
#define TC_CFG_L1_LOAD_POLICY1 0xAC6C
#define TC_CFG_L1_STORE_POLICY 0xAC70
#define TC_CFG_L2_LOAD_POLICY0 0xAC74
#define TC_CFG_L2_LOAD_POLICY1 0xAC78
#define TC_CFG_L2_STORE_POLICY0 0xAC7C
#define TC_CFG_L2_STORE_POLICY1 0xAC80
#define TC_CFG_L2_ATOMIC_POLICY 0xAC84
#define TC_CFG_L1_VOLATILE 0xAC88
#define TC_CFG_L2_VOLATILE 0xAC8C
#define PA_SC_RASTER_CONFIG 0x28350
# define RASTER_CONFIG_RB_MAP_0 0
# define RASTER_CONFIG_RB_MAP_1 1

View file

@ -3535,8 +3535,8 @@ static void si_mc_program(struct radeon_device *rdev)
}
}
static void si_vram_gtt_location(struct radeon_device *rdev,
struct radeon_mc *mc)
void si_vram_gtt_location(struct radeon_device *rdev,
struct radeon_mc *mc)
{
if (mc->mc_vram_size > 0xFFC0000000ULL) {
/* leave room for at least 1024M GTT */