From 0a96d72be9ce6c5080f5b08a07f8e34b81b575ba Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Mar 2012 17:18:11 -0400 Subject: [PATCH] drm/radeon/kms: add gpu init support for SI Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 32 + drivers/gpu/drm/radeon/si.c | 1005 +++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 201 +++++++ 3 files changed, 1238 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c75ccc7f181f..972f1679d1c9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1370,6 +1370,37 @@ struct cayman_asic { struct r100_gpu_lockup lockup; }; +struct si_asic { /* SI GPU configuration; stored as the "si" member of union radeon_asic_config */ + unsigned max_shader_engines; /* max_* and sc_* fields: per-family values assigned in si_gpu_init() */ + unsigned max_pipes_per_simd; + unsigned max_tile_pipes; + unsigned max_simds_per_se; + unsigned max_backends_per_se; + unsigned max_texture_channel_caches; + unsigned max_gprs; + unsigned max_gs_threads; + unsigned max_hw_contexts; + unsigned sc_prim_fifo_size_frontend; + unsigned sc_prim_fifo_size_backend; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_size; + + unsigned num_shader_engines; /* this group: values si_gpu_init() derives from the max_* limits and hardware registers */ + unsigned num_tile_pipes; + unsigned num_backends_per_se; + unsigned backend_disable_mask_per_asic; /* bit i set means backend i is treated as disabled */ + unsigned backend_map; /* tile-pipe to backend map, 4 bits per pipe */ + unsigned num_texture_channel_caches; + unsigned mem_max_burst_length_bytes; + unsigned mem_row_size_in_kb; + unsigned shader_engine_tile_size; + unsigned num_gpus; + unsigned multi_gpu_tile_size; + + unsigned tile_config; /* packed tiling dword: bits 3:0 num_pipes, 7:4 num_banks, 11:8 group_size, 15:12 row_size */ + struct r100_gpu_lockup lockup; +}; + union radeon_asic_config { struct r300_asic r300; struct r100_asic r100; @@ -1377,6 +1408,7 @@ union radeon_asic_config { struct rv770_asic rv770; struct evergreen_asic evergreen; struct cayman_asic cayman; + struct si_asic si; }; /* diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 3e91429d7bd2..dd9e7d3d23be 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -28,6 +28,8 @@ #include 
"sid.h" #include "atom.h" +extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); + /* get temperature in millidegrees */ int si_get_temp(struct radeon_device *rdev) { @@ -503,3 +505,1006 @@ void dce6_bandwidth_update(struct radeon_device *rdev) } } +/* + * Core functions + */ +static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev, + u32 num_tile_pipes, + u32 num_backends_per_asic, + u32 *backend_disable_mask_per_asic, + u32 num_shader_engines) +{ + u32 backend_map = 0; + u32 enabled_backends_mask = 0; + u32 enabled_backends_count = 0; + u32 num_backends_per_se; + u32 cur_pipe; + u32 swizzle_pipe[SI_MAX_PIPES]; + u32 cur_backend = 0; + u32 i; + bool force_no_swizzle; + + /* force legal values */ + if (num_tile_pipes < 1) + num_tile_pipes = 1; + if (num_tile_pipes > rdev->config.si.max_tile_pipes) + num_tile_pipes = rdev->config.si.max_tile_pipes; + if (num_shader_engines < 1) + num_shader_engines = 1; + if (num_shader_engines > rdev->config.si.max_shader_engines) + num_shader_engines = rdev->config.si.max_shader_engines; + if (num_backends_per_asic < num_shader_engines) + num_backends_per_asic = num_shader_engines; + if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines)) + num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines; + + /* make sure we have the same number of backends per se */ + num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines); + /* set up the number of backends per se */ + num_backends_per_se = num_backends_per_asic / num_shader_engines; + if (num_backends_per_se > rdev->config.si.max_backends_per_se) { + num_backends_per_se = rdev->config.si.max_backends_per_se; + num_backends_per_asic = num_backends_per_se * num_shader_engines; + } + + /* create enable mask and count for enabled backends */ + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + if (((*backend_disable_mask_per_asic >> i) & 1) == 0) { + enabled_backends_mask |= (1 << i); + 
++enabled_backends_count; + } + if (enabled_backends_count == num_backends_per_asic) + break; + } + + /* force the backends mask to match the current number of backends */ + if (enabled_backends_count != num_backends_per_asic) { + u32 this_backend_enabled; + u32 shader_engine; + u32 backend_per_se; + + enabled_backends_mask = 0; + enabled_backends_count = 0; + *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK; + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + /* calc the current se */ + shader_engine = i / rdev->config.si.max_backends_per_se; + /* calc the backend per se */ + backend_per_se = i % rdev->config.si.max_backends_per_se; + /* default to not enabled */ + this_backend_enabled = 0; + if ((shader_engine < num_shader_engines) && + (backend_per_se < num_backends_per_se)) + this_backend_enabled = 1; + if (this_backend_enabled) { + enabled_backends_mask |= (1 << i); + *backend_disable_mask_per_asic &= ~(1 << i); + ++enabled_backends_count; + } + } + } + + + memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES); + switch (rdev->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + force_no_swizzle = true; + break; + default: + force_no_swizzle = false; + break; + } + if (force_no_swizzle) { + bool last_backend_enabled = false; + + force_no_swizzle = false; + for (i = 0; i < SI_MAX_BACKENDS; ++i) { + if (((enabled_backends_mask >> i) & 1) == 1) { + if (last_backend_enabled) + force_no_swizzle = true; + last_backend_enabled = true; + } else + last_backend_enabled = false; + } + } + + switch (num_tile_pipes) { + case 1: + case 3: + case 5: + case 7: + DRM_ERROR("odd number of pipes!\n"); + break; + case 2: + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + break; + case 4: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 1; + swizzle_pipe[3] = 3; + } + break; + case 6: + if (force_no_swizzle) { 
+ swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 1; + swizzle_pipe[4] = 3; + swizzle_pipe[5] = 5; + } + break; + case 8: + if (force_no_swizzle) { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 1; + swizzle_pipe[2] = 2; + swizzle_pipe[3] = 3; + swizzle_pipe[4] = 4; + swizzle_pipe[5] = 5; + swizzle_pipe[6] = 6; + swizzle_pipe[7] = 7; + } else { + swizzle_pipe[0] = 0; + swizzle_pipe[1] = 2; + swizzle_pipe[2] = 4; + swizzle_pipe[3] = 6; + swizzle_pipe[4] = 1; + swizzle_pipe[5] = 3; + swizzle_pipe[6] = 5; + swizzle_pipe[7] = 7; + } + break; + } + + for (cur_pipe = 0; cur_pipe < num_tile_pipes && cur_pipe < SI_MAX_PIPES; ++cur_pipe) { /* num_tile_pipes is only clamped to max_tile_pipes, so also stay inside swizzle_pipe[SI_MAX_PIPES] */ + while (((1 << cur_backend) & enabled_backends_mask) == 0) /* skip disabled backends, wrapping around */ + cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; + + backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4))); /* one 4-bit backend id per pipe slot */ + + cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; + } + + return backend_map; +} + +static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev, + u32 disable_mask_per_se, + u32 max_disable_mask_per_se, + u32 num_shader_engines) +{ /* Expands a per-shader-engine backend disable mask into a whole-ASIC mask by placing one copy of the clamped per-SE field per shader engine. Returns that mask for 1 or 2 engines and 0xffffffff (every bit set) for any other count. rdev is not used. */ + u32 disable_field_width_per_se = r600_count_pipe_bits(max_disable_mask_per_se); /* field width comes from the maximum mask so the second engine's copy starts at a fixed bit, regardless of how many backends are disabled; assumes the max mask is a contiguous run of low bits */ + u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se; /* drop bits outside the per-SE field */ + + if (num_shader_engines == 1) + return disable_mask_per_asic; + else if (num_shader_engines == 2) + return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se); + else + return 0xffffffff; +} + +static void si_tiling_mode_table_init(struct radeon_device *rdev) +{ + const u32 num_tile_mode_states = 32; /* number of GB_TILE_MODE registers written by the loops below */ + u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + + switch (rdev->config.si.mem_row_size_in_kb) { /* TILE_SPLIT value matching the memory row size; any other size falls back to 2KB */ + case 1: + split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; + break; + case 2: + default: + split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; + break; + case 4: + 
split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; + break; + } + + if ((rdev->family == CHIP_TAHITI) || + (rdev->family == CHIP_PITCAIRN)) { + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: /* non-AA compressed depth or any compressed stencil */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 1: /* 2xAA/4xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 2: /* 8xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth 
textures) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 8: /* 1D and 1D Array Surfaces */ + gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 9: /* Displayable maps. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 10: /* Display 8bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 11: /* Display 16bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 12: /* Display 32bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 13: /* Thin. 
*/ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 14: /* Thin 8 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 15: /* Thin 16 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 16: /* Thin 32 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 17: /* Thin 64 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + case 21: /* 8 bpp PRT. 
*/ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 22: /* 16 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 23: /* 32 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 24: /* 64 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 25: /* 128 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + NUM_BANKS(ADDR_SURF_8_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + default: + gb_tile_moden = 0; + break; + } + WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); + 
} + } else if (rdev->family == CHIP_VERDE) { + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: /* non-AA compressed depth or any compressed stencil */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 1: /* 2xAA/4xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 2: /* 8xAA compressed depth only */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + 
PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 8: /* 1D and 1D Array Surfaces */ + gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 9: /* Displayable maps. 
*/ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 10: /* Display 8bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 11: /* Display 16bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 12: /* Display 32bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 13: /* Thin. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 14: /* Thin 8 bpp. 
*/ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 15: /* Thin 16 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 16: /* Thin 32 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 17: /* Thin 64 bpp. */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(split_equal_to_row_size) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 21: /* 8 bpp PRT. 
*/ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 22: /* 16 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4)); + break; + case 23: /* 32 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 24: /* 64 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + NUM_BANKS(ADDR_SURF_16_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2)); + break; + case 25: /* 128 bpp PRT */ + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + NUM_BANKS(ADDR_SURF_8_BANK) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1)); + break; + default: + gb_tile_moden = 0; + break; + } + WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); + 
} + } else + DRM_ERROR("unknown asic: 0x%x\n", rdev->family); +} + +static void si_gpu_init(struct radeon_device *rdev) +{ + u32 cc_rb_backend_disable = 0; + u32 cc_gc_shader_array_config; + u32 gb_addr_config = 0; + u32 mc_shared_chmap, mc_arb_ramcfg; + u32 gb_backend_map; + u32 cgts_tcc_disable; + u32 sx_debug_1; + u32 gc_user_shader_array_config; + u32 gc_user_rb_backend_disable; + u32 cgts_user_tcc_disable; + u32 hdp_host_path_cntl; + u32 tmp; + int i, j; + + switch (rdev->family) { + case CHIP_TAHITI: + rdev->config.si.max_shader_engines = 2; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 12; + rdev->config.si.max_simds_per_se = 8; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 12; + rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x100; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + case CHIP_PITCAIRN: + rdev->config.si.max_shader_engines = 2; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 8; + rdev->config.si.max_simds_per_se = 5; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 8; + rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x100; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + case CHIP_VERDE: + default: + rdev->config.si.max_shader_engines = 1; + rdev->config.si.max_pipes_per_simd = 4; + rdev->config.si.max_tile_pipes = 4; + rdev->config.si.max_simds_per_se = 2; + rdev->config.si.max_backends_per_se = 4; + rdev->config.si.max_texture_channel_caches = 4; + 
rdev->config.si.max_gprs = 256; + rdev->config.si.max_gs_threads = 32; + rdev->config.si.max_hw_contexts = 8; + + rdev->config.si.sc_prim_fifo_size_frontend = 0x20; + rdev->config.si.sc_prim_fifo_size_backend = 0x40; + rdev->config.si.sc_hiz_tile_fifo_size = 0x30; + rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + break; + } + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + + WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); + + evergreen_fix_pci_max_read_req_size(rdev); + + WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); + + mc_shared_chmap = RREG32(MC_SHARED_CHMAP); + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + + cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); + cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + cgts_tcc_disable = 0xffff0000; + for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++) + cgts_tcc_disable &= ~(1 << (16 + i)); + gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); + gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); + + rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines; + rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; + tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp); + tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; + rdev->config.si.backend_disable_mask_per_asic = + si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK, + rdev->config.si.num_shader_engines); + rdev->config.si.backend_map = + si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, + rdev->config.si.num_backends_per_se * + 
rdev->config.si.num_shader_engines, + &rdev->config.si.backend_disable_mask_per_asic, + rdev->config.si.num_shader_engines); + tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT; + rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp); + rdev->config.si.mem_max_burst_length_bytes = 256; + tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; + rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; + if (rdev->config.si.mem_row_size_in_kb > 4) + rdev->config.si.mem_row_size_in_kb = 4; + /* XXX use MC settings? */ + rdev->config.si.shader_engine_tile_size = 32; + rdev->config.si.num_gpus = 1; + rdev->config.si.multi_gpu_tile_size = 64; + + gb_addr_config = 0; + switch (rdev->config.si.num_tile_pipes) { + case 1: + gb_addr_config |= NUM_PIPES(0); + break; + case 2: + gb_addr_config |= NUM_PIPES(1); + break; + case 4: + gb_addr_config |= NUM_PIPES(2); + break; + case 8: + default: + gb_addr_config |= NUM_PIPES(3); + break; + } + + tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1; + gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp); + gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1); + tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1; + gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp); + switch (rdev->config.si.num_gpus) { + case 1: + default: + gb_addr_config |= NUM_GPUS(0); + break; + case 2: + gb_addr_config |= NUM_GPUS(1); + break; + case 4: + gb_addr_config |= NUM_GPUS(2); + break; + } + switch (rdev->config.si.multi_gpu_tile_size) { + case 16: + gb_addr_config |= MULTI_GPU_TILE_SIZE(0); + break; + case 32: + default: + gb_addr_config |= MULTI_GPU_TILE_SIZE(1); + break; + case 64: + gb_addr_config |= MULTI_GPU_TILE_SIZE(2); + break; + case 128: + gb_addr_config |= MULTI_GPU_TILE_SIZE(3); + break; + } + switch (rdev->config.si.mem_row_size_in_kb) { + case 1: + default: + gb_addr_config |= ROW_SIZE(0); + break; + case 2: + gb_addr_config |= ROW_SIZE(1); + break; + case 4: + 
gb_addr_config |= ROW_SIZE(2); + break; + } + + tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; + rdev->config.si.num_tile_pipes = (1 << tmp); + tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; + rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256; + tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; + rdev->config.si.num_shader_engines = tmp + 1; + tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; + rdev->config.si.num_gpus = tmp + 1; + tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; + rdev->config.si.multi_gpu_tile_size = 1 << tmp; + tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; + rdev->config.si.mem_row_size_in_kb = 1 << tmp; + + gb_backend_map = + si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, + rdev->config.si.num_backends_per_se * + rdev->config.si.num_shader_engines, + &rdev->config.si.backend_disable_mask_per_asic, + rdev->config.si.num_shader_engines); + + /* setup tiling info dword. gb_addr_config is not adequate since it does + * not have bank info, so create a custom tiling dword. + * bits 3:0 num_pipes + * bits 7:4 num_banks + * bits 11:8 group_size + * bits 15:12 row_size + */ + rdev->config.si.tile_config = 0; + switch (rdev->config.si.num_tile_pipes) { + case 1: + rdev->config.si.tile_config |= (0 << 0); + break; + case 2: + rdev->config.si.tile_config |= (1 << 0); + break; + case 4: + rdev->config.si.tile_config |= (2 << 0); + break; + case 8: + default: + /* XXX what about 12? 
*/ + rdev->config.si.tile_config |= (3 << 0); + break; + } + rdev->config.si.tile_config |= + ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + rdev->config.si.tile_config |= + ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; + rdev->config.si.tile_config |= + ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; + + rdev->config.si.backend_map = gb_backend_map; + WREG32(GB_ADDR_CONFIG, gb_addr_config); + WREG32(DMIF_ADDR_CONFIG, gb_addr_config); + WREG32(HDP_ADDR_CONFIG, gb_addr_config); + + /* primary versions */ + WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); + + WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); + + /* user versions */ + WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); + WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); + + WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); + + si_tiling_mode_table_init(rdev); + + /* set HW defaults for 3D engine */ + WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | + ROQ_IB2_START(0x2b))); + WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); + + sx_debug_1 = RREG32(SX_DEBUG_1); + WREG32(SX_DEBUG_1, sx_debug_1); + + WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); + + WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) | + SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size))); + + WREG32(VGT_NUM_INSTANCES, 1); + + WREG32(CP_PERFMON_CNTL, 0); + + WREG32(SQ_CONFIG, 0); + + WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | + FORCE_EOV_MAX_REZ_CNT(255))); + + WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | + 
AUTO_INVLD_EN(ES_AND_GS_AUTO)); + + WREG32(VGT_GS_VERTEX_REUSE, 16); + WREG32(PA_SC_LINE_STIPPLE_STATE, 0); + + WREG32(CB_PERFCOUNTER0_SELECT0, 0); + WREG32(CB_PERFCOUNTER0_SELECT1, 0); + WREG32(CB_PERFCOUNTER1_SELECT0, 0); + WREG32(CB_PERFCOUNTER1_SELECT1, 0); + WREG32(CB_PERFCOUNTER2_SELECT0, 0); + WREG32(CB_PERFCOUNTER2_SELECT1, 0); + WREG32(CB_PERFCOUNTER3_SELECT0, 0); + WREG32(CB_PERFCOUNTER3_SELECT1, 0); + + tmp = RREG32(HDP_MISC_CNTL); + tmp |= HDP_FLUSH_INVALIDATE_CACHE; + WREG32(HDP_MISC_CNTL, tmp); + + hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); + WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); + + WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); + + udelay(50); +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index f565a8fa7ead..cf06dcc9ba96 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -32,9 +32,57 @@ #define CTF_TEMP_MASK 0x0003fe00 #define CTF_TEMP_SHIFT 9 +#define SI_MAX_SH_GPRS 256 +#define SI_MAX_TEMP_GPRS 16 +#define SI_MAX_SH_THREADS 256 +#define SI_MAX_SH_STACK_ENTRIES 4096 +#define SI_MAX_FRC_EOV_CNT 16384 +#define SI_MAX_BACKENDS 8 +#define SI_MAX_BACKENDS_MASK 0xFF +#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F +#define SI_MAX_SIMDS 12 +#define SI_MAX_SIMDS_MASK 0x0FFF +#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF +#define SI_MAX_PIPES 8 +#define SI_MAX_PIPES_MASK 0xFF +#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F +#define SI_MAX_LDS_NUM 0xFFFF +#define SI_MAX_TCC 16 +#define SI_MAX_TCC_MASK 0xFFFF + +#define DMIF_ADDR_CONFIG 0xBD4 + +#define CC_SYS_RB_BACKEND_DISABLE 0xe80 +#define GC_USER_SYS_RB_BACKEND_DISABLE 0xe84 + #define MC_SHARED_CHMAP 0x2004 #define NOOFCHAN_SHIFT 12 #define NOOFCHAN_MASK 0x0000f000 +#define MC_SHARED_CHREMAP 0x2008 + +#define MC_ARB_RAMCFG 0x2760 +#define NOOFBANK_SHIFT 0 +#define NOOFBANK_MASK 0x00000003 +#define NOOFRANK_SHIFT 2 +#define NOOFRANK_MASK 0x00000004 +#define NOOFROWS_SHIFT 3 +#define NOOFROWS_MASK 0x00000038 +#define NOOFCOLS_SHIFT 6 
+#define NOOFCOLS_MASK 0x000000C0 /* bits 7:6; si.c derives mem_row_size_in_kb from this field */ +#define CHANSIZE_SHIFT 8 +#define CHANSIZE_MASK 0x00000100 /* bit 8 */ +#define NOOFGROUPS_SHIFT 12 +#define NOOFGROUPS_MASK 0x00001000 /* bit 12 */ + +#define HDP_HOST_PATH_CNTL 0x2C00 /* si.c init reads this register and writes the same value back */ + +#define HDP_ADDR_CONFIG 0x2F48 /* si.c init writes the same value here as to GB_ADDR_CONFIG */ +#define HDP_MISC_CNTL 0x2F4C +#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) /* bit OR-ed into HDP_MISC_CNTL by si.c init */ + +#define BIF_FB_EN 0x5490 +#define FB_READ_EN (1 << 0) +#define FB_WRITE_EN (1 << 1) #define DC_LB_MEMORY_SPLIT 0x6b0c #define DC_LB_MEMORY_CONFIG(x) ((x) << 20) @@ -51,4 +99,157 @@ # define LATENCY_LOW_WATERMARK(x) ((x) << 0) # define LATENCY_HIGH_WATERMARK(x) ((x) << 16) +#define GRBM_CNTL 0x8000 +#define GRBM_READ_TIMEOUT(x) ((x) << 0) + +#define CP_QUEUE_THRESHOLDS 0x8760 /* si.c init programs ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b) */ +#define ROQ_IB1_START(x) ((x) << 0) +#define ROQ_IB2_START(x) ((x) << 8) +#define CP_MEQ_THRESHOLDS 0x8764 /* si.c init programs MEQ1_START(0x30) | MEQ2_START(0x60) */ +#define MEQ1_START(x) ((x) << 0) +#define MEQ2_START(x) ((x) << 8) + +#define CP_PERFMON_CNTL 0x87FC /* written as 0 by si.c init */ + +#define VGT_CACHE_INVALIDATION 0x88C4 +#define CACHE_INVALIDATION(x) ((x) << 0) /* si.c init passes VC_AND_TC */ +#define VC_ONLY 0 +#define TC_ONLY 1 +#define VC_AND_TC 2 +#define AUTO_INVLD_EN(x) ((x) << 6) /* si.c init passes ES_AND_GS_AUTO */ +#define NO_AUTO 0 +#define ES_AUTO 1 +#define GS_AUTO 2 +#define ES_AND_GS_AUTO 3 + +#define VGT_GS_VERTEX_REUSE 0x88D4 /* written as 16 by si.c init */ + +#define VGT_NUM_INSTANCES 0x8974 /* written as 1 by si.c init */ + +#define CC_GC_SHADER_ARRAY_CONFIG 0x89bc /* written under the "primary versions" comment in si.c init */ +#define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 /* written under the "user versions" comment with the same value */ + +#define PA_CL_ENHANCE 0x8A14 /* si.c init programs CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3) */ +#define CLIP_VTX_REORDER_ENA (1 << 0) +#define NUM_CLIP_SEQ(x) ((x) << 1) + +#define PA_SC_LINE_STIPPLE_STATE 0x8B10 /* written as 0 by si.c init */ + +#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24 /* si.c init programs FORCE_EOV_MAX_CLK_CNT(4095) | FORCE_EOV_MAX_REZ_CNT(255) */ +#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) +#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) + +#define PA_SC_FIFO_SIZE 0x8BCC /* the four fields below are filled from rdev->config.si.sc_*_fifo_size* in si.c init */ +#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) +#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) +#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) +#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) + +#define SQ_CONFIG 0x8C00 /* written as 0 by si.c init */ + +#define SX_DEBUG_1 0x9060 /* si.c init reads this register and writes the same value back */ + +#define SPI_CONFIG_CNTL_1 0x913C /* si.c init programs VTX_DONE_DELAY(4) */ +#define VTX_DONE_DELAY(x) ((x) << 0) +#define INTERP_ONE_PRIM_PER_ROW (1 
<< 4) + +#define CGTS_TCC_DISABLE 0x9148 +#define CGTS_USER_TCC_DISABLE 0x914C +#define TCC_DISABLE_MASK 0xFFFF0000 /* bits 31:16; si.c counts the clear bits to get num_texture_channel_caches */ +#define TCC_DISABLE_SHIFT 16 + +#define CC_RB_BACKEND_DISABLE 0x98F4 +#define BACKEND_DISABLE(x) ((x) << 16) +#define GB_ADDR_CONFIG 0x98F8 /* si.c init writes the same value to DMIF_ADDR_CONFIG and HDP_ADDR_CONFIG */ +#define NUM_PIPES(x) ((x) << 0) /* x as used by si.c: 0 for 1 tile pipe, 1 for 2, 2 for 4, 3 for 8 or any other count */ +#define NUM_PIPES_MASK 0x00000007 /* bits 2:0 */ +#define NUM_PIPES_SHIFT 0 +#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) /* x as used by si.c: (mem_max_burst_length_bytes / 256) - 1 */ +#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 /* bits 6:4 */ +#define PIPE_INTERLEAVE_SIZE_SHIFT 4 +#define NUM_SHADER_ENGINES(x) ((x) << 12) /* x as used by si.c: num_shader_engines - 1 */ +#define NUM_SHADER_ENGINES_MASK 0x00003000 /* bits 13:12 */ +#define NUM_SHADER_ENGINES_SHIFT 12 +#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) /* x as used by si.c: (shader_engine_tile_size / 16) - 1 */ +#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 /* bits 18:16 */ +#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 +#define NUM_GPUS(x) ((x) << 20) /* x as used by si.c: 0 for 1 GPU (also the default), 1 for 2, 2 for 4 */ +#define NUM_GPUS_MASK 0x00700000 /* bits 22:20 */ +#define NUM_GPUS_SHIFT 20 +#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) /* x as used by si.c: 0 for 16, 1 for 32 (also the default), 2 for 64, 3 for 128 */ +#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 /* bits 25:24 */ +#define MULTI_GPU_TILE_SIZE_SHIFT 24 +#define ROW_SIZE(x) ((x) << 28) /* x as used by si.c: 0 for a 1 KB row (also the default), 1 for 2 KB, 2 for 4 KB */ +#define ROW_SIZE_MASK 0x30000000 /* bits 29:28 */ +#define ROW_SIZE_SHIFT 28 + +#define GB_TILE_MODE0 0x9910 +# define MICRO_TILE_MODE(x) ((x) << 0) +# define ADDR_SURF_DISPLAY_MICRO_TILING 0 +# define ADDR_SURF_THIN_MICRO_TILING 1 +# define ADDR_SURF_DEPTH_MICRO_TILING 2 +# define ARRAY_MODE(x) ((x) << 2) +# define ARRAY_LINEAR_GENERAL 0 +# define ARRAY_LINEAR_ALIGNED 1 +# define ARRAY_1D_TILED_THIN1 2 +# define ARRAY_2D_TILED_THIN1 4 +# define PIPE_CONFIG(x) ((x) << 6) +# define ADDR_SURF_P2 0 +# define ADDR_SURF_P4_8x16 4 +# define ADDR_SURF_P4_16x16 5 +# define ADDR_SURF_P4_16x32 6 +# define ADDR_SURF_P4_32x32 7 +# define ADDR_SURF_P8_16x16_8x16 8 +# define ADDR_SURF_P8_16x32_8x16 9 +# define ADDR_SURF_P8_32x32_8x16 10 +# define ADDR_SURF_P8_16x32_16x16 11 +# define ADDR_SURF_P8_32x32_16x16 12 +# define ADDR_SURF_P8_32x32_16x32 13 +# define ADDR_SURF_P8_32x64_32x32 14 +# define TILE_SPLIT(x) ((x) << 11) +# define ADDR_SURF_TILE_SPLIT_64B 0 +# define ADDR_SURF_TILE_SPLIT_128B 1 +# define 
ADDR_SURF_TILE_SPLIT_256B 2 +# define ADDR_SURF_TILE_SPLIT_512B 3 +# define ADDR_SURF_TILE_SPLIT_1KB 4 +# define ADDR_SURF_TILE_SPLIT_2KB 5 +# define ADDR_SURF_TILE_SPLIT_4KB 6 +# define BANK_WIDTH(x) ((x) << 14) +# define ADDR_SURF_BANK_WIDTH_1 0 +# define ADDR_SURF_BANK_WIDTH_2 1 +# define ADDR_SURF_BANK_WIDTH_4 2 +# define ADDR_SURF_BANK_WIDTH_8 3 +# define BANK_HEIGHT(x) ((x) << 16) +# define ADDR_SURF_BANK_HEIGHT_1 0 +# define ADDR_SURF_BANK_HEIGHT_2 1 +# define ADDR_SURF_BANK_HEIGHT_4 2 +# define ADDR_SURF_BANK_HEIGHT_8 3 +# define MACRO_TILE_ASPECT(x) ((x) << 18) +# define ADDR_SURF_MACRO_ASPECT_1 0 +# define ADDR_SURF_MACRO_ASPECT_2 1 +# define ADDR_SURF_MACRO_ASPECT_4 2 +# define ADDR_SURF_MACRO_ASPECT_8 3 +# define NUM_BANKS(x) ((x) << 20) +# define ADDR_SURF_2_BANK 0 +# define ADDR_SURF_4_BANK 1 +# define ADDR_SURF_8_BANK 2 +# define ADDR_SURF_16_BANK 3 + +#define CB_PERFCOUNTER0_SELECT0 0x9a20 +#define CB_PERFCOUNTER0_SELECT1 0x9a24 +#define CB_PERFCOUNTER1_SELECT0 0x9a28 +#define CB_PERFCOUNTER1_SELECT1 0x9a2c +#define CB_PERFCOUNTER2_SELECT0 0x9a30 +#define CB_PERFCOUNTER2_SELECT1 0x9a34 +#define CB_PERFCOUNTER3_SELECT0 0x9a38 +#define CB_PERFCOUNTER3_SELECT1 0x9a3c /* si.c init writes 0 to all eight CB_PERFCOUNTERn_SELECTm registers */ + +#define GC_USER_RB_BACKEND_DISABLE 0x9B7C /* written under the "user versions" comment in si.c init */ +#define BACKEND_DISABLE_MASK 0x00FF0000 /* bits 23:16 */ +#define BACKEND_DISABLE_SHIFT 16 + +#define TCP_CHAN_STEER_LO 0xac0c +#define TCP_CHAN_STEER_HI 0xac10 + + #endif