From 437ccd175a7a3c9871536a26b2d28e3c99515e7f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 10:38:51 +0800 Subject: [PATCH 001/178] drm/amd/powerplay: support hotspot/memory critical limit values These new interfaces(temp2_crit, temp2_crit_hyst, temp3_crit, temp3_crit_hyst) are supported on SOC15 dGPUs only. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 8 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 55 ++++++++++++++++++- .../drm/amd/powerplay/hwmgr/hardwaremanager.c | 12 +++- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 10 +++- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 11 +++- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 11 +++- .../gpu/drm/amd/powerplay/inc/power_state.h | 4 ++ .../gpu/drm/amd/powerplay/inc/pp_thermal.h | 8 +-- 8 files changed, 103 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dca35407879d..8df54443ec78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,6 +75,14 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* hotspot low temperature threshold */ + int min_hotspot_temp; + /* hotspot high temperature critical threshold */ + int max_hotspot_crit_temp; + /* memory low temperature threshold */ + int min_mem_temp; + /* memory high temperature critical threshold */ + int max_mem_crit_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 34471dbaa872..1f78deadb770 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1414,6 +1414,38 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t 
amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_hotspot_temp; + else + temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_mem_temp; + else + temp = adev->pm.dpm.thermal.max_mem_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -1985,9 +2017,11 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - temp1_input: the on die GPU temperature in millidegrees Celsius * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * * hwmon interfaces for GPU voltage: * @@ -2038,6 +2072,10 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, 
amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2064,6 +2102,10 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2186,6 +2228,15 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) return 0; + /* only SOC15 dGPUs support hotspot and mem temperatures */ + if (((adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_VEGA10) && + (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr)) + return 0; + return effective_mode; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 70f7f47a2fcf..af6ab04130ef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c 
@@ -225,7 +225,13 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr) int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) { int ret = 0; - struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; + struct PP_TemperatureRange range = { + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX}; struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->hwmgr_func->get_thermal_temperature_range) @@ -239,6 +245,10 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.min_mem_temp = range.mem_min; + adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 384c37875cd0..4e1df44f094b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4852,12 +4852,16 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v2_information *table_info = - (struct phm_ppt_v2_information *)hwmgr->pptable; + struct vega10_hwmgr *data = hwmgr->backend; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp * + thermal_data->max = pp_table->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * 
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 707cd4b0357f..4f63570ea257 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -2526,12 +2526,17 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - thermal_data->max = pptable_information->us_software_shutdown_temp * + thermal_data->max = pp_table->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9b9f87b84910..555ff8733b6b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3974,12 +3974,17 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct 
PP_TemperatureRange)); - thermal_data->max = pptable_information->us_software_shutdown_temp * + thermal_data->max = pp_table->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index a99b5cbb113e..a8988e7d58c6 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -124,6 +124,10 @@ struct PP_StateSoftwareAlgorithmBlock { struct PP_TemperatureRange { int min; int max; + int hotspot_min; + int hotspot_crit_max; + int mem_min; + int mem_crit_max; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h index 201d2b6329ab..75a0a2f8bea2 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -27,14 +27,14 @@ static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = { - {-273150, 99000}, - { 120000, 120000}, + {-273150, 99000, -273150, 99000, -273150, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000}, }; static const struct PP_TemperatureRange SMU7ThermalPolicy[] = { - {-273150, 99000}, - { 120000, 120000}, + {-273150, 99000, -273150, 99000, -273150, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000}, }; #endif From 901cb599dbc233fc325e3602e7c1218d2c24359c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 11:53:04 +0800 Subject: [PATCH 002/178] drm/amd/powerplay: support temperature emergency max values These new interfaces(temp1_emergency, temp2_emergency, temp3_emergency) are supported on SOC15 dGPUs only. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 40 ++++++++++++++++++- .../drm/amd/powerplay/hwmgr/hardwaremanager.c | 8 +++- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 6 +++ .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 6 +++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 6 +++ .../gpu/drm/amd/powerplay/inc/power_state.h | 3 ++ .../gpu/drm/amd/powerplay/inc/pp_thermal.h | 12 ++++-- 8 files changed, 81 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 8df54443ec78..521dbd0d9af8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* edge max emergency(shutdown) temp */ + int max_edge_emergency_temp; /* hotspot low temperature threshold */ int min_hotspot_temp; /* hotspot high temperature critical threshold */ int max_hotspot_crit_temp; + /* hotspot max emergency(shutdown) temp */ + int max_hotspot_emergency_temp; /* memory low temperature threshold */ int min_mem_temp; /* memory high temperature critical threshold */ int max_mem_crit_temp; + /* memory max emergency(shutdown) temp */ + int max_mem_emergency_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 1f78deadb770..7093b4efc3a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1446,6 +1446,32 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = 
dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -2023,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only + * * hwmon interfaces for GPU voltage: * * - in0_input: the voltage on the GPU in millivolts @@ -2072,10 +2101,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, 
amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2106,6 +2138,9 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp3_crit.dev_attr.attr, &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2234,7 +2269,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || - attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index af6ab04130ef..cc57fb953e62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) struct PP_TemperatureRange range = { TEMP_RANGE_MIN, TEMP_RANGE_MAX, - TEMP_RANGE_MIN, TEMP_RANGE_MAX, TEMP_RANGE_MIN, 
+ TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, TEMP_RANGE_MAX}; struct amdgpu_device *adev = hwmgr->adev; @@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4e1df44f094b..1422bc4e45d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4859,10 +4859,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 4f63570ea257..60c9f9502e65 100644 --- 
a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -2534,10 +2534,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 555ff8733b6b..3a9629c907bb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3982,10 +3982,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index 
a8988e7d58c6..a5f2227a3971 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock { struct PP_TemperatureRange { int min; int max; + int edge_emergency_max; int hotspot_min; int hotspot_crit_max; + int hotspot_emergency_max; int mem_min; int mem_crit_max; + int mem_emergency_max; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h index 75a0a2f8bea2..3e30768f9e1c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -27,14 +27,18 @@ static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = { - {-273150, 99000, -273150, 99000, -273150, 99000}, - { 120000, 120000, 120000, 120000, 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; static const struct PP_TemperatureRange SMU7ThermalPolicy[] = { - {-273150, 99000, -273150, 99000, -273150, 99000}, - { 120000, 120000, 120000, 120000, 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; +#define CTF_OFFSET_EDGE 5 +#define CTF_OFFSET_HOTSPOT 5 +#define CTF_OFFSET_HBM 5 + #endif From ada2b8f1c8289c0b1a6ac775a7d52d8df62140e0 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 13:28:12 +0800 Subject: [PATCH 003/178] drm/amd/powerplay: support SMU metrics table on Vega12 That should provide some necessary sensor information. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 21 +++++++++++++++++++ .../drm/amd/powerplay/hwmgr/vega12_hwmgr.h | 3 +++ .../drm/amd/powerplay/smumgr/vega12_smumgr.c | 21 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 60c9f9502e65..aeeeaa79056c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1237,6 +1237,27 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) return (mem_clk * 100); } +static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table) +{ + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + int ret = 0; + + if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) { + ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table, + TABLE_SMU_METRICS, true); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t)); + data->metrics_time = jiffies; + } else + memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t)); + + return ret; +} + static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { #if 0 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index b3e424d28994..73875399666a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -396,6 +396,9 @@ struct vega12_hwmgr { /* ---- Gfxoff ---- */ bool gfxoff_controlled_by_driver; + + unsigned long metrics_time; + SmuMetrics_t metrics_table; }; #define VEGA12_DPM2_NEAR_TDP_DEC 10 diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c 
b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index ddb801517667..1eaf0fa28ef7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr) priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01; priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t); + /* allocate space for SMU_METRICS table */ + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, + sizeof(SmuMetrics_t), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &priv->smu_tables.entry[TABLE_SMU_METRICS].handle, + &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, + &priv->smu_tables.entry[TABLE_SMU_METRICS].table); + if (ret) + goto err4; + + priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01; + priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t); + return 0; +err4: + amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, + &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, + &priv->smu_tables.entry[TABLE_OVERDRIVE].table); err3: amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle, &priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr, @@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr) amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, &priv->smu_tables.entry[TABLE_OVERDRIVE].table); + amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle, + &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, + &priv->smu_tables.entry[TABLE_SMU_METRICS].table); kfree(hwmgr->smu_backend); hwmgr->smu_backend = NULL; } From a34d1166b47c8497cffda4da7c14182cb3420362 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 13:51:53 +0800 Subject: [PATCH 004/178] drm/amd/powerplay: expose current hotspot and memory temperatures V2 Two new hwmon interfaces(temp2_input and 
temp3_input) are added. They are supported on SOC15 dGPUs only. - V2: correct thermal sensor output Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 45 +++++++++++++++---- .../gpu/drm/amd/include/kgd_pp_interface.h | 3 ++ .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 +++++ .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 19 ++++++++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 20 ++++++++- 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7093b4efc3a7..00ca8ec9845f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1382,6 +1382,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; + int channel = to_sensor_dev_attr(attr)->index; int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ @@ -1389,11 +1390,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ 
-2041,7 +2063,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only * * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only @@ -2098,13 +2121,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); @@ -2134,8 +2159,10 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, 
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, &sensor_dev_attr_temp2_crit.dev_attr.attr, &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, &sensor_dev_attr_temp3_crit.dev_attr.attr, &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp1_emergency.dev_attr.attr, @@ -2272,7 +2299,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || - attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2b579ba9b685..a8bf8e90ceeb 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -111,6 +111,9 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_GPU_LOAD, AMDGPU_PP_SENSOR_GFX_MCLK, AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + AMDGPU_PP_SENSOR_MEM_TEMP, AMDGPU_PP_SENSOR_VCE_POWER, AMDGPU_PP_SENSOR_UVD_POWER, AMDGPU_PP_SENSOR_GPU_POWER, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 1422bc4e45d1..d5d0db456021 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + smum_send_msg_to_smc(hwmgr, 
PPSMC_MSG_GetTemperatureHotspot); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index aeeeaa79056c..8d2865b72c7b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); + SmuMetrics_t metrics_table; int ret = 0; switch (idx) { @@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHotspot * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 
0 : 1; *size = 4; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 3a9629c907bb..91e26f8b3758 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2138,10 +2138,28 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, if (!ret) *size = 4; break; - case AMDGPU_PP_SENSOR_GPU_TEMP: + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureEdge * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; From 2adc11564c42b63827eaf1cf9d61da2f79b9c978 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 17 Apr 2019 15:45:08 +0800 Subject: [PATCH 005/178] drm/amd/powerplay: support hwmon temperature channel labels V2 Expose temp[1-3]_label hwmon interfaces. While temp2_label and temp3_label are visible for SOC15 dGPUs only. 
- V2: correct temp1_label as "edge" Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 34 ++++++++++++++++++- .../gpu/drm/amd/include/kgd_pp_interface.h | 7 ++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 00ca8ec9845f..6d2995fcfd33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = { {0, NULL}, }; +static const struct hwmon_temp_label { + enum PP_HWMON_TEMP channel; + const char *label; +} temp_label[] = { + {PP_TEMP_EDGE, "edge"}, + {PP_TEMP_JUNCTION, "junction"}, + {PP_TEMP_MEM, "mem"}, +}; + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) { if (adev->pm.dpm_enabled) { @@ -1468,6 +1477,18 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int channel = to_sensor_dev_attr(attr)->index; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, struct device_attribute *attr, char *buf) @@ -2066,6 +2087,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius * - temp2_input and temp3_input are supported on SOC15 dGPUs only * + * - temp[1-3]_label: temperature channel label + * - temp2_label and temp3_label are supported on SOC15 dGPUs only + * * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only * @@ -2133,6 +2157,9 @@ static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, 
amdgpu_hwmon_show_temp, NULL, PP static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2168,6 +2195,9 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_emergency.dev_attr.attr, &sensor_dev_attr_temp2_emergency.dev_attr.attr, &sensor_dev_attr_temp3_emergency.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2301,7 +2331,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp2_input.dev_attr.attr || - attr == &sensor_dev_attr_temp3_input.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_input.dev_attr.attr || + attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index a8bf8e90ceeb..30788d510576 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -162,6 +162,13 @@ struct pp_states_info { uint32_t states[16]; }; +enum PP_HWMON_TEMP { + PP_TEMP_EDGE = 0, + PP_TEMP_JUNCTION, + PP_TEMP_MEM, + PP_TEMP_MAX +}; + #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 From c59a722c4ccb2d1d71a0c99ab9f3a46b4cb4407f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 15:33:06 +0800 Subject: [PATCH 006/178] drm/amd/powerplay: expose Vega12 current power Provide the real sensor information for current power. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 8d2865b72c7b..1997df39b645 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1260,19 +1260,16 @@ static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metric static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { -#if 0 - uint32_t value; + SmuMetrics_t metrics_table; + int ret = 0; - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_GetCurrPkgPwr), - "Failed to get current package power!", - return -EINVAL); + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; - value = smum_get_argument(hwmgr); - /* power value is an integer */ - *query = value << 8; -#endif - return 0; + *query = metrics_table.CurrSocketPower << 8; + + return ret; } static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq) @@ -1389,6 +1386,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, break; case 
AMDGPU_PP_SENSOR_GPU_POWER: ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); + if (!ret) + *size = 4; break; case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value); From eef2d67ead3e14e284cc840894727b095fe774c2 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 18 Apr 2019 15:37:49 +0800 Subject: [PATCH 007/178] drm/amd/powerplay: expose Vega12 current gpu activity Provide the real sensor information for current gpu activity. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 1997df39b645..a9d29b4be72f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1310,23 +1310,14 @@ static int vega12_get_current_activity_percent( struct pp_hwmgr *hwmgr, uint32_t *activity_percent) { + SmuMetrics_t metrics_table; int ret = 0; - uint32_t current_activity = 50; -#if 0 - ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0); - if (!ret) { - current_activity = smum_get_argument(hwmgr); - if (current_activity > 100) { - PP_ASSERT(false, - "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!"); - current_activity = 100; - } - } else - PP_ASSERT(false, - "[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!"); -#endif - *activity_percent = current_activity; + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *activity_percent = metrics_table.AverageGfxActivity; return ret; } From 4fde03a7efe42f1b3de3b483f98dc4551dd62ac4 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 29 Apr 2019 14:47:41 +0800 Subject: [PATCH 008/178] drm/amd/powerplay: add helper function to get smu 
firmware & if version add this helper function to get smc version. Signed-off-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 30 +++++++++++++++++++ .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 23 +++++++++----- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index c058c784180e..52d919a8b70a 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -30,6 +30,36 @@ #include "atom.h" #include "amd_pcie.h" +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version) +{ + int ret = 0; + + if (!if_version && !smu_version) + return -EINVAL; + + if (if_version) { + ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); + if (ret) + return ret; + + ret = smu_read_smc_arg(smu, if_version); + if (ret) + return ret; + } + + if (smu_version) { + ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion); + if (ret) + return ret; + + ret = smu_read_smc_arg(smu, smu_version); + if (ret) + return ret; + } + + return ret; +} + int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, bool gate) { diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index c8b168b3413b..89052414e9f1 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -767,4 +767,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b extern int smu_handle_task(struct smu_context *smu, enum amd_dpm_forced_level level, enum amd_pp_task task_id); +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version); #endif diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 92903a4cc4d8..cd36c4272659 100644 --- 
a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -223,20 +223,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu) static int smu_v11_0_check_fw_version(struct smu_context *smu) { - uint32_t smu_version = 0xff; + uint32_t if_version = 0xff, smu_version = 0xff; + uint16_t smu_major; + uint8_t smu_minor, smu_debug; int ret = 0; - ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); + ret = smu_get_smc_version(smu, &if_version, &smu_version); if (ret) - goto err; + return ret; - ret = smu_read_smc_arg(smu, &smu_version); - if (ret) - goto err; + smu_major = (smu_version >> 16) & 0xffff; + smu_minor = (smu_version >> 8) & 0xff; + smu_debug = (smu_version >> 0) & 0xff; - if (smu_version != smu->smc_if_version) + pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n", + if_version, smu_version, smu_major, smu_minor, smu_debug); + + if (if_version != smu->smc_if_version) { + pr_err("SMU driver if version not matched\n"); ret = -EINVAL; -err: + } + return ret; } From 88807dc8d573c0f718d0d26f592f212c5a487cf0 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 4 Apr 2019 15:47:34 -0500 Subject: [PATCH 009/178] drm/amdgpu: Remap hdp coherency registers Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL to an empty page in mmio space. We will later map this page to process space so application can flush hdp. This can't be done properly at those registers' original location because it will expose more than desired registers to process space. 
v2: Use explicit register hole location v3: Moved remapped hdp registers into adev struct v4: Use more generic name for remapped page Expose register offset in kfd_ioctl.h v5: Move hdp register remap function to nbio ip function v6: Fixed operator precedence issue and other bugs Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++++ drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 ++++++++++++--- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 15 ++++++++++++--- drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +++++++++++ include/uapi/linux/kfd_ioctl.h | 7 +++++++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 14398f55f602..23c3375623d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -639,6 +639,11 @@ struct nbio_hdp_flush_reg { u32 ref_and_mask_sdma1; }; +struct amdgpu_mmio_remap { + u32 reg_offset; + resource_size_t bus_addr; +}; + struct amdgpu_nbio_funcs { const struct nbio_hdp_flush_reg *hdp_flush_reg; u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); @@ -666,6 +671,7 @@ struct amdgpu_nbio_funcs { void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); }; struct amdgpu_df_funcs { @@ -764,6 +770,7 @@ struct amdgpu_device { void __iomem *rmmio; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + struct amdgpu_mmio_remap rmmio_remap; /* protects concurrent SMC based register access */ spinlock_t smc_idx_lock; amdgpu_rreg_t smc_rreg; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..73419fa38159 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,9 +29,18 
@@ #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, .detect_hw_virt = nbio_v7_0_detect_hw_virt, + .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d51598cfe..bfaaa327ae3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,9 +27,18 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset +
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4900e4958dec..78bd00a0142f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -44,6 +44,7 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "mp/mp_9_0_offset.h" @@ -64,6 +65,7 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" +#include <uapi/linux/kfd_ioctl.h> #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -783,8 +785,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = static int soc15_common_early_init(void *handle) { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; +
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; @@ -1014,6 +1019,12 @@ static int soc15_common_hw_init(void *handle) soc15_program_aspm(adev); /* setup nbio registers */ adev->nbio_funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio_funcs->remap_hdp_registers) + adev->nbio_funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dc067ed0b72d..bb1b4280f53d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* Register offset inside the remapped mmio page + */ +enum kfd_mmio_remap { + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) From d8e408a82704c86ba87c3d58cfe69dcdb758aa07 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 11 Apr 2019 14:43:39 -0500 Subject: [PATCH 010/178] drm/amdkfd: Expose HDP registers to user space Introduce a new memory type (KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) and expose mmio page of HDP registers to user space through this new memory type. 
v2: moved remapped hdp regs to adev struct v3: rename the new memory type to ALLOC_MEM_FLAGS_MMIO_REMAP v4: use more generic function name v5: Fail remapped mmio allocation for asics before gfx9 Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++++--- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 + include/uapi/linux/kfd_ioctl.h | 1 + 6 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index aeead072fa79..401edb605fdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -519,6 +519,13 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rmmio_remap.bus_addr; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e37fa7e85b1..ea1f141db3ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -169,6 +169,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a6e5184d436c..00e013581a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1109,7 +1109,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; - } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | + ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; bo_type = ttm_bo_type_sg; @@ -1294,8 +1295,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. + /* If the SG is not NULL, it's one we created for a doorbell or mmio + * remap BO. We need to free it. */ if (mem->bo->tbo.sg) { sg_free_table(mem->bo->tbo.sg); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..d795e5018270 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1272,6 +1272,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (args->size != kfd_doorbell_process_slice(dev)) return -EINVAL; offset = kfd_get_process_doorbells(dev, p); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) + return -EINVAL; + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) + return -ENOMEM; } mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b897aca9b4c9..98b9533e672b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -174,6 +174,7 @@ struct tile_config { #define ALLOC_MEM_FLAGS_GTT (1 << 1) #define ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define ALLOC_MEM_FLAGS_DOORBELL 
(1 << 3) +#define ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* * Allocation flags attributes/access options. diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bb1b4280f53d..1e7d5f3376b0 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -338,6 +338,7 @@ struct kfd_ioctl_acquire_vm_args { #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) #define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* Allocation flags: attributes/access options */ #define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) From da361dd13f4fb766445f55bbd2eec36c73a9b1bb Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 17 Apr 2019 14:28:18 -0400 Subject: [PATCH 011/178] drm/amdgpu: Implement get num of hops between two xgmi device KFD need to provide the info for upper level to determine the data path Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 26 ++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 23 ++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 3 ++- 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 401edb605fdd..8949b1ac2f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -27,6 +27,7 @@ #include "amdgpu_gfx.h" #include #include +#include "amdgpu_xgmi.h" static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -518,6 +519,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +{ + struct 
amdgpu_device *peer_adev = (struct amdgpu_device *)src; + struct amdgpu_device *adev = (struct amdgpu_device *)dst; + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); + + if (ret < 0) { + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, ret); + ret = 0; + } + return (uint8_t)ret; +} uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ea1f141db3ff..3369017d9f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -170,6 +170,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index cde113f07c96..acbc18b594a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -95,12 +95,26 @@ struct psp_funcs int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); }; +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct psp_xgmi_topology_info { + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; +}; + struct psp_xgmi_context { uint8_t initialized; uint32_t session_id; struct amdgpu_bo *xgmi_shared_bo; uint64_t xgmi_shared_mc_addr; void *xgmi_shared_buf; + struct psp_xgmi_topology_info top_info; }; struct 
psp_ras_context { @@ -181,18 +195,6 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 -struct psp_xgmi_node_info { - uint64_t node_id; - uint8_t num_hops; - uint8_t is_sharing_enabled; - enum ta_xgmi_assigned_sdma_engine sdma_engine; -}; - -struct psp_xgmi_topology_info { - uint32_t num_nodes; - struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; -}; #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a48c84c51775..04dfc8b79e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, hive->number_devices, - &hive->topology_info); + &adev->psp.xgmi_context.top_info); if (ret) dev_err(adev->dev, "XGMI: Set topology failure on device %llx, hive %llx, ret %d", @@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev return ret; } + +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_hops; + return -EINVAL; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { - struct psp_xgmi_topology_info *hive_topology; + struct psp_xgmi_topology_info *top_info; struct amdgpu_hive_info *hive; struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; @@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - hive_topology = &hive->topology_info; + 
top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) - hive_topology->nodes[count++].node_id = entry->node_id; + top_info->nodes[count++].node_id = entry->node_id; hive->number_devices = count; /* Each psp need to get the latest topology */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 3e9c91e9a4bf..fbcee31788c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -27,7 +27,6 @@ struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; - struct psp_xgmi_topology_info topology_info; int number_devices; struct mutex hive_lock, reset_lock; struct kobject *kobj; @@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) From 0fb0df031acdb6f17603cb32be13a2d1a858e249 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 17 Apr 2019 14:34:07 -0400 Subject: [PATCH 012/178] drm/amdkfd: Adjust weight to represent num_hops info when report xgmi iolink Upper level runtime need the xgmi hops info to determine the data path Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 
7 +++++-- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2e7c44955f43..1714900035d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -372,7 +372,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) props->weight = 20; else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) - props->weight = 15; + props->weight = 15 * iolink->num_hops_xgmi; else props->weight = node_distance(id_from, id_to); @@ -1092,6 +1092,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, struct kfd_dev *kdev, + struct kfd_dev *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) @@ -1110,6 +1111,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; + sub_type_hdr->num_hops_xgmi = + amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); return 0; } @@ -1287,7 +1290,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, (char *)sub_type_hdr + sizeof(struct crat_subtype_iolink)); ret = kfd_fill_gpu_xgmi_link_to_gpu( - &avail_size, kdev, + &avail_size, kdev, peer_dev->gpu, (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain, nid); if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 7c3f192fe25f..d54ceebd346b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -274,7 +274,8 @@ struct crat_subtype_iolink { uint32_t minimum_bandwidth_mbs; uint32_t maximum_bandwidth_mbs; uint32_t 
recommended_transfer_size; - uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH]; + uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1]; + uint8_t num_hops_xgmi; }; /* From 1846e3f9e76ceaac86072eb897393ee24d1d870e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 19 Apr 2019 13:54:46 +0800 Subject: [PATCH 013/178] drm/amd/powerplay: expose Vega20 realtime memory utilization Enable realtime memory utilization report on Vega20. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + .../gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 30788d510576..9f661bf96ed0 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -109,6 +109,7 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_UVD_DCLK, AMDGPU_PP_SENSOR_VCE_ECCLK, AMDGPU_PP_SENSOR_GPU_LOAD, + AMDGPU_PP_SENSOR_MEM_LOAD, AMDGPU_PP_SENSOR_GFX_MCLK, AMDGPU_PP_SENSOR_GPU_TEMP, AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 91e26f8b3758..eb7002401587 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2094,6 +2094,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr, } static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, + int idx, uint32_t *activity_percent) { int ret = 0; @@ -2103,7 +2104,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, if (ret) return ret; - *activity_percent = metrics_table.AverageGfxActivity; + switch (idx) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + *activity_percent = metrics_table.AverageGfxActivity; + break; + case 
AMDGPU_PP_SENSOR_MEM_LOAD: + *activity_percent = metrics_table.AverageUclkActivity; + break; + default: + pr_err("Invalid index for retrieving clock activity\n"); + return -EINVAL; + } return ret; } @@ -2134,7 +2145,8 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value); + case AMDGPU_PP_SENSOR_MEM_LOAD: + ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value); if (!ret) *size = 4; break; From 271151d80149e5081a80516f0a8d4eeb5744ff70 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 24 Apr 2019 15:19:36 +0800 Subject: [PATCH 014/178] drm/amd/powerplay: expose Vega12 realtime memory utilization Enable realtime memory utilization report on Vega12. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index a9d29b4be72f..1a909dda37c7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -1308,6 +1308,7 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f static int vega12_get_current_activity_percent( struct pp_hwmgr *hwmgr, + int idx, uint32_t *activity_percent) { SmuMetrics_t metrics_table; @@ -1317,7 +1318,17 @@ static int vega12_get_current_activity_percent( if (ret) return ret; - *activity_percent = metrics_table.AverageGfxActivity; + switch (idx) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + *activity_percent = metrics_table.AverageGfxActivity; + break; + case AMDGPU_PP_SENSOR_MEM_LOAD: + *activity_percent = metrics_table.AverageUclkActivity; + break; + default: + pr_err("Invalid index for retrieving clock activity\n"); + return -EINVAL; + } return ret; } @@ 
-1341,7 +1352,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value); + case AMDGPU_PP_SENSOR_MEM_LOAD: + ret = vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value); if (!ret) *size = 4; break; From 767fb6b35dfaba08f514ca4f8f5fed1c442c40fd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 24 Apr 2019 15:46:50 +0800 Subject: [PATCH 015/178] drm/amd/powerplay: expose SMU7 asics realtime memory utilization Enable realtime memory utilization report on SMU7 asics. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 5 ++++- drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 1 + drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c | 2 ++ 8 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 048757e8f494..16591be8b0ca 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3532,9 +3532,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; return 0; case AMDGPU_PP_SENSOR_GPU_LOAD: + case AMDGPU_PP_SENSOR_MEM_LOAD: offset = data->soft_regs_start + smum_get_offsetof(hwmgr, SMU_SoftRegisters, - AverageGraphicsActivity); + (idx == AMDGPU_PP_SENSOR_GPU_LOAD) ? 
+ AverageGraphicsActivity: + AverageMemoryActivity); activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset); activity_percent += 0x80; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 82550a8a3a3f..c5288831aa15 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -41,6 +41,7 @@ enum SMU_MEMBER { HandshakeDisables = 0, VoltageChangeTimeout, AverageGraphicsActivity, + AverageMemoryActivity, PreVBlankGap, VBlankTimeout, UcodeLoadStatus, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 669bd0c2a16c..9ef57fcf7e78 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2254,6 +2254,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU7_SoftRegisters, AverageGraphicsA); + case AverageMemoryActivity: + return offsetof(SMU7_SoftRegisters, AverageMemoryA); case PreVBlankGap: return offsetof(SMU7_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index bc8375cbf297..0ce85b73338e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU73_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU73_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git 
a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 375ccf6ff5f2..f24f13d77808 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -2219,6 +2219,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU71_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU71_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2d4cfe14f72e..0d8958e71b94 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -2313,6 +2313,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU74_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU74_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 3ed6c5f1e5cf..060c0f7f5238 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -2611,6 +2611,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return 
offsetof(SMU72_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU72_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 1e69300f6175..d499204b2184 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU75_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU75_SoftRegisters, PreVBlankGap); case VBlankTimeout: From f120386d74f193301d808ccfec1a778af62b6797 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 26 Apr 2019 12:02:48 +0800 Subject: [PATCH 016/178] drm/amdgpu: add new sysfs interface for memory realtime utilization A new sysfs interface mem_busy_percent is added for telling how busy the VRAM is(in percentage). Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6d2995fcfd33..58edf6f13d51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1311,6 +1311,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", value); } +/** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage. The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. 
+ */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, + (void *)&value, &size); + + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + /** * DOC: pcie_bw * @@ -1380,6 +1406,8 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, + amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, amdgpu_get_ppfeature_status, @@ -2762,6 +2790,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* APU does not have its own dedicated memory */ + if (!(adev->flags & AMD_IS_APU)) { + ret = device_create_file(adev->dev, + &dev_attr_mem_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + "mem_busy_percent\n"); + return ret; + } + } /* PCIe Perf counters won't work on APU nodes */ if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2827,6 +2865,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); + if (!(adev->flags & AMD_IS_APU)) + device_remove_file(adev->dev, &dev_attr_mem_busy_percent); if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); if ((adev->asic_type >= CHIP_VEGA10) && From 912dfc846aad77e4a61ac39ca92bd7149062e590 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 29 Apr 2019 16:51:17 +0800 Subject: [PATCH 017/178] drm/amdgpu: enable 
separate timeout setting for every ring type V4 Every ring type can have its own timeout setting. - V2: update lockup_timeout parameter format and cosmetic fixes - V3: invalidate 0 and negative values - V4: update lockup_timeout parameter format Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 79 ++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 35 ++++++++-- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +- 5 files changed, 121 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 23c3375623d7..30165487dabd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,6 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; -extern int amdgpu_lockup_timeout; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -415,6 +414,7 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -943,6 +943,11 @@ struct amdgpu_device { struct work_struct xgmi_reset_work; bool in_baco_reset; + + long gfx_timeout; + long sdma_timeout; + long video_timeout; + long compute_timeout; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cc8ad3831982..0237513086e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -910,8 +910,10 @@ def_value: * Validates certain module parameters and updates * 
the associated values used by the driver (all asics). */ -static void amdgpu_device_check_arguments(struct amdgpu_device *adev) +static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int ret = 0; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -956,12 +958,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vram_page_split = 1024; } - if (amdgpu_lockup_timeout == 0) { - dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); - amdgpu_lockup_timeout = 10000; + ret = amdgpu_device_get_job_timeout_settings(adev); + if (ret) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return ret; } adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + + return ret; } /** @@ -2473,7 +2478,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->lock_reset); mutex_init(&adev->virt.dpm_mutex); - amdgpu_device_check_arguments(adev); + r = amdgpu_device_check_arguments(adev); + if (r) + return r; spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1e2cc9d68a05..5924d89e0aee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -81,6 +81,8 @@ #define KMS_DRIVER_MINOR 32 #define KMS_DRIVER_PATCHLEVEL 0 +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 + int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 10000; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); 
module_param_named(msi, amdgpu_msi, int, 0444); /** - * DOC: lockup_timeout (int) - * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. - * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. + * DOC: lockup_timeout (string) + * Set GPU scheduler timeout value in ms. + * + * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or + * multiple values specified. 0 and negative values are invalidated. They will be adjusted + * to default timeout. + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. The second one is for Compute. + * And the third and fourth ones are for SDMA and Video. + * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) + * jobs is 10000. And there is no timeout enforced on compute jobs. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); -module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), " + "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); +module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) @@ -1216,6 +1227,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. 
+ */ + adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000; + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + /* Invalidate 0 and negative values */ + if (timeout <= 0) { + index++; + continue; + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. + */ + if (index == 1) + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + } + + return ret; +} + static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 4dee2326b29c..3a483f7e89c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { + struct amdgpu_device *adev = ring->adev; long timeout; int r; + if (!adev) + return -EINVAL; + /* Check that num_hw_submission is a power of two */ if ((num_hw_submission & (num_hw_submission - 1)) != 0) return -EINVAL; @@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - /* for non-sriov case, no timeout enforce on compute ring */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - && 
!amdgpu_sriov_vf(ring->adev)) - timeout = MAX_SCHEDULE_TIMEOUT; - else - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + /* + * For non-sriov case, no timeout enforce + * on compute ring by default. Unless user + * specifies a timeout for compute ring. + * + * For sriov case, always use the timeout + * as gfx ring + */ + if (!amdgpu_sriov_vf(ring->adev)) + timeout = adev->compute_timeout; + else + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2471e7cf75ea..64a7b1fb1d1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -343,7 +343,7 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) + && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT) amdgpu_device_gpu_recover(adev, NULL); } From e008299ea9a166ae2460ad1445031c40d9de0670 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 29 Apr 2019 15:15:41 -0400 Subject: [PATCH 018/178] drm/amdgpu: Update latest xgmi topology info after each device is enumerated Adjust the sequence of set/get xgmi topology, so driver can have the latest XGMI topology info for future usage Signed-off-by: shaoyunl Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 04dfc8b79e39..e48e9394f1e4 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -301,30 +301,41 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) top_info->nodes[count++].node_id = entry->node_id; + top_info->num_nodes = count; hive->number_devices = count; - /* Each psp need to get the latest topology */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info); + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; + } + + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", tmp_adev->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret); /* To do : continue with some node failed or disable the whole hive */ - break; + goto exit; } } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - break; - } - if (!ret) ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + + mutex_unlock(&hive->hive_lock); +exit: if (!ret) dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); @@ -333,9 +344,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret); - - mutex_unlock(&hive->hive_lock); -exit: return ret; } From 673b366b41e216309c830d86d628e08736172067 Mon Sep 17 
00:00:00 2001 From: Kent Russell Date: Tue, 30 Apr 2019 06:42:24 -0400 Subject: [PATCH 019/178] drm/amdgpu: Add replay counter defines to NBIO headers Add the PCIE_RX_NUM_NAK and PCIE_RX_NUM_NAK_GENERATED values to the NBIO SMN headers in preparation for exposing the number of PCIe replays via sysfs Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h | 3 +++ drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h | 3 +++ drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h index 8c75669eb500..9470ec5e0f42 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h @@ -54,5 +54,8 @@ #define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 #define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_6_1_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h index 5563f0715896..caf5ffdc130a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h @@ -51,4 +51,7 @@ #define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 #define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_7_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h index c1457d880c4d..4bcacf529852 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h @@ -50,4 +50,7 @@ #define 
smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c #define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250 +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_7_4_0_SMN_HEADER From dcea6e65d41f4696571d5d391d3a760cfbb8ee00 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 30 Apr 2019 06:43:33 -0400 Subject: [PATCH 020/178] drm/amdgpu: Add PCIe replay count sysfs file Add a sysfs file for reporting the number of PCIe replays (NAKs). This returns the sum of NAKs received and NAKs generated Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 29 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/cik.c | 13 ++++++++++ drivers/gpu/drm/amd/amdgpu/si.c | 13 ++++++++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 14 +++++++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 13 ++++++++++ 6 files changed, 85 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 30165487dabd..4b10f3c1c6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -558,6 +558,8 @@ struct amdgpu_asic_funcs { uint64_t *count1); /* do we need to reset the asic at init time (e.g., kexec) */ bool (*need_reset_on_init)(struct amdgpu_device *adev); + /* PCIe replay counter */ + uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); }; /* @@ -1077,6 +1079,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) +#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) /* Common functions */ bool 
amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0237513086e8..665764ff7eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +/** + * DOC: pcie_replay_count + * + * The amdgpu driver provides a sysfs API for reporting the total number + * of PCIe replays (NAKs) + * The file pcie_replay_count is used for this and returns the total + * number of replays as a sum of the NAKs generated and NAKs received + */ + +static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); + + return snprintf(buf, PAGE_SIZE, "%llu\n", cnt); +} + +static DEVICE_ATTR(pcie_replay_count, S_IRUGO, + amdgpu_device_get_pcie_replay_count, NULL); + static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** @@ -2721,6 +2743,12 @@ fence_driver_init: /* must succeed. 
*/ amdgpu_ras_post_init(adev); + r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); + if (r) { + dev_err(adev->dev, "Could not create pcie_replay_count"); + return r; + } + return 0; failed: @@ -2784,6 +2812,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); amdgpu_debugfs_regs_cleanup(adev); + device_remove_file(adev->dev, &dev_attr_pcie_replay_count); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07c1f239e9c3..3a4f20766a39 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, + .get_pcie_replay_count = &cik_get_pcie_replay_count, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d8df68893b9..4ff930a47e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs 
received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .need_full_reset = &si_need_full_reset, .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, + .get_pcie_replay_count = &si_get_pcie_replay_count, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 78bd00a0142f..78b27c03b8c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -745,6 +745,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -762,6 +774,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -781,6 +794,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = 
&soc15_get_pcie_replay_count, }; static int soc15_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5e5b42a0744a..b8adf3808de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static bool vi_need_reset_on_init(struct amdgpu_device *adev) { u32 clock_cntl, pc; @@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &vi_get_pcie_usage, .need_reset_on_init = &vi_need_reset_on_init, + .get_pcie_replay_count = &vi_get_pcie_replay_count, }; #define CZ_REV_BRISTOL(rev) \ From 5980bcaa96101e52586472bbb3f568f02618f16a Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Wed, 1 May 2019 08:22:50 -0400 Subject: [PATCH 021/178] drm/amdgpu: Fix CIK references in gmc_v8 gmc_v8 is for VI, not CIK, so fix those references Signed-off-by: Kent Russell Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 8a3b5e6fc6c9..8bf2ba310fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -289,7 +289,7 @@ out: * * @adev: amdgpu_device pointer * - * Load the GDDR MC ucode into the hw (CIK). + * Load the GDDR MC ucode into the hw (VI). 
* Returns 0 on success, error on failure. */ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) @@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). + * physical address space (VI). */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { @@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). + * vram and gart within the GPU's physical address space (VI). * Returns 0 for success. */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) @@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @vmid: vm instance to flush * - * Flush the TLB for the requested page table (CIK). + * Flush the TLB for the requested page table (VI). */ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t flush_type) @@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) * This sets up the TLBs, programs the page tables for VMID0, * sets up the hw for VMIDs 1-15 which are allocated on * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). + * and GPUVM for FSA64 clients (VI). * Returns 0 for success, errors for failure. */ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) @@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * This disables all VM page table (CIK). + * This disables all VM page table (VI). 
*/ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) { @@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * - * Print human readable fault information (CIK). + * Print human readable fault information (VI). */ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client, unsigned pasid) From 0d87c9cfc08e1508ad9df2b2bda003418dcc0cc9 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Wed, 1 May 2019 08:23:13 -0400 Subject: [PATCH 022/178] drm/amdkfd: Cosmetic cleanup Fix some spacing issues, log output, uses of !=NULL/==NULL, unneeded extra lines and clean up a declaration from =1 to =true for clarity Signed-off-by: Kent Russell Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 8949b1ac2f18..2e2a2617134b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -149,7 +149,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) }; /* this is going to have a few of the MSBs set that we need to - * clear */ + * clear + */ bitmap_complement(gpu_resources.queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); @@ -163,7 +164,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device 
*adev) gpu_resources.queue_bitmap); /* According to linux/bitmap.h we shouldn't use bitmap_clear if - * nbits is not compile time constant */ + * nbits is not compile time constant + */ last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index fa09e11a600c..c6abcf72e822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index fec3a6aa1de6..4e8b4e949926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 00e013581a70..d39cb36b4830 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1410,7 +1410,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); if (ret) { - pr_err("Failed to map radeon bo to gpuvm\n"); + pr_err("Failed to map bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d795e5018270..3ccaa38779ea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -522,7 +522,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep, struct kfd_process_device *pdd; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6e1d41c5bf86..d674d4b3340f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, return; /* Presumably process exited. */ memset(&memory_exception_data, 0, sizeof(memory_exception_data)); memory_exception_data.gpu_id = dev->id; - memory_exception_data.failure.imprecise = 1; + memory_exception_data.failure.imprecise = true; /* Set failure reason */ if (info) { memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 213ea5454d11..c2a22f6acf9a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -435,5 +435,3 @@ int kfd_init_apertures(struct kfd_process *process) return 0; } - - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 6469b3456f00..7f1cff3de4eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -343,7 +343,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, memset(m, 0, sizeof(struct vi_sdma_mqd)); *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = (*mqd_mem_obj)->gpu_addr; retval = mm->update_mqd(mm, m, q); diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e0230965675..9c68ae5093de 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -258,7 +258,7 @@ struct kfd_dev { bool interrupts_active; /* Debug manager */ - struct kfd_dbgmgr *dbgmgr; + struct kfd_dbgmgr *dbgmgr; /* Firmware versions */ uint16_t mec_fw_version; From 9b6eb00dbdaee49ba35569e3c72fbb05734610e3 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 2 May 2019 09:16:11 -0400 Subject: [PATCH 023/178] drm/amd/amdgpu: Add MEM_LOAD to amdgpu_pm_info debugfs file Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 58edf6f13d51..bd40d5d72508 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2965,6 +2965,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* GPU Load */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); + /* MEM Load */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size)) + seq_printf(m, "MEM Load: %u %%\n", value); + seq_printf(m, "\n"); /* SMC feature mask */ From 2fbd6f94accdbb223acccada68940b50b0c668d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 6 May 2019 13:22:06 +0200 Subject: [PATCH 024/178] drm/amdgpu: rename amdgpu_prime.[ch] into amdgpu_dma_buf.[ch] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are getting a dma-buf implementation completely separate from drm prime, so rename the files now and cleanup the code a bit. No functional change. 
Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + .../{amdgpu_prime.c => amdgpu_dma_buf.c} | 463 +++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h | 46 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 16 - 7 files changed, 282 insertions(+), 249 deletions(-) rename drivers/gpu/drm/amd/amdgpu/{amdgpu_prime.c => amdgpu_dma_buf.c} (93%) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index fdd0ca4b0f0b..9ca3b4b261b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 2e2a2617134b..98326e3b5619 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -25,6 +25,7 @@ #include #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_dma_buf.h" #include #include #include "amdgpu_xgmi.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d39cb36b4830..5b4fff9a1509 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -30,6 +30,7 @@ #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_dma_buf.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to * a HW bug. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c similarity index 93% rename from drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c rename to drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a38e0fb4a6fe..4711cf1b5bd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) * Returns: * 0 on success or a negative error code on failure. */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -137,6 +138,235 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } +static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. 
*/ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + +/** + * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation + * @dma_buf: Shared DMA buffer + * @attach: DMA-buf attachment + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + long r; + + r = drm_gem_map_attach(dma_buf, attach); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto error_detach; + + + if (attach->dev->driver != adev->dev->driver) { + /* + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. 
+ */ + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) + goto error_unreserve; + } + + /* pin buffer into GTT */ + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto error_unreserve; + + if (attach->dev->driver != adev->dev->driver) + bo->prime_shared_count++; + +error_unreserve: + amdgpu_bo_unreserve(bo); + +error_detach: + if (r) + drm_gem_map_detach(dma_buf, attach); + return r; +} + +/** + * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation + * @dma_buf: Shared DMA buffer + * @attach: DMA-buf attachment + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * another device. For now, simply unpins the buffer from GTT. + */ +static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + int ret = 0; + + ret = amdgpu_bo_reserve(bo, true); + if (unlikely(ret != 0)) + goto error; + + amdgpu_bo_unpin(bo); + if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) + bo->prime_shared_count--; + amdgpu_bo_unreserve(bo); + +error: + drm_gem_map_detach(dma_buf, attach); +} + +/** + * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation + * @obj: GEM BO + * + * Returns: + * The BO's reservation object. + */ +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return bo->tbo.resv; +} + +/** + * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * @dma_buf: Shared DMA buffer + * @direction: Direction of DMA transfer + * + * This is called before CPU access to the shared DMA buffer's memory. If it's + * a read access, the buffer is moved to the GTT domain if possible, for optimal + * CPU read performance. 
+ * + * Returns: + * 0 on success or a negative error code on failure. + */ +static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { true, false }; + u32 domain = amdgpu_display_supported_domains(adev); + int ret; + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT)) + return 0; + + /* move to gtt */ + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret != 0)) + return ret; + + if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + } + + amdgpu_bo_unreserve(bo); + return ret; +} + +const struct dma_buf_ops amdgpu_dmabuf_ops = { + .attach = amdgpu_dma_buf_map_attach, + .detach = amdgpu_dma_buf_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +/** + * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation + * @dev: DRM device + * @gobj: GEM BO + * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. + * + * The main work is done by the &drm_gem_prime_export helper, which in turn + * uses &amdgpu_gem_prime_res_obj. + * + * Returns: + * Shared DMA buffer representing the GEM BO from the given device. 
+ */ +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + struct dma_buf *buf; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || + bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + return ERR_PTR(-EPERM); + + buf = drm_gem_prime_export(dev, gobj, flags); + if (!IS_ERR(buf)) { + buf->file->f_mapping = dev->anon_inode->i_mapping; + buf->ops = &amdgpu_dmabuf_ops; + } + + return buf; +} + /** * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table * implementation @@ -188,235 +418,6 @@ error: return ERR_PTR(ret); } -static int -__reservation_object_make_exclusive(struct reservation_object *obj) -{ - struct dma_fence **fences; - unsigned int count; - int r; - - if (!reservation_object_get_list(obj)) /* no shared fences to convert */ - return 0; - - r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); - if (r) - return r; - - if (count == 0) { - /* Now that was unexpected. */ - } else if (count == 1) { - reservation_object_add_excl_fence(obj, fences[0]); - dma_fence_put(fences[0]); - kfree(fences); - } else { - struct dma_fence_array *array; - - array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), 0, - false); - if (!array) - goto err_fences_put; - - reservation_object_add_excl_fence(obj, &array->base); - dma_fence_put(&array->base); - } - - return 0; - -err_fences_put: - while (count--) - dma_fence_put(fences[count]); - kfree(fences); - return -ENOMEM; -} - -/** - * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation - * @dma_buf: Shared DMA buffer - * @attach: DMA-buf attachment - * - * Makes sure that the shared DMA buffer can be accessed by the target device. - * For now, simply pins it to the GTT domain, where it should be accessible by - * all DMA devices. - * - * Returns: - * 0 on success or a negative error code on failure. 
- */ -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) -{ - struct drm_gem_object *obj = dma_buf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - long r; - - r = drm_gem_map_attach(dma_buf, attach); - if (r) - return r; - - r = amdgpu_bo_reserve(bo, false); - if (unlikely(r != 0)) - goto error_detach; - - - if (attach->dev->driver != adev->dev->driver) { - /* - * We only create shared fences for internal use, but importers - * of the dmabuf rely on exclusive fences for implicitly - * tracking write hazards. As any of the current fences may - * correspond to a write, we need to convert all existing - * fences on the reservation object into a single exclusive - * fence. - */ - r = __reservation_object_make_exclusive(bo->tbo.resv); - if (r) - goto error_unreserve; - } - - /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); - if (r) - goto error_unreserve; - - if (attach->dev->driver != adev->dev->driver) - bo->prime_shared_count++; - -error_unreserve: - amdgpu_bo_unreserve(bo); - -error_detach: - if (r) - drm_gem_map_detach(dma_buf, attach); - return r; -} - -/** - * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation - * @dma_buf: Shared DMA buffer - * @attach: DMA-buf attachment - * - * This is called when a shared DMA buffer no longer needs to be accessible by - * another device. For now, simply unpins the buffer from GTT. 
- */ -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) -{ - struct drm_gem_object *obj = dma_buf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int ret = 0; - - ret = amdgpu_bo_reserve(bo, true); - if (unlikely(ret != 0)) - goto error; - - amdgpu_bo_unpin(bo); - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) - bo->prime_shared_count--; - amdgpu_bo_unreserve(bo); - -error: - drm_gem_map_detach(dma_buf, attach); -} - -/** - * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM BO - * - * Returns: - * The BO's reservation object. - */ -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - - return bo->tbo.resv; -} - -/** - * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation - * @dma_buf: Shared DMA buffer - * @direction: Direction of DMA transfer - * - * This is called before CPU access to the shared DMA buffer's memory. If it's - * a read access, the buffer is moved to the GTT domain if possible, for optimal - * CPU read performance. - * - * Returns: - * 0 on success or a negative error code on failure. 
- */ -static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, - enum dma_data_direction direction) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_supported_domains(adev); - int ret; - bool reads = (direction == DMA_BIDIRECTIONAL || - direction == DMA_FROM_DEVICE); - - if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT)) - return 0; - - /* move to gtt */ - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - } - - amdgpu_bo_unreserve(bo); - return ret; -} - -const struct dma_buf_ops amdgpu_dmabuf_ops = { - .attach = amdgpu_gem_map_attach, - .detach = amdgpu_gem_map_detach, - .map_dma_buf = drm_gem_map_dma_buf, - .unmap_dma_buf = drm_gem_unmap_dma_buf, - .release = drm_gem_dmabuf_release, - .begin_cpu_access = amdgpu_gem_begin_cpu_access, - .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, -}; - -/** - * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation - * @dev: DRM device - * @gobj: GEM BO - * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. - * - * The main work is done by the &drm_gem_prime_export helper, which in turn - * uses &amdgpu_gem_prime_res_obj. - * - * Returns: - * Shared DMA buffer representing the GEM BO from the given device. 
- */ -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, - int flags) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); - struct dma_buf *buf; - - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || - bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) - return ERR_PTR(-EPERM); - - buf = drm_gem_prime_export(dev, gobj, flags); - if (!IS_ERR(buf)) { - buf->file->f_mapping = dev->anon_inode->i_mapping; - buf->ops = &amdgpu_dmabuf_ops; - } - - return buf; -} - /** * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation * @dev: DRM device diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h new file mode 100644 index 000000000000..c7056cbe8685 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -0,0 +1,46 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_DMA_BUF_H__ +#define __AMDGPU_DMA_BUF_H__ + +#include + +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5924d89e0aee..8fd8807272a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,7 +36,7 @@ #include "amdgpu.h" #include "amdgpu_irq.h" -#include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f1ddfc50bcc7..b8ba6e27c61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object 
*gobj, - int flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -extern const struct dma_buf_ops amdgpu_dmabuf_ops; /* * GEM objects. From a58f273cdd0ec707524e335b09ead9984566e163 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 15 Apr 2019 08:41:47 -0400 Subject: [PATCH 025/178] drm/amd/display: 3.2.28 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 44e4b0465587..a7144cd189cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.27" +#define DC_VER "3.2.28" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 14ed3d00eff9a327cbde78596ef599f68caa19a9 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Thu, 11 Apr 2019 14:11:47 -0400 Subject: [PATCH 026/178] drm/amd/display: Refactor program watermark. Refactor programming watermark function: Divided into urgent watermark, stutter watermark and pstate watermark. 
Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_hubbub.c | 430 +++++++++--------- .../drm/amd/display/dc/dcn10/dcn10_hubbub.h | 16 + 2 files changed, 243 insertions(+), 203 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 0db2a6e96fc0..177247595974 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -263,20 +263,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub) DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1); } -void hubbub1_program_watermarks( +void hubbub1_program_urgent_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, bool safe_to_lower) { struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - /* - * Need to clamp to max of the register values (i.e. no wrap) - * for dcn1, all wm registers are 21-bit wide - */ uint32_t prog_wm_value; - /* Repeat for water mark set A, B, C and D. 
*/ /* clock state A */ if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) { @@ -291,60 +286,14 @@ void hubbub1_program_watermarks( watermarks->a.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { - if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { - hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) { - if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = - watermarks->a.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A 
calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = - watermarks->a.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { + hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.pte_meta_urgent_ns, prog_wm_value); } /* clock state B */ @@ -360,60 +309,14 @@ void hubbub1_program_watermarks( watermarks->b.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) { - if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { - hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) { - if 
(safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = - watermarks->b.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->b.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = - watermarks->b.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - watermarks->b.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { + hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 
prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.pte_meta_urgent_ns, prog_wm_value); } /* clock state C */ @@ -429,60 +332,14 @@ void hubbub1_program_watermarks( watermarks->c.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { - if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { - hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) { - if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { - 
hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = - watermarks->c.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = - watermarks->c.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { + hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.pte_meta_urgent_ns, prog_wm_value); } /* clock state D */ @@ -498,48 +355,199 @@ void hubbub1_program_watermarks( watermarks->d.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { - if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { - hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; - prog_wm_value = 
convert_and_clamp(watermarks->d.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.pte_meta_urgent_ns, prog_wm_value); - } + if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { + hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.pte_meta_urgent_ns, prog_wm_value); + } +} + +void hubbub1_program_stutter_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t prog_wm_value; + + /* clock state A */ + if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { - if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { - 
hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = - watermarks->d.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->d.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); - } + if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = + watermarks->a.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); } + /* clock state B */ + if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = + 
watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = + watermarks->b.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + + /* clock state C */ + if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = + 
watermarks->c.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->c.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + + /* clock state D */ + if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = + watermarks->d.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->d.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + +} + +void hubbub1_program_pstate_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t prog_wm_value; + + /* clock 
state A */ + if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = + watermarks->a.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* clock state B */ + if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = + watermarks->b.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* clock state C */ + if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = + watermarks->c.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + watermarks->c.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* 
clock state D */ if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns > hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) { hubbub1->watermarks.d.cstate_pstate.pstate_change_ns = @@ -553,6 +561,22 @@ void hubbub1_program_watermarks( "HW register value = 0x%x\n\n", watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value); } +} + +void hubbub1_program_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + /* + * Need to clamp to max of the register values (i.e. no wrap) + * for dcn1, all wm registers are 21-bit wide + */ + hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 85811b24a497..7c2559c9ae23 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub, const struct dcn_hubbub_shift *hubbub_shift, const struct dcn_hubbub_mask *hubbub_mask); +void hubbub1_program_urgent_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); +void hubbub1_program_stutter_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); +void hubbub1_program_pstate_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + #endif From 4cd75ff096f4ef49c343093b52a952f27aba7796 Mon Sep 17 00:00:00 2001 From: 
Anthony Koo Date: Fri, 12 Apr 2019 21:23:45 -0400 Subject: [PATCH 027/178] drm/amd/display: fix multi display seamless boot case [Why] There is a scenario that causes eDP to become blank if there are multiple displays connected, and the external display is set as the primary display such that the first flip comes to the external display. In this scenario, we call our optimize function before the eDP even has a chance to flip. [How] There is a check that prevents bandwidth optimize from occurring before first flip is complete on the seamless boot display. But actually it assumed the seamless boot display is the first one to flip. But in this scenario it is not. Modify the check to ensure the stream with the seamless boot flag set is the one that has completed the first flip. Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 18c775a950cc..ee6b646180b6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1138,9 +1138,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c const struct dc_link *link = context->streams[i]->link; struct dc_stream_status *status; - if (context->streams[i]->apply_seamless_boot_optimization) - context->streams[i]->apply_seamless_boot_optimization = false; - if (!context->streams[i]->mode_changed) continue; @@ -1792,10 +1789,15 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->optimize_seamless_boot && surface_count > 0) { /* Optimize seamless boot flag keeps clocks and watermarks high until * first flip. After first flip, optimization is required to lower - * bandwidth. + * bandwidth.
Important to note that it is expected UEFI will + * only light up a single display on POST, therefore we only expect + * one stream with seamless boot flag set. */ - dc->optimize_seamless_boot = false; - dc->optimized_required = true; + if (stream->apply_seamless_boot_optimization) { + stream->apply_seamless_boot_optimization = false; + dc->optimize_seamless_boot = false; + dc->optimized_required = true; + } } if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) { From 21e471f0850de874b2afa54f19ef7886490b99fe Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 3 Apr 2019 15:40:05 -0400 Subject: [PATCH 028/178] drm/amd/display: Set dispclk and dprefclock directly [Why] To simplify logic for setting DCN specific clocks, we will send SMU message directly through the VBIOS message box. [How] Add new structure in pp_smu to hold functions to set clocks through vbios message box Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_helper.c | 2 +- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 8 +- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.h | 21 +++++ .../drm/amd/display/dc/dcn10/dcn10_clk_mgr.c | 78 +++++++++++++++++-- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 37 ++++++--- drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 2 +- .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 8 +- .../gpu/drm/amd/display/include/dal_asic_id.h | 3 +- 8 files changed, 136 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 5e6c5eff49cf..2d0acf109360 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -297,7 +297,7 @@ void generic_reg_wait(const struct dc_context *ctx, int i; /* something is terribly wrong if time out is > 200ms.
(5Hz) */ - ASSERT(delay_between_poll_us * time_out_num_tries <= 200000); + ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000); for (i = 0; i <= time_out_num_tries; i++) { if (i) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 963686380738..365c10848797 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -782,22 +782,22 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); } -static const struct clk_mgr_funcs dce120_funcs = { +static struct clk_mgr_funcs dce120_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dce12_update_clocks }; -static const struct clk_mgr_funcs dce112_funcs = { +static struct clk_mgr_funcs dce112_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce112_update_clocks }; -static const struct clk_mgr_funcs dce110_funcs = { +static struct clk_mgr_funcs dce110_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce11_update_clocks, }; -static const struct clk_mgr_funcs dce_funcs = { +static struct clk_mgr_funcs dce_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce_update_clocks }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index c8f8c442142a..36942ab022a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -39,6 +39,11 @@ #define CLK_COMMON_REG_LIST_DCN_BASE() \ SR(DENTIST_DISPCLK_CNTL) +#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \ + .MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \ + .MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \ + .MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67 + #define CLK_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix @@ 
-50,23 +55,39 @@ CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\ CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh) +#define CLK_MASK_SH_LIST_RV1(mask_sh) \ + CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh), + + #define CLK_REG_FIELD_LIST(type) \ type DPREFCLK_SRC_SEL; \ type DENTIST_DPREFCLK_WDIVIDER; \ type DENTIST_DISPCLK_WDIVIDER; \ type DENTIST_DISPCLK_CHG_DONE; +#define VBIOS_SMU_REG_FIELD_LIST(type) \ + type CONTENT; + struct clk_mgr_shift { CLK_REG_FIELD_LIST(uint8_t) + VBIOS_SMU_REG_FIELD_LIST(uint32_t) }; struct clk_mgr_mask { CLK_REG_FIELD_LIST(uint32_t) + VBIOS_SMU_REG_FIELD_LIST(uint32_t) }; struct clk_mgr_registers { uint32_t DPREFCLK_CNTL; uint32_t DENTIST_DISPCLK_CNTL; + + uint32_t MP1_SMN_C2PMSG_67; + uint32_t MP1_SMN_C2PMSG_83; + uint32_t MP1_SMN_C2PMSG_91; }; struct state_dependent_clocks { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index 2b2de1d913c9..9f2ffce10e12 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -27,6 +27,7 @@ #include "reg_helper.h" #include "core_types.h" +#include "dal_asic_id.h" #define TO_DCE_CLK_MGR(clocks)\ container_of(clocks, struct dce_clk_mgr, base) @@ -91,13 +92,18 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) { + int i; struct dc *dc = clk_mgr->ctx->dc; int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks); bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; - int i; /* set disp clk to dpp clk threshold */ - dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); + + if (clk_mgr->funcs->set_dispclk && 
clk_mgr->funcs->set_dprefclk) { + clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold); + clk_mgr->funcs->set_dprefclk(clk_mgr); + } else + dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); /* update request dpp clk division option */ for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -113,8 +119,13 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo } /* If target clk not same as dppclk threshold, set to target clock */ - if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) - dce112_set_clock(clk_mgr, new_clocks->dispclk_khz); + if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) { + if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) { + clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz); + clk_mgr->funcs->set_dprefclk(clk_mgr); + } else + dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); + } clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; @@ -242,7 +253,62 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, } } } -static const struct clk_mgr_funcs dcn1_funcs = { + +#define VBIOSSMC_MSG_SetDispclkFreq 0x4 +#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 + +int dcn10_set_dispclk(struct clk_mgr *clk_mgr_base, int requested_dispclk_khz) +{ + int actual_dispclk_set_khz = -1; + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base); + + /* First clear response register */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_91, 0); + REG_WRITE(MP1_SMN_C2PMSG_91, 0); + + /* Set the parameter register for the SMU message, unit is Mhz */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000); + REG_WRITE(MP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000); + + /* Trigger the message transaction by writing the message ID */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq); + REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq); + + REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + + /* Actual 
dispclk set is returned in the parameter register */ + actual_dispclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000; + + return actual_dispclk_set_khz; + +} + +int dcn10_set_dprefclk(struct clk_mgr *clk_mgr_base) +{ + int actual_dprefclk_set_khz = -1; + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base); + + REG_WRITE(MP1_SMN_C2PMSG_91, 0); + + /* Set the parameter register for the SMU message */ + REG_WRITE(MP1_SMN_C2PMSG_83, clk_mgr_dce->dprefclk_khz / 1000); + + /* Trigger the message transaction by writing the message ID */ + REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDprefclkFreq); + + /* Wait for SMU response */ + REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + + actual_dprefclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000; + + return actual_dprefclk_set_khz; +} + +int (*set_dispclk)(struct pp_smu *pp_smu, int dispclk); + +int (*set_dprefclk)(struct pp_smu *pp_smu); + +static struct clk_mgr_funcs dcn1_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn1_update_clocks }; @@ -266,8 +332,8 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx) clk_mgr_dce->dprefclk_ss_percentage = 0; clk_mgr_dce->dprefclk_ss_divider = 1000; clk_mgr_dce->ss_on_dprefclk = false; - clk_mgr_dce->dprefclk_khz = 600000; + if (bp->integrated_info) clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; if (clk_mgr_dce->dentist_vco_freq_khz == 0) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 7eccb54c421d..ddb020a53098 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -29,7 +29,6 @@ #include "resource.h" #include "include/irq_service_interface.h" #include "dcn10_resource.h" - #include "dcn10_ipp.h" #include "dcn10_mpc.h" #include "irq/dcn10/irq_service_dcn10.h" @@ -445,7 +444,6 @@ static const struct bios_registers bios_regs = { 
HUBP_REG_LIST_DCN10(id)\ } - static const struct dcn_mi_registers hubp_regs[] = { hubp_regs(0), hubp_regs(1), @@ -461,7 +459,6 @@ static const struct dcn_mi_mask hubp_mask = { HUBP_MASK_SH_LIST_DCN10(_MASK) }; - static const struct dcn_hubbub_registers hubbub_reg = { HUBBUB_REG_LIST_DCN10(0) }; @@ -494,6 +491,27 @@ static const struct dce110_clk_src_mask cs_mask = { CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; + +#define mmMP1_SMN_C2PMSG_91 0x1629B +#define mmMP1_SMN_C2PMSG_83 0x16293 +#define mmMP1_SMN_C2PMSG_67 0x16283 + +#define MP1_SMN_C2PMSG_91__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_83__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_67__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT 0x00000000 +#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT 0x00000000 +#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT 0x00000000 + + +static const struct clk_mgr_shift clk_mgr_shift = { + CLK_MASK_SH_LIST_RV1(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask = { + CLK_MASK_SH_LIST_RV1(_MASK) +}; + static const struct resource_caps res_cap = { .num_timing_generator = 4, .num_opp = 4, @@ -1343,12 +1361,6 @@ static bool construct( goto fail; } } - pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); - if (pool->base.clk_mgr == NULL) { - dm_error("DC: failed to create display clock!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } pool->base.dmcu = dcn10_dmcu_create(ctx, &dmcu_regs, @@ -1410,6 +1422,13 @@ static bool construct( pool->base.pp_smu = dcn10_pp_smu_create(ctx); + pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); + if (pool->base.clk_mgr == NULL) { + dm_error("DC: failed to create display clock!\n"); + BREAK_TO_DEBUGGER(); + goto fail; + } + if (!dc->debug.disable_pplib_clock_request) dcn_bw_update_from_pplib(dc); dcn_bw_sync_calcs_and_dml(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 4fc4208d1472..9f7ebf6a4e40 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ 
b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -80,6 +80,7 @@ struct pp_smu_funcs_rv { /* PPSMC_MSG_SetDisplayCount * 0 triggers S0i2 optimization */ + void (*set_display_count)(struct pp_smu *pp, int count); /* reader and writer WM's are sent together as part of one table*/ @@ -115,7 +116,6 @@ struct pp_smu_funcs_rv { /* PME w/a */ void (*set_pme_wa_enable)(struct pp_smu *pp); - }; struct pp_smu_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 31bd6d5183ab..f3fd3f8cac26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -31,7 +31,7 @@ struct clk_mgr { struct dc_context *ctx; - const struct clk_mgr_funcs *funcs; + struct clk_mgr_funcs *funcs; struct dc_clocks clks; }; @@ -44,6 +44,12 @@ struct clk_mgr_funcs { int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); void (*init_clocks)(struct clk_mgr *clk_mgr); + + /* Returns actual clk that's set */ + int (*set_dispclk)(struct clk_mgr *clk_mgr, int requested_dispclk_khz); + int (*set_dprefclk)(struct clk_mgr *clk_mgr); }; + + #endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 34d6fdcb32e2..1a9b7507784f 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -131,11 +131,12 @@ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ #define RAVEN_A0 0x01 #define RAVEN_B0 0x21 -#define PICASSO_A0 0x41 #if defined(CONFIG_DRM_AMD_DC_DCN1_01) /* DCN1_01 */ +#define PICASSO_A0 0x41 #define RAVEN2_A0 0x81 #endif +#define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF #if defined(CONFIG_DRM_AMD_DC_DCN1_01) From 8dea49605f6e21b5b380b0cae7f1f1160675e3ee Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Mon, 15 Apr 2019 14:52:25 -0400 Subject: [PATCH 029/178] drm/amd/display: add support for disconnected eDP panels [why] On 
some configurations, eDP from GPU is muxed with another GPU. DC does not know state of mux, but DM has this knowledge. This flag allows DC to ignore creating EDP link when DM informs DC that EDP mux is not connected. [how] Add flag to dc, populate flag in DM Signed-off-by: Jun Lei Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++++++++--- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ee6b646180b6..700278216424 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -169,9 +169,14 @@ static bool create_links( link = link_create(&link_init_params); if (link) { - dc->links[dc->link_count] = link; - link->dc = dc; - ++dc->link_count; + if (dc->config.edp_not_connected && + link->connector_signal == SIGNAL_TYPE_EDP) { + link_destroy(&link); + } else { + dc->links[dc->link_count] = link; + link->dc = dc; + ++dc->link_count; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a7144cd189cf..12ca75388362 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -205,6 +205,7 @@ struct dc_config { bool disable_fractional_pwm; bool allow_seamless_boot_optimization; bool power_down_display_on_boot; + bool edp_not_connected; }; enum visual_confirm { From f5ce9f3cba9f385bb4d0b4f76b6b32cef2b84da0 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 15 Apr 2019 16:17:59 -0400 Subject: [PATCH 030/178] drm/amd/display: add SW_USE_I2C_REG request. [Description] This is for DC_I2c arbitration use between HW use/SW use and DMCU use. 
Signed-off-by: Charlene Liu Reviewed-by: Krunoslav Kovac Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index cd26161bcc4d..526aab438374 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -268,6 +268,8 @@ static bool setup_engine( struct dce_i2c_hw *dce_i2c_hw) { uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; + /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/ + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1); if (dce_i2c_hw->setup_limit != 0) i2c_setup_limit = dce_i2c_hw->setup_limit; @@ -322,8 +324,6 @@ static void release_engine( set_speed(dce_i2c_hw, dce_i2c_hw->original_speed); - /* Release I2C */ - REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1); /* Reset HW engine */ { @@ -343,6 +343,9 @@ static void release_engine( /* HW I2c engine - clock gating feature */ if (!dce_i2c_hw->engine_keep_power_up_count) REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0); + /* Release I2C after reset, so HW or DMCU could use it */ + REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1, + DC_I2C_SW_USE_I2C_REG_REQ, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h index 575500755b2e..f718e3d396f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h @@ -105,6 +105,7 @@ enum { I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\ + 
I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\ @@ -146,6 +147,7 @@ struct dce_i2c_shift { uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY; uint8_t DC_I2C_SW_DONE_USING_I2C_REG; + uint8_t DC_I2C_SW_USE_I2C_REG_REQ; uint8_t DC_I2C_NO_QUEUED_SW_GO; uint8_t DC_I2C_SW_PRIORITY; uint8_t DC_I2C_SOFT_RESET; @@ -184,6 +186,7 @@ struct dce_i2c_mask { uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY; uint32_t DC_I2C_SW_DONE_USING_I2C_REG; + uint32_t DC_I2C_SW_USE_I2C_REG_REQ; uint32_t DC_I2C_NO_QUEUED_SW_GO; uint32_t DC_I2C_SW_PRIORITY; uint32_t DC_I2C_SOFT_RESET; From 64827cadcce3023521b687b7db75c7bc388ca201 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Fri, 12 Apr 2019 18:23:11 -0400 Subject: [PATCH 031/178] drm/amd/display: block passive dongle EDID Emulation for USB-C ports [Why] Emulating passive dongle on USB-C port causes issue on some asics. [How] Check for DP_IS_USB_C flag in bios parser and propagate it to encoder features flags. If DP_IS_USB_C flag is set and it is trying to emulate passive dongle, then return fail. 
Signed-off-by: Samson Tam Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 ++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h | 1 + drivers/gpu/drm/amd/display/include/bios_parser_types.h | 3 ++- 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index fd5266a58297..12bc7ee66b18 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1313,6 +1313,8 @@ static enum bp_result bios_parser_get_encoder_cap_info( ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0; info->HDMI_6GB_EN = (record->encodercaps & ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0; + info->DP_IS_USB_C = (record->encodercaps & + ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0; return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 0126a44ba012..e25ae43f8d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -726,6 +726,8 @@ void dcn10_link_encoder_construct( enc10->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; + enc10->base.features.flags.bits.DP_IS_USB_C = + bp_cap_info.DP_IS_USB_C; } else { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index c9d3e37e9531..ca162079a41b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -59,6 +59,7 @@ struct 
encoder_feature_support { uint32_t IS_TPS3_CAPABLE:1; uint32_t IS_TPS4_CAPABLE:1; uint32_t HDMI_6GB_EN:1; + uint32_t DP_IS_USB_C:1; } bits; uint32_t raw; } flags; diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index 01bf01a34a08..c30437ae8395 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -307,7 +307,8 @@ struct bp_encoder_cap_info { uint32_t DP_HBR2_EN:1; uint32_t DP_HBR3_EN:1; uint32_t HDMI_6GB_EN:1; - uint32_t RESERVED:30; + uint32_t DP_IS_USB_C:1; + uint32_t RESERVED:27; }; #endif /*__DAL_BIOS_PARSER_TYPES_H__ */ From efa023363cdffafa839e5cc7e6fc9f38631e998d Mon Sep 17 00:00:00 2001 From: Chris Park Date: Wed, 17 Apr 2019 16:32:25 -0400 Subject: [PATCH 032/178] drm/amd/display: Support AVI InfoFrame V3 and V4 [Why] Part of HDMI 2.1 requires AVI InfoFrame version update from current V2 to V3 for new VICs, and V4 for new colorimetry. [How] Implement V3 and V4 AVI InfoFrame. If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1), the Source shall use 20 AVI InfoFrame Version 4. 
If VIC >= 128, the Source shall use AVI InfoFrame Version 3 Signed-off-by: Chris Park Reviewed-by: Nevenko Stupar Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index eac7186e4f08..9ef417eb697d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2354,7 +2354,18 @@ static void set_avi_info_frame( break; } } + /* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/ hdmi_info.bits.VIC0_VIC7 = vic; + if (vic >= 128) + hdmi_info.bits.header.version = 3; + /* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1), + * the Source shall use 20 AVI InfoFrame Version 4 + */ + if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED && + hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) { + hdmi_info.bits.header.version = 4; + hdmi_info.bits.header.length = 14; + } /* pixel repetition * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel @@ -2376,9 +2387,9 @@ static void set_avi_info_frame( /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ check_sum = &hdmi_info.packet_raw_data.sb[0]; - *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; + *check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version; - for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) + for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++) *check_sum += hdmi_info.packet_raw_data.sb[byte_index]; /* one byte complement */ From 1352c779cb74d427f4150cbe779a2f7886f70cae Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 16 Apr 2019 10:30:29 -0400 Subject: [PATCH 033/178] drm/amd/display: Fill prescale_params->scale for RGB565 [Why] An assertion is thrown when using 
SURFACE_PIXEL_FORMAT_GRPH_RGB565 formats on DCE since the prescale_params->scale wasn't being filled. Found by a dmesg-fail when running the igt@kms_plane@pixel-format-pipe-a-planes test on Baffin. [How] Fill in the scale parameter. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Roman Li Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7ac50ab1b762..7d7e93c87c28 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -242,6 +242,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params, prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED; switch (plane_state->format) { + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + prescale_params->scale = 0x2082; + break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: prescale_params->scale = 0x2020; From e371e19c10a264bd72c2ff1d21e2167b994710d1 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 18 Apr 2019 12:42:32 -0400 Subject: [PATCH 034/178] drm/amd/display: Disable cursor when offscreen in negative direction [Why] When x or y is negative we set the x and y values to 0 and compensate with a positive cursor hotspot in DM since DC expects positive cursor values. When x or y is less than or equal to the maximum cursor width or height the cursor hotspot is clamped so the hotspot doesn't exceed the cursor size: if (x < 0) { xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); x = 0; } if (y < 0) { yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); y = 0; } This incorrectly forces the cursor to be at least 1 pixel on the screen in either direction when x or y is sufficiently negative. 
[How] Just disable the cursor when it goes far enough off the screen in one of these directions. This fixes kms_cursor_crc@cursor-256x256-offscreen. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Sun peng Li Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 995f9df66142..1e8b51d21fea 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4945,12 +4945,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, int x, y; int xorigin = 0, yorigin = 0; - if (!crtc || !plane->state->fb) { - position->enable = false; - position->x = 0; - position->y = 0; + position->enable = false; + position->x = 0; + position->y = 0; + + if (!crtc || !plane->state->fb) return 0; - } if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { @@ -4964,6 +4964,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, x = plane->state->crtc_x; y = plane->state->crtc_y; + if (x <= -amdgpu_crtc->max_cursor_width || + y <= -amdgpu_crtc->max_cursor_height) + return 0; + if (crtc->primary->state) { /* avivo cursor are offset into the total surface */ x += crtc->primary->state->src_x >> 16; From 2d27ebac018fdf8dab7935b2b79c79627ad247b0 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 22 Apr 2019 10:22:44 -0400 Subject: [PATCH 035/178] drm/amd/display: 3.2.29 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 12ca75388362..6c7658110111 
100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.28" +#define DC_VER "3.2.29" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 2f482c4f90983810cb8a29fd0db43a7a6a9d6cb3 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Wed, 17 Apr 2019 18:11:57 -0400 Subject: [PATCH 036/178] drm/amd/display: Define Byte 14 on AVI InfoFrame [Why] Part of HDMI 2.1 requires AVI InfoFrame version update from current V2 to V4 for new colorimetry. [How] Define V4 AVI InfoFrame ACE0-ACE3 bit. Signed-off-by: Chris Park Reviewed-by: Nevenko Stupar Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 7 +++++++ drivers/gpu/drm/amd/display/include/set_mode_types.h | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 9ef417eb697d..20966325852f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2384,6 +2384,13 @@ static void set_avi_info_frame( hdmi_info.bits.bar_right = (stream->timing.h_total - stream->timing.h_border_right + 1); + /* Additional Colorimetry Extension + * Used in conjunction with C0-C1 and EC0-EC2 + * 0 = DCI-P3 RGB (D65) + * 1 = DCI-P3 RGB (theater) + */ + hdmi_info.bits.ACE0_ACE3 = 0; + /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ check_sum = &hdmi_info.packet_raw_data.sb[0]; diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h index 2b836e582c08..845fea8a387f 100644 --- a/drivers/gpu/drm/amd/display/include/set_mode_types.h +++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h @@ -84,7 +84,10 @@ union hdmi_info_packet { uint16_t bar_left; uint16_t bar_right; - uint8_t reserved[14]; + uint8_t 
F140_F143:4; + uint8_t ACE0_ACE3:4; + + uint8_t reserved[13]; } bits; struct info_packet_raw_data packet_raw_data; From 40df2f809e8fe4d9e4f9b7b177f95e76e1f36442 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 17 Apr 2019 19:15:15 -0400 Subject: [PATCH 037/178] drm/amd/display: color space ycbcr709 support Signed-off-by: Charlene Liu Reviewed-by: Duke Du Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_hw_sequencer.c | 29 +++++++++++++++++-- .../gpu/drm/amd/display/dc/core/dc_stream.c | 4 +-- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + .../amd/display/dc/dce/dce_stream_encoder.c | 1 + .../display/dc/dcn10/dcn10_stream_encoder.c | 1 + 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 83d121510ef5..ca50ede37183 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -45,8 +45,10 @@ enum dc_color_space_type { COLOR_SPACE_RGB_LIMITED_TYPE, COLOR_SPACE_YCBCR601_TYPE, COLOR_SPACE_YCBCR709_TYPE, + COLOR_SPACE_YCBCR2020_TYPE, COLOR_SPACE_YCBCR601_LIMITED_TYPE, - COLOR_SPACE_YCBCR709_LIMITED_TYPE + COLOR_SPACE_YCBCR709_LIMITED_TYPE, + COLOR_SPACE_YCBCR709_BLACK_TYPE, }; static const struct tg_color black_color_format[] = { @@ -80,7 +82,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { { COLOR_SPACE_YCBCR709_TYPE, { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA, 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, - /* TODO: correct values below */ { COLOR_SPACE_YCBCR601_LIMITED_TYPE, { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, @@ -88,6 +89,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { { COLOR_SPACE_YCBCR709_LIMITED_TYPE, { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, + { COLOR_SPACE_YCBCR2020_TYPE, + { 0x1000, 0xF149, 0xFEB7, 
0x0000, 0x0868, 0x15B2, + 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} }, + { COLOR_SPACE_YCBCR709_BLACK_TYPE, + { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, + 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} }, }; static bool is_rgb_type( @@ -149,6 +156,16 @@ static bool is_ycbcr709_type( return ret; } +static bool is_ycbcr2020_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_2020_YCBCR) + ret = true; + return ret; +} + static bool is_ycbcr709_limited_type( enum dc_color_space color_space) { @@ -174,7 +191,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space) type = COLOR_SPACE_YCBCR601_LIMITED_TYPE; else if (is_ycbcr709_limited_type(color_space)) type = COLOR_SPACE_YCBCR709_LIMITED_TYPE; - + else if (is_ycbcr2020_type(color_space)) + type = COLOR_SPACE_YCBCR2020_TYPE; + else if (color_space == COLOR_SPACE_YCBCR709) + type = COLOR_SPACE_YCBCR709_BLACK_TYPE; + else if (color_space == COLOR_SPACE_YCBCR709_BLACK) + type = COLOR_SPACE_YCBCR709_BLACK_TYPE; return type; } @@ -206,6 +228,7 @@ void color_space_to_black_color( switch (colorspace) { case COLOR_SPACE_YCBCR601: case COLOR_SPACE_YCBCR709: + case COLOR_SPACE_YCBCR709_BLACK: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: case COLOR_SPACE_2020_YCBCR: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 96e97d25d639..a79f608b2f79 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -47,8 +47,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) if (dc_is_dvi_signal(stream->signal)) { if (stream->ctx->dc->caps.dual_link_dvi && - (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && - sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) + (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && + sink->sink_signal != 
SIGNAL_TYPE_DVI_SINGLE_LINK) stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; else stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index da55d623647a..c91b8aad78c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -534,6 +534,7 @@ enum dc_color_space { COLOR_SPACE_DOLBYVISION, COLOR_SPACE_APPCTRL, COLOR_SPACE_CUSTOMPOINTS, + COLOR_SPACE_YCBCR709_BLACK, }; enum dc_dither_option { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 14309fe6f2e6..61fe2596fdb3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -418,6 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; case COLOR_SPACE_YCBCR709: case COLOR_SPACE_YCBCR709_LIMITED: + case COLOR_SPACE_YCBCR709_BLACK: misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 1; /*bt709*/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 8ee9f6dc1d62..c259c51dff9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -415,6 +415,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_APPCTRL: case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: + case COLOR_SPACE_YCBCR709_BLACK: /* do nothing */ break; } From 052fa7e8c9a92e3804ba9dba7c2550a9684d3441 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 23 Apr 2019 21:59:54 -0400 Subject: [PATCH 038/178] drm/amd/display: reset retimer/redriver below 340Mhz [Description] This is for HDMI 6Ghz mode before we load the driver, because VBIOS not support HDMI (6Ghz mode) Reset to redriver/retimer 
setting for the setting for below 340Mhz. Signed-off-by: Charlene Liu Reviewed-by: Krunoslav Kovac Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index b37ecc3ede61..9fbf926d5bf9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2713,17 +2713,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; core_dc->hwss.blank_stream(pipe_ctx); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - dal_ddc_service_write_scdc_data( - stream->link->ddc, 0, - stream->timing.flags.LTE_340MCSC_SCRAMBLE); + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { + struct ext_hdmi_settings settings = {0}; + enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id; + unsigned short masked_chip_caps = link->chip_caps & + EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + //Need to inform that sink is going to use legacy HDMI mode. + dal_ddc_service_write_scdc_data( + link->ddc, + 165000,//vbios only handles 165Mhz. 
+ false); + if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + /* DP159, Retimer settings */ + if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) + write_i2c_retimer_setting(pipe_ctx, + false, false, &settings); + else + write_i2c_default_retimer_setting(pipe_ctx, + false, false); + } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + /* PI3EQX1204, Redriver settings */ + write_i2c_redriver_setting(pipe_ctx, false); + } + } core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); From db819940b0ef74afdc46a48304d0b29410ed7fc4 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 8 Apr 2019 15:04:43 -0400 Subject: [PATCH 039/178] drm/amd/display: move signal type out of otg dlg params It makes no logical sense being there Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 +-- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 6 +++--- drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 33d311cea28c..c67942ae1920 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -663,7 +663,7 @@ static enum dc_status dcn10_enable_stream_timing( pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal; + pipe_ctx->stream_res.tg->signal = pipe_ctx->stream->signal; pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, @@ -2283,7 +2283,6 
@@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal; pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0345d51e9d6f..3f0911236f8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -243,9 +243,9 @@ void optc1_program_timing( OTG_V_SYNC_A_POL, v_sync_polarity); v_init = asic_blank_start; - if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || - optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) { + if (optc->signal == SIGNAL_TYPE_DISPLAY_PORT || + optc->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc->signal == SIGNAL_TYPE_EDP) { start_point = 1; if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 067d53caf28a..4eee1add8e64 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -75,7 +75,6 @@ struct _dlg_otg_param { int vupdate_offset; int vupdate_width; int vready_offset; - enum signal_type signal; }; struct vupdate_keepout_params { @@ -127,6 +126,7 @@ struct timing_generator { struct dc_bios *bp; struct dc_context *ctx; struct _dlg_otg_param dlg_otg_param; + enum signal_type signal; int inst; }; From e7e10c464a481e9e94ae571f849d09c8820f1fdb Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 8 Apr 2019 
15:49:52 -0400 Subject: [PATCH 040/178] drm/amd/display: stop external access to internal optc sync params These are internal otg params and should be handled as such. This change passes the params as function arguments. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 5 ++ .../amd/display/dc/dce110/dce110_resource.c | 5 ++ .../dc/dce110/dce110_timing_generator.c | 5 ++ .../dc/dce110/dce110_timing_generator.h | 5 ++ .../dc/dce110/dce110_timing_generator_v.c | 5 ++ .../dc/dce120/dce120_timing_generator.c | 7 ++- .../display/dc/dce80/dce80_timing_generator.c | 7 ++- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 25 +++++----- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 47 ++++++++++++++----- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 17 ++++++- .../amd/display/dc/inc/hw/timing_generator.h | 23 ++++----- 11 files changed, 111 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7d7e93c87c28..5919716832a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1299,6 +1299,11 @@ static enum dc_status dce110_enable_stream_timing( pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, + 0, + 0, + 0, + 0, + pipe_ctx->stream->signal, true); } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index dcd04e9ea76b..35b58a085f5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1097,6 +1097,11 @@ static struct pipe_ctx *dce110_acquire_underlay( pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg, &stream->timing, + 0, + 0, + 0, + 0, +
pipe_ctx->stream->signal, false); pipe_ctx->stream_res.tg->funcs->enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 1b2fe0df347f..5f7c2c5641c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg, void dce110_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 734d4965dab1..768ccf27ada9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg, void dce110_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios); bool dce110_tg_is_blanked(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index a3cef60380ed..a13a2f58944e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg, static void dce110_timing_generator_v_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int 
vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 04b866f0fa1f..5ebbbda77021 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg, CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr); } -void dce120_tg_program_timing(struct timing_generator *tg, +static void dce120_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 8b5ce557ee71..397e7f94e1e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) static void program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c67942ae1920..723b2ddf9aa5 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -658,16 +658,15 @@ static enum dc_status dcn10_enable_stream_timing( BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; } - pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - - pipe_ctx->stream_res.tg->signal = pipe_ctx->stream->signal; pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->stream->signal, true); #if 0 /* move to after enable_crtc */ @@ -2279,13 +2278,12 @@ static void program_all_pipe_in_tree( if (pipe_ctx->top_pipe == NULL) { bool blank = !is_pipe_tree_visible(pipe_ctx); - pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - pipe_ctx->stream_res.tg->funcs->program_global_sync( - pipe_ctx->stream_res.tg); + pipe_ctx->stream_res.tg, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); @@ -2789,7 +2787,6 @@ static void apply_front_porch_workaround( int 
get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) { - struct timing_generator *optc = pipe_ctx->stream_res.tg; const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; struct dc_crtc_timing patched_crtc_timing; int vesa_sync_start; @@ -2812,7 +2809,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) * interlace_factor; vertical_line_start = asic_blank_end - - optc->dlg_otg_param.vstartup_start + 1; + pipe_ctx->pipe_dlg_param.vstartup_start + 1; return vertical_line_start; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 3f0911236f8c..0afe81ed023b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -60,24 +60,33 @@ static void optc1_apply_front_porch_workaround( } void optc1_program_global_sync( - struct timing_generator *optc) + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - if (optc->dlg_otg_param.vstartup_start == 0) { + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + + if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); return; } REG_SET(OTG_VSTARTUP_PARAM, 0, - VSTARTUP_START, optc->dlg_otg_param.vstartup_start); + VSTARTUP_START, optc1->vstartup_start); REG_SET_2(OTG_VUPDATE_PARAM, 0, - VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset, - VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width); + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); REG_SET(OTG_VREADY_PARAM, 0, - VREADY_OFFSET, optc->dlg_otg_param.vready_offset); + VREADY_OFFSET, optc1->vready_offset); } static void optc1_disable_stereo(struct timing_generator *optc) @@ -132,6 +141,11 @@ void optc1_setup_vertical_interrupt2( void optc1_program_timing( struct 
timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { struct dc_crtc_timing patched_crtc_timing; @@ -149,6 +163,11 @@ void optc1_program_timing( struct optc *optc1 = DCN10TG_FROM_TG(optc); + optc1->signal = signal; + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; patched_crtc_timing = *dc_crtc_timing; optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); @@ -226,7 +245,7 @@ void optc1_program_timing( patched_crtc_timing.v_addressable + patched_crtc_timing.v_border_bottom); - vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; + vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; v_fp2 = 0; if (vertical_line_start < 0) v_fp2 = -vertical_line_start; @@ -243,9 +262,9 @@ void optc1_program_timing( OTG_V_SYNC_A_POL, v_sync_polarity); v_init = asic_blank_start; - if (optc->signal == SIGNAL_TYPE_DISPLAY_PORT || - optc->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - optc->signal == SIGNAL_TYPE_EDP) { + if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || + optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc1->signal == SIGNAL_TYPE_EDP) { start_point = 1; if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; @@ -257,7 +276,7 @@ void optc1_program_timing( REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 1); v_init = v_init / 2; - if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) + if ((optc1->vstartup_start/2)*2 > asic_blank_end) v_fp2 = v_fp2 / 2; } else REG_UPDATE(OTG_INTERLACE_CONTROL, @@ -277,7 +296,11 @@ void optc1_program_timing( OTG_START_POINT_CNTL, start_point, OTG_FIELD_NUMBER_CNTL, field_num); - optc1_program_global_sync(optc); + optc1_program_global_sync(optc, + vready_offset, + vstartup_start, + vupdate_offset, + 
vupdate_width); /* TODO * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 4eb9a898c237..7bb414c35d13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -446,6 +446,12 @@ struct optc { uint32_t min_v_sync_width; uint32_t min_v_blank; uint32_t min_v_blank_interlace; + + int vstartup_start; + int vupdate_offset; + int vupdate_width; + int vready_offset; + enum signal_type signal; }; void dcn10_timing_generator_init(struct optc *optc); @@ -481,6 +487,11 @@ bool optc1_validate_timing( void optc1_program_timing( struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios); void optc1_setup_vertical_interrupt0( @@ -495,7 +506,11 @@ void optc1_setup_vertical_interrupt2( uint32_t start_line); void optc1_program_global_sync( - struct timing_generator *optc); + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); bool optc1_disable_crtc(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 4eee1add8e64..e4b0de0089af 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -70,13 +70,6 @@ enum crtc_state { CRTC_STATE_VACTIVE }; -struct _dlg_otg_param { - int vstartup_start; - int vupdate_offset; - int vupdate_width; - int vready_offset; -}; - struct vupdate_keepout_params { int start_offset; int end_offset; @@ -125,8 +118,6 @@ struct timing_generator { const struct timing_generator_funcs *funcs; struct dc_bios *bp; struct dc_context *ctx; - struct _dlg_otg_param dlg_otg_param; - enum 
signal_type signal; int inst; }; @@ -140,7 +131,13 @@ struct timing_generator_funcs { const struct dc_crtc_timing *timing); void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing, - bool use_vbios); + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios + ); void (*setup_vertical_interrupt0)( struct timing_generator *optc, uint32_t start_line, @@ -210,7 +207,11 @@ struct timing_generator_funcs { bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width); - void (*program_global_sync)(struct timing_generator *tg); + void (*program_global_sync)(struct timing_generator *tg, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); From 79592db31297d2fbf1c24b7775199e2bda283079 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 16 Apr 2019 13:31:22 -0400 Subject: [PATCH 041/178] drm/amd/display: fix acquire_first_split_pipe function This function needs to re-calculate the scaling on the pipe that loses its half.
Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 20966325852f..47b99458d319 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1184,24 +1184,27 @@ static int acquire_first_split_pipe( int i; for (i = 0; i < pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; + struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i]; - if (pipe_ctx->top_pipe && - pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { - pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe; - if (pipe_ctx->bottom_pipe) - pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe; + if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) && + split_pipe->top_pipe->plane_state == split_pipe->plane_state) { + split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe; + if (split_pipe->bottom_pipe) + split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe; - memset(pipe_ctx, 0, sizeof(*pipe_ctx)); - pipe_ctx->stream_res.tg = pool->timing_generators[i]; - pipe_ctx->plane_res.hubp = pool->hubps[i]; - pipe_ctx->plane_res.ipp = pool->ipps[i]; - pipe_ctx->plane_res.dpp = pool->dpps[i]; - pipe_ctx->stream_res.opp = pool->opps[i]; - pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; - pipe_ctx->pipe_idx = i; + if (split_pipe->top_pipe->plane_state) + resource_build_scaling_params(split_pipe->top_pipe); - pipe_ctx->stream = stream; + memset(split_pipe, 0, sizeof(*split_pipe)); + split_pipe->stream_res.tg = pool->timing_generators[i]; + split_pipe->plane_res.hubp = pool->hubps[i]; + split_pipe->plane_res.ipp = pool->ipps[i]; + split_pipe->plane_res.dpp = pool->dpps[i]; + split_pipe->stream_res.opp 
= pool->opps[i]; + split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst; + split_pipe->pipe_idx = i; + + split_pipe->stream = stream; return i; } } From b2293ac23776123b896479b6a53463753c648d3b Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 24 Apr 2019 19:18:12 -0400 Subject: [PATCH 042/178] drm/amd/display: move back vbios cmd table for set dprefclk [Why] Upon closer inspection, our previous implementation is missing code for programming de-spread and DP DTO. Porting this logic into driver is rather involved, as there are a lot of table look ups. So for now move back to calling vbios cmd table [How] Go back to calling vbios cmd table for set dp_refclk Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 70 +++++++++++++++++++ .../gpu/drm/amd/display/dc/dce/dce_clk_mgr.h | 2 + 2 files changed, 72 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 365c10848797..6b2e207777f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -241,6 +241,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state( return low_req_clk; } +/* TODO: remove use the two broken down functions */ static int dce_set_clock( struct clk_mgr *clk_mgr, int requested_clk_khz) @@ -336,6 +337,75 @@ int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz) return actual_clock; } +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct bp_set_dce_clock_parameters dce_clk_params; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + struct dc *core_dc = clk_mgr->ctx->dc; + struct dmcu *dmcu = core_dc->res_pool->dmcu; + int actual_clock = requested_clk_khz; + /* Prepare to program display clock*/ + memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + + 
/* Make sure requested clock isn't lower than minimum threshold*/ + if (requested_clk_khz > 0) + requested_clk_khz = max(requested_clk_khz, + clk_mgr_dce->dentist_vco_freq_khz / 62); + + dce_clk_params.target_clock_frequency = requested_clk_khz; + dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + actual_clock = dce_clk_params.target_clock_frequency; + + /* + * from power down, we need mark the clock state as ClocksStateNominal + * from HWReset, so when resume we will call pplib voltage regulator. + */ + if (requested_clk_khz == 0) + clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + + + if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { + if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock) + dmcu->funcs->set_psr_wait_loop(dmcu, + actual_clock / 1000 / 7); + } + } + + clk_mgr_dce->dfs_bypass_disp_clk = actual_clock; + return actual_clock; + +} + +int dce112_set_dprefclk(struct clk_mgr *clk_mgr) +{ + struct bp_set_dce_clock_parameters dce_clk_params; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + + memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + + /*Program DP ref Clock*/ + /*VBIOS will determine DPREFCLK frequency, so we don't set it*/ + dce_clk_params.target_clock_frequency = 0; + dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; + if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev)) + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = + (dce_clk_params.pll_id == + CLOCK_SOURCE_COMBO_DISPLAY_PLL0); + else + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + + /* Returns the dp_refclk that was set */ + return dce_clk_params.target_clock_frequency; +} + static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce) { struct 
dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index 36942ab022a2..cca0c95d8cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -189,6 +189,8 @@ void dce110_fill_display_configs( struct dm_pp_display_configuration *pp_display_cfg); int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz); +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz); +int dce112_set_dprefclk(struct clk_mgr *clk_mgr); struct clk_mgr *dce_clk_mgr_create( struct dc_context *ctx, From 1894478ad1f8fd7366edc5cee49ee9caea0e3d52 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 25 Apr 2019 11:02:30 -0400 Subject: [PATCH 043/178] drm/amd/display: Fill plane attrs only for valid pxl format [Why] In fill_plane_buffer_attributes() we calculate chroma/luma assuming that the surface_pixel_format is always valid. If it's not the case, there's a risk of divide by zero error. 
[How] Check if format valid before calculating pixel format attributes Signed-off-by: Roman Li Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1e8b51d21fea..c6713432935e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2584,7 +2584,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, address->type = PLN_ADDR_TYPE_GRAPHICS; address->grph.addr.low_part = lower_32_bits(afb->address); address->grph.addr.high_part = upper_32_bits(afb->address); - } else { + } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { uint64_t chroma_addr = afb->address + fb->offsets[1]; plane_size->video.luma_size.x = 0; From 88ccdf1d59df4d93b0591c8879316de40ef88f93 Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Tue, 16 Apr 2019 11:07:22 -0400 Subject: [PATCH 044/178] drm/amd/display: Expose send immediate sdp message interface [Why] To send sdp message immediately from a single slot. [How] Modify the generic SDP message interface, and use GSP4 to send immediate sdp message. 
Signed-off-by: Leo (Hanghong) Ma Reviewed-by: Harry Wentland Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 18 --- .../gpu/drm/amd/display/dc/core/dc_stream.c | 47 ++------ .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 13 +++ .../display/dc/dcn10/dcn10_stream_encoder.c | 106 +++++++++++++++++- .../display/dc/dcn10/dcn10_stream_encoder.h | 22 ++++ .../amd/display/dc/inc/hw/stream_encoder.h | 7 +- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 5 + 7 files changed, 155 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 47b99458d319..58ce7a6b914c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2446,21 +2446,6 @@ static void set_spd_info_packet( *info_packet = stream->vrr_infopacket; } -static void set_dp_sdp_info_packet( - struct dc_info_packet *info_packet, - struct dc_stream_state *stream) -{ - /* SPD info packet for custom sdp message */ - - /* Return if false. 
If true, - * set the corresponding bit in the info packet - */ - if (!stream->dpsdp_infopacket.valid) - return; - - *info_packet = stream->dpsdp_infopacket; -} - static void set_hdr_static_info_packet( struct dc_info_packet *info_packet, struct dc_stream_state *stream) @@ -2557,7 +2542,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) info->spd.valid = false; info->hdrsmd.valid = false; info->vsc.valid = false; - info->dpsdp.valid = false; signal = pipe_ctx->stream->signal; @@ -2577,8 +2561,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) set_spd_info_packet(&info->spd, pipe_ctx->stream); set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream); - - set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream); } patch_gamut_packet_checksum(&info->gamut); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index a79f608b2f79..b723ffc8ea25 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -371,42 +371,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) return 0; } -static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx, - const uint8_t *custom_sdp_message, - unsigned int sdp_message_size) -{ - uint8_t i; - struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; - - /* set valid info */ - info->dpsdp.valid = true; - - /* set sdp message header */ - info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */ - info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */ - info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */ - info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */ - - /* set sdp message data */ - for (i = 0; i < 32; i++) - info->dpsdp.sb[i] = (custom_sdp_message[i+4]); - -} - -static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx) -{ - struct encoder_info_frame *info = 
&pipe_ctx->stream_res.encoder_info_frame; - - /* in-valid info */ - info->dpsdp.valid = false; -} - bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, const uint8_t *custom_sdp_message, unsigned int sdp_message_size) { int i; - struct dc *core_dc; + struct dc *dc; struct resource_context *res_ctx; if (stream == NULL) { @@ -414,8 +384,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, return false; } - core_dc = stream->ctx->dc; - res_ctx = &core_dc->current_state->res_ctx; + dc = stream->ctx->dc; + res_ctx = &dc->current_state->res_ctx; for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; @@ -423,11 +393,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, if (pipe_ctx->stream != stream) continue; - build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size); + if (dc->hwss.send_immediate_sdp_message != NULL) + dc->hwss.send_immediate_sdp_message(pipe_ctx, + custom_sdp_message, + sdp_message_size); + else + DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n", + __func__); - core_dc->hwss.update_info_frame(pipe_ctx); - - invalid_dp_sdp_info_frame(pipe_ctx); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 723b2ddf9aa5..c2b60b1e1a25 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2957,6 +2957,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx, } } +static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message( + pipe_ctx->stream_res.stream_enc, + custom_sdp_message, + sdp_message_size); + } +} + static const struct hw_sequencer_funcs dcn10_funcs 
= { .program_gamut_remap = program_gamut_remap, .init_hw = dcn10_init_hw, @@ -2976,6 +2988,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .enable_timing_synchronization = dcn10_enable_timing_synchronization, .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, .enable_stream = dce110_enable_stream, .disable_stream = dce110_disable_stream, .unblank_stream = dcn10_unblank_stream, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index c259c51dff9a..ba71b5224e7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -727,11 +727,9 @@ void enc1_stream_encoder_update_dp_info_packets( 3, /* packetIndex */ &info_frame->hdrsmd); - if (info_frame->dpsdp.valid) - enc1_update_generic_info_packet( - enc1, - 4,/* packetIndex */ - &info_frame->dpsdp); + /* packetIndex 4 is used for send immediate sdp message, and please + * use other packetIndex (such as 5,6) for other info packet + */ /* enable/disable transmission of packet(s). * If enabled, packet transmission begins on the next frame @@ -739,7 +737,101 @@ void enc1_stream_encoder_update_dp_info_packets( REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); - REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid); + + + /* This bit is the master enable bit. + * When enabling secondary stream engine, + * this master bit must also be set. + * This register shared with audio info frame. 
+ * Therefore we need to enable master bit + * if at least one of the fields is not 0 + */ + value = REG_READ(DP_SEC_CNTL); + if (value) + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); +} + +void enc1_stream_encoder_send_immediate_sdp_message( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value = 0; + + /* TODOFPGA Figure out a proper number for max_retries polling for lock + * use 50 for now. + */ + uint32_t max_retries = 50; + + /* check if GSP4 is transmitted */ + REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, + 0, 10, max_retries); + + /* disable GSP4 transmitting */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0); + + /* transmit GSP4 at the earliest time in a frame */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1); + + /*we need turn on clock before programming AFMT block*/ + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + + /* check if HW reading GSP memory */ + REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, + 0, 10, max_retries); + + /* HW does is not reading GSP memory not reading too long -> + * something wrong. clear GPS memory access and notify? 
+ * hw SW is writing to GSP memory + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); + + /* use generic packet 4 for immediate sdp message */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, + AFMT_GENERIC_INDEX, 4); + + /* write generic packet header + * (4th byte is for GENERIC0 only) + */ + REG_SET_4(AFMT_GENERIC_HDR, 0, + AFMT_GENERIC_HB0, custom_sdp_message[0], + AFMT_GENERIC_HB1, custom_sdp_message[1], + AFMT_GENERIC_HB2, custom_sdp_message[2], + AFMT_GENERIC_HB3, custom_sdp_message[3]); + + /* write generic packet contents + * (we never use last 4 bytes) + * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers + */ + { + const uint32_t *content = + (const uint32_t *) &custom_sdp_message[4]; + + REG_WRITE(AFMT_GENERIC_0, *content++); + REG_WRITE(AFMT_GENERIC_1, *content++); + REG_WRITE(AFMT_GENERIC_2, *content++); + REG_WRITE(AFMT_GENERIC_3, *content++); + REG_WRITE(AFMT_GENERIC_4, *content++); + REG_WRITE(AFMT_GENERIC_5, *content++); + REG_WRITE(AFMT_GENERIC_6, *content++); + REG_WRITE(AFMT_GENERIC_7, *content); + } + + /* check whether GENERIC4 registers double buffer update in immediate mode + * is pending + */ + REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, + 0, 10, max_retries); + + /* atomically update double-buffered GENERIC4 registers in immediate mode + * (update immediately) + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC4_IMMEDIATE_UPDATE, 1); + + /* enable GSP4 transmitting */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1); /* This bit is the master enable bit. 
* When enabling secondary stream engine, @@ -1463,6 +1555,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { enc1_stream_encoder_stop_hdmi_info_packets, .update_dp_info_packets = enc1_stream_encoder_update_dp_info_packets, + .send_immediate_sdp_message = + enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, .dp_blank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index e654c2f55971..a292b106a8b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -81,6 +81,7 @@ SRI(DP_MSE_RATE_UPDATE, DP, id), \ SRI(DP_PIXEL_FORMAT, DP, id), \ SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_SEC_CNTL2, DP, id), \ SRI(DP_STEER_FIFO, DP, id), \ SRI(DP_VID_M, DP, id), \ SRI(DP_VID_N, DP, id), \ @@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers { uint32_t AFMT_60958_1; uint32_t AFMT_60958_2; uint32_t DIG_FE_CNTL; + uint32_t DIG_FE_CNTL2; uint32_t DP_MSE_RATE_CNTL; uint32_t DP_MSE_RATE_UPDATE; uint32_t DP_PIXEL_FORMAT; uint32_t DP_SEC_CNTL; + uint32_t DP_SEC_CNTL2; uint32_t DP_STEER_FIFO; uint32_t DP_VID_M; uint32_t DP_VID_N; @@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers { SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\ @@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers { 
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ @@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers { SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ @@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers { SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ @@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers { type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\ type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ @@ -312,6 +323,7 @@ struct 
dcn10_stream_enc_registers { type AFMT_GENERIC2_FRAME_UPDATE;\ type AFMT_GENERIC3_FRAME_UPDATE;\ type AFMT_GENERIC4_FRAME_UPDATE;\ + type AFMT_GENERIC4_IMMEDIATE_UPDATE;\ type AFMT_GENERIC5_FRAME_UPDATE;\ type AFMT_GENERIC6_FRAME_UPDATE;\ type AFMT_GENERIC7_FRAME_UPDATE;\ @@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers { type DP_SEC_GSP5_ENABLE;\ type DP_SEC_GSP6_ENABLE;\ type DP_SEC_GSP7_ENABLE;\ + type DP_SEC_GSP7_PPS;\ type DP_SEC_GSP7_SEND;\ + type DP_SEC_GSP4_SEND;\ + type DP_SEC_GSP4_SEND_PENDING;\ + type DP_SEC_GSP4_LINE_NUM;\ + type DP_SEC_GSP4_SEND_ANY_LINE;\ type DP_SEC_MPG_ENABLE;\ type DP_VID_STREAM_DIS_DEFER;\ type DP_VID_STREAM_ENABLE;\ @@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets( struct stream_encoder *enc, const struct encoder_info_frame *info_frame); +void enc1_stream_encoder_send_immediate_sdp_message( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void enc1_stream_encoder_stop_dp_info_packets( struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 49854eb73d1d..537563888f87 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -63,8 +63,6 @@ struct encoder_info_frame { struct dc_info_packet vsc; /* HDR Static MetaData */ struct dc_info_packet hdrsmd; - /* custom sdp message */ - struct dc_info_packet dpsdp; }; struct encoder_unblank_param { @@ -123,6 +121,11 @@ struct stream_encoder_funcs { struct stream_encoder *enc, const struct encoder_info_frame *info_frame); + void (*send_immediate_sdp_message)( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void (*stop_dp_info_packets)( struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 
33905468e2b9..eb1c12ed026a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -158,6 +158,11 @@ struct hw_sequencer_funcs { void (*update_info_frame)(struct pipe_ctx *pipe_ctx); + void (*send_immediate_sdp_message)( + struct pipe_ctx *pipe_ctx, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void (*enable_stream)(struct pipe_ctx *pipe_ctx); void (*disable_stream)(struct pipe_ctx *pipe_ctx, From dd68722c427d5b33420dce0ed0c44b4881e0a416 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 May 2019 17:43:10 -0400 Subject: [PATCH 045/178] drm/amdgpu: Reserve shared fence for eviction fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to reserve space for the shared eviction fence when initializing a KFD VM. Signed-off-by: Felix Kuehling Acked-by: Christian König Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5b4fff9a1509..123eb0d7e2e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -897,6 +897,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; + ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); + if (ret) + goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, &vm->process_info->eviction_fence->base, true); amdgpu_bo_unreserve(vm->root.base.bo); @@ -910,6 +913,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, return 0; +reserve_shared_fail: wait_pd_fail: validate_pd_fail: amdgpu_bo_unreserve(vm->root.base.bo); From 8511477773c37c86b70e23b7fa5ff852cde03ff8 Mon Sep 17 00:00:00 2001 From: James Zhu 
Date: Fri, 5 Apr 2019 15:30:07 -0400 Subject: [PATCH 046/178] drm/amdgpu: add EDC counter register Add EDC counter register to support gfx9 gpr EDC workaround to clear all EDC counters. Signed-off-by: James Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../amd/include/asic_reg/gc/gc_9_0_offset.h | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index 529b37db274c..f1d048e0ed2c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -829,6 +829,8 @@ #define mmTD_CNTL_BASE_IDX 0 #define mmTD_STATUS 0x0526 #define mmTD_STATUS_BASE_IDX 0 +#define mmTD_EDC_CNT 0x052e +#define mmTD_EDC_CNT_BASE_IDX 0 #define mmTD_DSM_CNTL 0x052f #define mmTD_DSM_CNTL_BASE_IDX 0 #define mmTD_DSM_CNTL2 0x0530 @@ -845,6 +847,8 @@ #define mmTA_STATUS_BASE_IDX 0 #define mmTA_SCRATCH 0x0564 #define mmTA_SCRATCH_BASE_IDX 0 +#define mmTA_EDC_CNT 0x0586 +#define mmTA_EDC_CNT_BASE_IDX 0 // addressBlock: gc_gdsdec @@ -1051,6 +1055,13 @@ #define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX 0 +// addressBlock: gc_ea_gceadec2 +// base address: 0x9c00 +#define mmGCEA_EDC_CNT 0x0706 +#define mmGCEA_EDC_CNT_BASE_IDX 0 +#define mmGCEA_EDC_CNT2 0x0707 +#define mmGCEA_EDC_CNT2_BASE_IDX 0 + // addressBlock: gc_rmi_rmidec // base address: 0x9e00 #define mmRMI_GENERAL_CNTL 0x0780 @@ -1709,6 +1720,8 @@ #define mmTC_CFG_L1_VOLATILE_BASE_IDX 0 #define mmTC_CFG_L2_VOLATILE 0x0b23 #define mmTC_CFG_L2_VOLATILE_BASE_IDX 0 +#define mmTCI_EDC_CNT 0x0b60 +#define mmTCI_EDC_CNT_BASE_IDX 0 #define mmTCI_STATUS 0x0b61 #define mmTCI_STATUS_BASE_IDX 0 #define mmTCI_CNTL_1 0x0b62 @@ -2594,6 +2607,24 @@ #define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX 0 #define mmCP_RB_DOORBELL_CLEAR 0x1188 #define mmCP_RB_DOORBELL_CLEAR_BASE_IDX 0 +#define mmCPF_EDC_TAG_CNT 0x1189 +#define mmCPF_EDC_TAG_CNT_BASE_IDX 0 
+#define mmCPF_EDC_ROQ_CNT 0x118a +#define mmCPF_EDC_ROQ_CNT_BASE_IDX 0 +#define mmCPG_EDC_TAG_CNT 0x118b +#define mmCPG_EDC_TAG_CNT_BASE_IDX 0 +#define mmCPG_EDC_DMA_CNT 0x118d +#define mmCPG_EDC_DMA_CNT_BASE_IDX 0 +#define mmCPC_EDC_SCRATCH_CNT 0x118e +#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX 0 +#define mmCPC_EDC_UCODE_CNT 0x118f +#define mmCPC_EDC_UCODE_CNT_BASE_IDX 0 +#define mmDC_EDC_STATE_CNT 0x1191 +#define mmDC_EDC_STATE_CNT_BASE_IDX 0 +#define mmDC_EDC_CSINVOC_CNT 0x1192 +#define mmDC_EDC_CSINVOC_CNT_BASE_IDX 0 +#define mmDC_EDC_RESTORE_CNT 0x1193 +#define mmDC_EDC_RESTORE_CNT_BASE_IDX 0 #define mmCP_GFX_MQD_CONTROL 0x11a0 #define mmCP_GFX_MQD_CONTROL_BASE_IDX 0 #define mmCP_GFX_MQD_BASE_ADDR 0x11a1 From 5326ad54c5ef01a828cf5a942f6fdbf204ef7d3f Mon Sep 17 00:00:00 2001 From: James Zhu Date: Fri, 5 Apr 2019 15:31:47 -0400 Subject: [PATCH 047/178] drm/amdgpu: add gfx9 gpr EDC workaround when RAS is enabled When RAS is enabled, initializes the VGPRs/LDS/SGPRs and resets EDC error counts. This is done in late_init, before RAS TA GFX enable. 
Signed-off-by: James Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 245 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15.h | 10 ++ 2 files changed, 255 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..14e671d3f079 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -34,6 +34,7 @@ #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" +#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -3529,6 +3530,245 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static const u32 vgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x000000f8, 0xbf110800, + 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, + 0x7e080280, 0x7e0a0280, + 0x7e0c0280, 0x7e0e0280, + 0x80808800, 0xbe803200, + 0xbf84fff5, 0xbf9c0000, + 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, + 0x10020288, 0xb8810904, + 0xb7814000, 0xd1196a01, + 0x00000301, 0xbe800087, + 0xbefc00c1, 0xd89c4000, + 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, + 0x00000800, 0x80808100, + 0xbf84fff8, 0x7e020280, + 0xbf810000, 0x00000000, +}; + +static const u32 sgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, + 0xbe832c65, 0xbe842c65, + 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, + 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* 
CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, + { 
SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, +}; + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i, j; + u32 tmp; + unsigned total_size, vgpr_offset, sgpr_offset; + u64 gpu_addr; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + tmp = RREG32_SOC15(GC, 0, mmGB_EDC_MODE); + WREG32_SOC15(GC, 0, mmGB_EDC_MODE, 0); + + total_size = + ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + vgpr_offset = total_size; + total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); + sgpr_offset = total_size; + total_size += sizeof(sgpr_init_compute_shader); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) + ib.ptr[i + 
(vgpr_offset / 4)] = vgpr_init_compute_shader[i]; + + for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) + ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* VGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = 
SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* schedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (j = 0; j < 16; j++) { + gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + static int gfx_v9_0_early_init(void 
*handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3577,6 +3817,11 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (!*ras_if) return -ENOMEM; + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index a66c8bfbbaa6..06f39f5bbf76 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -42,8 +42,18 @@ struct soc15_reg_golden { u32 or_mask; }; +struct soc15_reg_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t reg_value; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } From 992af942a6cfb32f4b5a9fc29545f101074fa250 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 2 May 2019 20:43:00 -0400 Subject: [PATCH 048/178] drm/amdgpu: add df perfmon regs and funcs for xgmi v6: Squash in warning fix (Colin Ian King) v5: Fix warnings (Alex) v4: fixed mixed declaration and code warnings and minor errors v3: exposing df funcs in amdgpu_df_funcs in amdgpu.h v2: moving perfmonctl/perfmonctr from default to offset - adding df perfmonctl and perfmonctr registers for df counters - adding df funcs to set perfmonctl and get perfmonctr for df and xgmi counters - exposing df funcs in amdgpu_df_funcs Signed-off-by: Jonathan Kim Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 + drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 428 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/df_v3_6.h | 17 + .../amd/include/asic_reg/df/df_3_6_offset.h 
| 18 + 4 files changed, 473 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4b10f3c1c6c4..38e782dee478 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -210,6 +210,7 @@ struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; struct amdgpu_atif; +struct kfd_vm_fault_info; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -688,6 +689,12 @@ struct amdgpu_df_funcs { u32 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); }; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { @@ -1096,6 +1103,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index d5ebe566809b..24e25ae0ca83 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } +/* hold counter assignment per gpu struct */ +struct df_v3_6_event_mask { + struct amdgpu_device gpu; + uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; +}; + +/* get assigned df perfmon ctr as int */ +static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config, + int *counter) +{ + struct df_v3_6_event_mask *mask; + 
int i; + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { + *counter = i; + return; + } + } +} + +/* get address based on counter assignment */ +static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, + uint64_t config, + int is_ctrl, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + + int target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr < 0) + return; + + switch (target_cntr) { + + case 0: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + break; + case 1: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + break; + case 2: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + break; + case 3: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + break; + + } + +} + +/* get read counter address */ +static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +} + +/* get control counter settings i.e. 
address and values to set */ +static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr, + uint32_t *lo_val, + uint32_t *hi_val) +{ + + uint32_t eventsel, instance, unitmask; + uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; + + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + + if (lo_val == NULL || hi_val == NULL) + return; + + if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { + DRM_ERROR("DF PMC addressing not retrived! Lo: %x, Hi: %x", + *lo_base_addr, *hi_base_addr); + return; + } + + eventsel = GET_EVENT(config); + instance = GET_INSTANCE(config); + unitmask = GET_UNITMASK(config); + + es_5_0 = eventsel & 0x3FUL; + es_13_6 = instance; + es_13_0 = (es_13_6 << 6) + es_5_0; + es_13_12 = (es_13_0 & 0x03000UL) >> 12; + es_11_8 = (es_13_0 & 0x0F00UL) >> 8; + es_7_0 = es_13_0 & 0x0FFUL; + *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); + *hi_val = (es_11_8 | ((es_13_12)<<(29))); +} + +/* assign df performance counters for read */ +static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, + uint64_t config, + int *is_assigned) +{ + + struct df_v3_6_event_mask *mask; + int i, target_cntr; + + target_cntr = -1; + + *is_assigned = 0; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr >= 0) { + *is_assigned = 1; + return 0; + } + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { + if (mask->config_assign_mask[i] == 0ULL) { + mask->config_assign_mask[i] = config & 0x0FFFFFFUL; + return 0; + } + } + + return -ENOSPC; +} + +/* release performance counter */ +static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + + struct df_v3_6_event_mask *mask; + int target_cntr; + + target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + if 
(target_cntr >= 0) + mask->config_assign_mask[target_cntr] = 0ULL; + +} + +/* + * get xgmi link counters via programmable data fabric (df) counters (max 4) + * using cake tx event. + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @count -> counters to pass + * + */ + +static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + uint64_t *count) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + lo_val = RREG32_PCIE(lo_base_addr); + hi_val = RREG32_PCIE(hi_base_addr); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); +} + +/* + * reset xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ +static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr; + uint64_t config; + + config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + WREG32_PCIE(lo_base_addr, 0UL); + WREG32_PCIE(hi_base_addr, 0UL); +} + +/* + * add xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ + +static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + int ret, is_assigned; + + if (instance < 0 || instance > 1) + return -EINVAL; + + config = GET_INSTANCE_CONFIG(instance); + + ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); + + if (ret || is_assigned) + return ret; + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + 
&hi_base_addr, + &lo_val, + &hi_val); + + WREG32_PCIE(lo_base_addr, lo_val); + WREG32_PCIE(hi_base_addr, hi_val); + + return ret; +} + + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either resume or assign event via df perfmon + * + */ + +static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_enable) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + int ret; + + if (instance < 0 || instance > 1) + return -EINVAL; + + if (is_enable) { + + ret = df_v3_6_add_xgmi_link_cntr(adev, instance); + + if (ret) + return ret; + + } else { + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if (lo_base_addr == 0) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + + ret = 0; + } + + return ret; + +} + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either pause or unassign event via df perfmon + * + */ + +static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_disable) +{ + + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + if (is_disable) { + df_v3_6_reset_xgmi_link_cntr(adev, instance); + df_v3_6_pmc_release_cntr(adev, config); + } else { + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + } + + return 0; +} + +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, + int is_enable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + 
case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) + ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, + is_enable); + + if (ret) + return ret; + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, + int is_disable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) { + ret = df_v3_6_stop_xgmi_link_cntr(adev, + xgmi_tx_link, + is_disable); + if (ret) + return ret; + } + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, + uint64_t config, + uint64_t *count) +{ + + int xgmi_tx_link; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 
1 : -1); + + if (xgmi_tx_link >= 0) { + df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); + df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); + } + + break; + default: + break; + } + +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .init = df_v3_6_init, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, @@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .update_medium_grain_clock_gating = df_v3_6_update_medium_grain_clock_gating, .get_clockgating_state = df_v3_6_get_clockgating_state, + .pmc_start = df_v3_6_pmc_start, + .pmc_stop = df_v3_6_pmc_stop, + .pmc_get_count = df_v3_6_pmc_get_count }; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index e79c58e5efcb..fcffd807764d 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,6 +35,23 @@ enum DF_V3_6_MGCG { DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define AMDGPU_DF_MAX_COUNTERS 4 + +/* get flags from df perfmon config */ +#define GET_EVENT(x) (x & 0xFFUL) +#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \ + | ((0x046ULL + x) << 8) \ + | (0x02 << 16)) + +/* df event conf macros */ +#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) +#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) + extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index a9575db8d7aa..6efcaa93e17b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -30,4 +30,22 @@ #define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 
0 +#define smnPerfMonCtlLo0 0x01d440UL +#define smnPerfMonCtlHi0 0x01d444UL +#define smnPerfMonCtlLo1 0x01d450UL +#define smnPerfMonCtlHi1 0x01d454UL +#define smnPerfMonCtlLo2 0x01d460UL +#define smnPerfMonCtlHi2 0x01d464UL +#define smnPerfMonCtlLo3 0x01d470UL +#define smnPerfMonCtlHi3 0x01d474UL + +#define smnPerfMonCtrLo0 0x01d448UL +#define smnPerfMonCtrHi0 0x01d44cUL +#define smnPerfMonCtrLo1 0x01d458UL +#define smnPerfMonCtrHi1 0x01d45cUL +#define smnPerfMonCtrLo2 0x01d468UL +#define smnPerfMonCtrHi2 0x01d46cUL +#define smnPerfMonCtrLo3 0x01d478UL +#define smnPerfMonCtrHi3 0x01d47cUL + #endif From 7c04ca50b0b95c9af9613416ff0a671387c7e84a Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 22:17:57 +0800 Subject: [PATCH 049/178] drm/amdgpu: gpu reset will run late_init ras need late init to initialize proper state. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 665764ff7eb7..b2ba051edd90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3494,6 +3494,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); + r = amdgpu_device_ip_late_init(tmp_adev); + if (r) + goto out; + /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, tmp_adev); From b152e8e13e883656c0012504c451d483579aa842 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 19:31:06 +0800 Subject: [PATCH 050/178] drm/amdgpu: Revert "drm/amdgpu: skip gpu reset when ras error occured" Enable this now to reset the GPU on RAS errors. This reverts commit 138352e5752aa3e694951d70c8fe8730219f4edf. 
Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index eaef5edefc34..e60a554656ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -187,13 +187,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, bool is_baco) { - /* remove me when gpu reset works on vega20 A1. */ -#if 0 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); -#endif return 0; } From 7af23ebe93fe52a30b2338450c8cd40a4f5210d4 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 16:13:03 +0800 Subject: [PATCH 051/178] drm/amdgpu: Issue ras TA disable/enable cmd forcely on boot Check ras TA error code and return EAGAIN. Issue ras enable/disable cmd without checking current state. Looks like ras TA will handle current state == target state case. Now driver might need to do a reset to satisfy ras TA. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 34 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22bd21efe6b1..5f8e1163a75d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -521,6 +521,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? 
"enable":"disable", ras_block_str(head->block), ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; return -EINVAL; } @@ -541,16 +543,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, return -EINVAL; if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { - /* If ras is enabled by vbios, we set up ras object first in - * both case. For enable, that is all what we need do. For - * disable, we need perform a ras TA disable cmd after that. - */ - ret = __amdgpu_ras_feature_enable(adev, head, 1); - if (ret) - return ret; + if (enable) { + /* There is no harm to issue a ras TA cmd regardless of + * the currecnt ras state. + * If current state == target state, it will do nothing + * But sometimes it requests driver to reset and repost + * with error code -EAGAIN. + */ + ret = amdgpu_ras_feature_enable(adev, head, 1); + /* With old ras TA, we might fail to enable ras. + * Log it and just setup the object. + * TODO need remove this WA in the future. + */ + if (ret == -EINVAL) { + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (!ret) + DRM_INFO("RAS INFO: %s setup object\n", + ras_block_str(head->block)); + } + } else { + /* setup the object then issue a ras TA disable cmd.*/ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; - if (!enable) ret = amdgpu_ras_feature_enable(adev, head, 0); + } } else ret = amdgpu_ras_feature_enable(adev, head, enable); From a564808e7f5b19b3621a1dc4ff2a3042171ae167 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 19:12:24 +0800 Subject: [PATCH 052/178] drm/amdgpu: handle ras reset add another flag to allow IP do a gpu reset after device init. 
Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 36 +++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5f8e1163a75d..37cb3de08494 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -118,7 +118,8 @@ const char *ras_block_string[] = { #define ras_err_str(i) (ras_error_string[ffs(i)]) #define ras_block_str(i) (ras_block_string[i]) -#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) static void amdgpu_ras_self_test(struct amdgpu_device *adev) @@ -1358,6 +1359,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) } /* recovery end */ +/* return 0 if ras will reset gpu and repost.*/ +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!ras) + return -EINVAL; + + ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET; + return 0; +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -1433,7 +1447,12 @@ recovery_out: return -EINVAL; } -/* do some init work after IP late init as dependence */ +/* do some init work after IP late init as dependence. + * TODO + * gpu reset will re-enable ras, need fint out one way to run it again. + * for now, if a gpu reset happened, unless IP enable its ras, the ras state + * will be showed as disabled. 
+ */ void amdgpu_ras_post_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1462,6 +1481,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } } + + if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) { + con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET; + /* setup ras obj state as disabled. + * for init_by_vbios case. + * if we want to enable ras, just enable it in a normal way. + * If we want do disable it, need setup ras obj as enabled, + * then issue another TA disable cmd. + * See feature_enable_on_boot + */ + amdgpu_ras_disable_all_features(adev, 1); + amdgpu_ras_reset_gpu(adev, 0); + } } /* do some fini work before IP fini as dependence */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e60a554656ca..06ef325b61b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -175,6 +175,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); From 36810fdb69ba35d8d8d7407b79c522218e8a0dd5 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 22:32:34 +0800 Subject: [PATCH 053/178] drm/amdgpu: gmc support ras gpu reset request a gpu reset if ras return EAGAIN. we will run late init again so it is ok to do nothing this time. 
Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3fd79e07944d..19f7cc1d51c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -696,8 +696,14 @@ static int gmc_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -730,7 +736,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } From 70ab8c6125d6649f73a46118f8660c2ddeefa1e1 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 22:36:10 +0800 Subject: [PATCH 054/178] drm/amdgpu: gfx support ras gpu reset request a gpu reset if ras return EAGAIN. we will run late init again so it is ok to do nothing this time. 
Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 14e671d3f079..f88101f18409 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3825,8 +3825,14 @@ static int gfx_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -3859,7 +3865,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int gfx_v9_0_late_init(void *handle) From 9708a318620d83d48788bf1c7701dfc193bb5632 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 8 May 2019 22:38:37 +0800 Subject: [PATCH 055/178] drm/amdgpu: sdma support ras gpu reset request a gpu reset if ras return EAGAIN. we will run late init again so it is ok to do nothing this time. 
Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9c88ce513d78..65775964efd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1531,8 +1531,14 @@ static int sdma_v4_0_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -1571,7 +1577,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int sdma_v4_0_sw_init(void *handle) From e79a04d531478e4ccbd3c22288b1229677e1e489 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 00:13:22 +0800 Subject: [PATCH 056/178] drm/amdgpu: gpu reset will run ras post init ras need initialize proper state after late init Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b2ba051edd90..d7e3af3e1ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3498,6 +3498,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + /* must succeed. 
*/ + amdgpu_ras_post_init(tmp_adev); + /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, tmp_adev); From 78d481126795e9e4740204df95572d9731a6d34e Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Thu, 9 May 2019 17:32:59 +0800 Subject: [PATCH 057/178] drm/amdgpu: init vega10 SR-IOV reg access mode Set different register access mode according to the features provided by firmware Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 44 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 13 +++++++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 19 ++++++++++ 4 files changed, 79 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d7e3af3e1ea1..4f4e2a453b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1532,6 +1532,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; + + /* query the reg access mode at the very beginning */ + amdgpu_virt_init_reg_access_mode(adev); } adev->pm.pp_feature = amdgpu_pp_feature_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7d484fad3909..1f0bd4d16475 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) return clk; } +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->init_reg_access_mode) + virt->ops->init_reg_access_mode(adev); +} + +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) +{ + bool ret = false; 
+ struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) + ret = true; + + return ret; +} + +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) + && !(amdgpu_sriov_runtime(adev))) + ret = true; + + return ret; +} + +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) + ret = true; + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 584947b7ccf3..dca25deee75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; +/* According to the fw feature, some new reg access modes are supported */ +#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ +#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ +#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ +#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -59,6 +65,7 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); + void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -258,6 +265,7 @@ struct amdgpu_virt { uint32_t gim_feature; /* protect DPM events to GIM */ struct mutex dpm_mutex; + uint32_t reg_access_mode; }; #define amdgpu_sriov_enabled(adev) \ @@ -307,4 +315,9 @@ void 
amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 64a7b1fb1d1b..31030f86be86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -26,6 +26,7 @@ #include "nbio/nbio_6_1_sh_mask.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" +#include "mp/mp_9_0_offset.h" #include "soc15.h" #include "vega10_ih.h" #include "soc15_common.h" @@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } +static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); + uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + + adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; + + if (rlc_fw_ver >= 0x5d) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; + + if (sos_fw_ver >= 0x80455) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; + + if (sos_fw_ver >= 0x8045b) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; +} + const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, @@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .trans_msg = xgpu_ai_mailbox_trans_msg, .get_pp_clk = xgpu_ai_get_pp_clk, .force_dpm_level = xgpu_ai_force_dpm_level, + .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; From 
2d11fd3f5420f71b15158672a8ffa3722b37ebf5 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Wed, 24 Apr 2019 15:23:41 +0800 Subject: [PATCH 058/178] drm/amdgpu: initialize PSP before IH under SR-IOV In order to support new PSP feature that PSP may provide interface to program IH CNTL register, initialize PSP before IH under Vega10 SR-IOV VF Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 24 ++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4f4e2a453b18..309461d0c275 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1580,6 +1580,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 78b27c03b8c2..c1785843f0de 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -608,12 +608,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + + /* For Vega10 SR-IOV, PSP need to be initialized 
before IH */ + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); From 3880369f6a8ee532849ed79f901ca32ff7511540 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Mon, 6 May 2019 15:06:17 +0800 Subject: [PATCH 059/178] drm/amdgpu: Add new PSP cmd GFX_CMD_ID_PROG_REG Add new PSP command GFX_CMD_ID_PROG_REG definition Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 2f79765b4bdb..7f8edc66ddff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -94,6 +94,7 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ + GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ }; @@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to setup register program */ +struct psp_gfx_cmd_reg_prog { + uint32_t reg_value; + uint32_t reg_id; +}; + /* All GFX ring buffer commands. 
*/ union psp_gfx_commands { @@ -226,6 +233,7 @@ union psp_gfx_commands struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; + struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog; }; From c5d19419ed9cf8e3bc4190996ee95ea70b324737 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Mon, 6 May 2019 15:27:23 +0800 Subject: [PATCH 060/178] drm/amdgpu: implement PSP cmd GFX_CMD_ID_PROG_REG Add implementation to program regs by PSP, currently the following IH registers are supported: IH_RB_CNTL IH_RB_CNTL_RING1 IH_RB_CNTL_RING2 Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 28 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 11 +++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 05897b05766b..c567a5586e8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp) return ret; } +static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t id, uint32_t value) +{ + cmd->cmd_id = GFX_CMD_ID_PROG_REG; + cmd->cmd.cmd_setup_reg_prog.reg_value = value; + cmd->cmd.cmd_setup_reg_prog.reg_id = id; +} + +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value) +{ + struct psp_gfx_cmd_resp *cmd = NULL; + int ret = 0; + + if (reg >= PSP_REG_LAST) + return -EINVAL; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_reg_prog_cmd_buf(cmd, reg, value); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + return ret; +} + static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, uint32_t 
xgmi_ta_size, uint32_t shared_size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index acbc18b594a2..cf49539b0b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -62,6 +62,14 @@ struct psp_ring uint32_t ring_size; }; +/* More registers may will be supported */ +enum psp_reg_prog_id { + PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ + PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ + PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_LAST +}; + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); @@ -252,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; - +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value); #endif From 470b425019e70d763d2e8bdc3c8f8ad2f8560de2 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Sun, 28 Apr 2019 18:03:17 +0800 Subject: [PATCH 061/178] drm/amdgpu: call psp to program ih cntl in SR-IOV call psp to program ih cntl in SR-IOV if supported Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 91 +++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 8d89ab7f0ae8..5f54acc70fec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, 
PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } adev->irq.ih.enabled = true; if (adev->irq.ih1.ring_size) { ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } adev->irq.ih1.enabled = true; } @@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } adev->irq.ih2.enabled = true; } } @@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); @@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct 
amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); @@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, @@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) 
WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); From 7bd877692e23964fc7d3c9736ed9ec5e511999dc Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Mon, 25 Feb 2019 18:41:02 +0800 Subject: [PATCH 062/178] drm/amdgpu: Support PSP VMR ring for Vega10 VF Add VMR ring support for Vega10 SR-IOV VF if PSP supported Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 123 ++++++++++++++++++++------ 1 file changed, 95 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 143f0fae69d5..3f5827764df0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp); 
+static int psp_v3_1_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type); + static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp, psp_v3_1_reroute_ih(psp); - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + ret = psp_v3_1_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); + return ret; + } - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + /* No size initialization for sriov */ + /* Write the ring initialization command to C2PMSG_101 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + /* there might be hardware handshake issue which needs delay */ + 
mdelay(20); + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_101), 0x80000000, + 0x8000FFFF, false); + } else { + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_64), 0x80000000, + 0x8000FFFF, false); + + } return ret; } @@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - psp_ring_reg = 3 << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + /* Write the Destroy GPCOM ring command to C2PMSG_101 */ + psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); + /* there might be handshake issue which needs delay */ + mdelay(20); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, + 
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } return ret; } @@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_v3_1_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) return 0; } +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + return true; + + return false; +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = 
psp_v3_1_bootloader_load_sysdrv, @@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = { .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, + .support_vmr_ring = psp_v3_1_support_vmr_ring, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) From 98cad2deaf557d63f2c84fae30afefd0e84840e9 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Mon, 4 Mar 2019 12:30:58 +0800 Subject: [PATCH 063/178] drm/amdgpu: Skip setting some regs under Vega10 VF For Vega10 SR-IOV VF, skip setting some regs due to: 1, host will program them 2, avoid VF register programming violations Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++------ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 25 ++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 14 ++++++++------ drivers/gpu/drm/amd/amdgpu/soc15.c | 16 +++++++++++----- 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f88101f18409..49ed53812091 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -308,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + } break; case CHIP_VEGA12: 
soc15_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 19f7cc1d51c3..1e1ef46715da 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1097,6 +1097,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + if (amdgpu_virt_support_skip_setting(adev)) + break; + /* fall through */ case CHIP_VEGA20: soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 41a9a5779623..05d1d448c8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Set default page address. 
*/ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; @@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { + if (amdgpu_virt_support_skip_setting(adev)) + return; + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_virt_support_skip_setting(adev)) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + } } /** @@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + if (amdgpu_virt_support_skip_setting(adev)) + return; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 65775964efd3..d227215a85bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c1785843f0de..a4ebe9a0ac7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1024,11 +1024,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); + /* Two reasons to skip + * 1, Host driver already programmed them + * 2, To avoid registers program violations in SR-IOV + */ + if (!amdgpu_virt_support_skip_setting(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio_funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } } adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, From 6b1ff3ddc66d431a7a71f454aebf32e9757d2ac3 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Fri, 1 Mar 2019 11:56:20 +0800 Subject: [PATCH 064/178] drm/amdgpu: add basic func for RLC program reg New feature for RLC, some 
registers can be programmed by RLC interface under SR-IOV VF: WREG32_SOC15_RLC_SHADOW: 1, for GRBM_GFX_CNTL, firstly the new register value should be programmed to SCRATCH_REG2 2, for GRBM_GFX_INDEX, firstly the new register value should be programmed to SCRATCH_REG3 WREG32_RLC: for registers supported to be programmed by RLC interface, the following sequence should be used: 1, write the value to SCRATCH_REG0 2, write reg | 0x80000000 to SCRATCH_REG1 3, write 0x1 to RLC_SPARE_INT to notify RLC 4, polling SCRATCH_REG1 to check if finished Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 59 ++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 49c262540940..c634606e64bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -89,6 +89,61 @@ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) + +#define WREG32_RLC(reg, value) \ + do { \ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t i = 0; \ + uint32_t retries = 50000; \ + uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ + uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \ + uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \ + WREG32(r0, value); \ + WREG32(r1, (reg | 0x80000000)); \ + WREG32(spare_int, 0x1); \ + for (i = 0; i < retries; i++) { \ + u32 tmp = RREG32(r1); \ + if (!(tmp & 0x80000000)) \ + break; \ + udelay(10); \ + } \ + if (i >= retries) \ + pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \ + } else { \ + WREG32(reg, value); \ + } \ + } while (0) + +#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ + do { \ + uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] +
reg;\ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ + uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ + uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ + uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \ + if (target_reg == grbm_cntl) \ + WREG32(r2, value); \ + else if (target_reg == grbm_idx) \ + WREG32(r3, value); \ + WREG32(target_reg, value); \ + } else { \ + WREG32(target_reg, value); \ + } \ + } while (0) + +#define WREG32_SOC15_RLC(ip, inst, reg, value) \ + do { \ + uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ + WREG32_RLC(target_reg, value); \ + } while (0) + +#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ + WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + +#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ + WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) + #endif - - From 1bff7f6c679fb605d2d3fae77c9dd8d4cbad92b9 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Thu, 2 May 2019 20:33:49 +0800 Subject: [PATCH 065/178] drm/amdgpu: RLC to program regs for Vega10 SR-IOV Under Vega10 SR-IOV, with the RLC's new feature, the VF should call the RLC to program some registers if supported Signed-off-by: Trigger Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 30 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 100 +++++++++--------- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 20 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++- 4 files changed, 85 insertions(+), 77 deletions(-) diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ef3d93b995b2..6574814d2ade 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. 
*/ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { /* Don't read wptr with get_user because the user @@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); release_queue(kgd); @@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -647,7 +647,7 @@ static 
int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -838,7 +838,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -848,7 +848,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 49ed53812091..fc6d8c1fd32b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1837,7 +1837,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1905,8 +1905,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1917,7 +1917,7 
@@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); @@ -1960,7 +1960,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) */ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, + WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << @@ -2027,11 +2027,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { /* csib */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ -2501,7 +2501,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -2699,9 +2699,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) 
adev->gfx.compute_ring[i].sched.ready = false; @@ -2762,9 +2762,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2982,67 +2982,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? 
*/ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, 
mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ @@ -3057,19 +3057,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -3086,7 +3086,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3098,21 +3098,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, 0, 
mmCP_HQD_ACTIVE, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -4572,8 +4572,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 7bb5359d0bbd..0dc8926111e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; /* Program the AGP BAR */ - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(GC, 0, 
mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) @@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * workaround that increase system aperture high address (add 1) * to get rid of the VM fault and hardware hang. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ @@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); } static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) @@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) * VF copy registers so vbios post doesn't program them, for * SRIOV driver need to program them */ - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); } @@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a4ebe9a0ac7f..f9c9cacc5c50 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -232,7 +232,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) @@ -387,7 +387,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, tmp &= ~(entry->and_mask); tmp |= entry->or_mask; } - WREG32(reg, tmp); + + if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) || + reg == SOC15_REG_OFFSET(GC, 0, 
mmSH_MEM_CONFIG)) + WREG32_RLC(reg, tmp); + else + WREG32(reg, tmp); + } } From c5fb351470e81913163aa0e8692434ad09cb00a7 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 8 May 2019 20:55:16 +0800 Subject: [PATCH 066/178] drm/amd/display: Make some functions static Fix the following sparse warnings: drivers/gpu/drm/amd/amdgpu/../display/dc/dce120/dce120_resource.c:483:21: warning: symbol 'dce120_clock_source_create' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dce120/dce120_resource.c:506:6: warning: symbol 'dce120_clock_source_destroy' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dce120/dce120_resource.c:513:6: warning: symbol 'dce120_hw_sequencer_create' was not declared. Should it be static? Fixes: b8fdfcc6a92c ("drm/amd/display: Add DCE12 core support") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 6d49c7143c67..fa46caee958a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -480,7 +480,7 @@ static const struct dc_debug_options debug_defaults = { .disable_clock_gate = true, }; -struct clock_source *dce120_clock_source_create( +static struct clock_source *dce120_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -503,14 +503,14 @@ struct clock_source *dce120_clock_source_create( return NULL; } -void dce120_clock_source_destroy(struct clock_source **clk_src) +static void dce120_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; } -bool dce120_hw_sequencer_create(struct dc *dc) +static bool dce120_hw_sequencer_create(struct dc *dc) { /* 
All registers used by dce11.2 match those in dce11 in offset and * structure From 3058770ae3526ef4fc02afad796a43aee140cd2a Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 8 May 2019 16:38:58 -0400 Subject: [PATCH 067/178] drm/amdgpu: Fix S3 test issue During S3 test, when system wake up and resume, ras interface is already allocated. Move workaround before ras jumps to resume step in gfx_v9_0_ecc_late_init, and make sure workaround applied during resume. Also remove unused mmGB_EDC_MODE clearing. Signed-off-by: James Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fc6d8c1fd32b..d07f3dda5b96 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3632,7 +3632,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) struct amdgpu_ib ib; struct dma_fence *f = NULL; int r, i, j; - u32 tmp; unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; @@ -3644,9 +3643,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) if (!ring->sched.ready) return 0; - tmp = RREG32_SOC15(GC, 0, mmGB_EDC_MODE); - WREG32_SOC15(GC, 0, mmGB_EDC_MODE, 0); - total_size = ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; total_size += @@ -3812,6 +3808,11 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + if (*ras_if) goto resume; @@ -3819,11 +3820,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (!*ras_if) return -ENOMEM; - /* requires IBs so do in late init after IB pool is initialized */ - r = gfx_v9_0_do_edc_gpr_workarounds(adev); - if (r) - return r; - **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); From 
466b179346094e01deccd051a215fe782b59ca68 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 7 May 2019 11:53:31 +0800 Subject: [PATCH 068/178] drm/amdgpu: add badpages sysfs interface add badpages node. it will output badpages list in format gpu pfn : gpu page size : flags example 0x00000000 : 0x00001000 : R 0x00000001 : 0x00001000 : R 0x00000002 : 0x00001000 : R 0x00000003 : 0x00001000 : R 0x00000004 : 0x00001000 : R 0x00000005 : 0x00001000 : R 0x00000006 : 0x00001000 : R 0x00000007 : 0x00001000 : P 0x00000008 : 0x00001000 : P 0x00000009 : 0x00001000 : P flags can be one of below characters R: reserved. P: pending for reserve. F: failed to reserve for some reasons. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + 2 files changed, 147 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 37cb3de08494..49c71cfc7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -90,6 +90,12 @@ struct ras_manager { struct ras_err_data err_data; }; +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + const char *ras_error_string[] = { "none", "parity", @@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* sysfs begin */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count); + +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) +{ + switch (flags) { + case 0: + return "R"; + case 1: + return "P"; + case 2: + default: + return "F"; + }; +} + +/* + * DOC: ras sysfs gpu_vram_bad_pages interface + * + * It allows user to read the bad pages of vram on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages + * + * It outputs multiple lines, and each line stands for
one gpu page. + * + * The format of one line is below, + * gpu pfn : gpu page size : flags + * + * gpu pfn and gpu page size are printed in hex format. + * flags can be one of below character, + * R: reserved, this gpu page is reserved and not able to use. + * P: pending for reserve, this gpu page is marked as bad, will be reserved + * in next window of page_reserve. + * F: unable to reserve. this gpu page can't be reserved due to some reasons. + * + * examples: + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + */ + +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, + struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t ppos, size_t count) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, badpages_attr); + struct amdgpu_device *adev = con->adev; + const unsigned int element_size = + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; + unsigned int start = (ppos + element_size - 1) / element_size; + unsigned int end = (ppos + count - 1) / element_size; + ssize_t s = 0; + struct ras_badpage *bps = NULL; + unsigned int bps_count = 0; + + memset(buf, 0, count); + + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + return 0; + + for (; start < end && start < bps_count; start++) + s += scnprintf(&buf[s], element_size + 1, + "0x%08x : 0x%08x : %1s\n", + bps[start].bp, + bps[start].size, + amdgpu_ras_badpage_flags_str(bps[start].flags)); + + kfree(bps); + + return s; +} + static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; con->features_attr = (struct device_attribute) { @@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device 
*adev) }, .show = amdgpu_ras_sysfs_features_read, }; + + con->badpages_attr = (struct bin_attribute) { + .attr = { + .name = "gpu_vram_bad_pages", + .mode = S_IRUGO, + }, + .size = 0, + .private = NULL, + .read = amdgpu_ras_sysfs_badpages_read, + }; + sysfs_attr_init(attrs[0]); + sysfs_bin_attr_init(bin_attrs[0]); return sysfs_create_group(&adev->dev->kobj, &group); } @@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; sysfs_remove_group(&adev->dev->kobj, &group); @@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* recovery begin */ + +/* return 0 on success. + * caller need free bps. + */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = 0; + int ret = 0; + + if (!con || !con->eh_data || !bps || !count) + return -EINVAL; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data || data->count == 0) { + *bps = NULL; + goto out; + } + + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + if (!*bps) { + ret = -ENOMEM; + goto out; + } + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ + .bp = data->bps[i].bp, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = 0, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = 1; + else if (data->bps[i].bo == NULL) + (*bps)[i].flags = 2; + } + + *count = data->count; +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 
06ef325b61b8..59994ee00855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -93,6 +93,7 @@ struct amdgpu_ras { struct dentry *ent; /* sysfs */ struct device_attribute features_attr; + struct bin_attribute badpages_attr; /* block array */ struct ras_manager *objs; From 511fdbc33aaa4758f7c445183ff840e251c0b427 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 08:26:27 +0800 Subject: [PATCH 069/178] drm/amdgpu: ras support suspend/resume add ras suspend function. rename ras_post_init to amdgpu_ras_resume. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 +++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 309461d0c275..da120fe330be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2745,7 +2745,7 @@ fence_driver_init: } /* must succeed. */ - amdgpu_ras_post_init(adev); + amdgpu_ras_resume(adev); r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); if (r) { @@ -3503,7 +3503,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, goto out; /* must succeed. */ - amdgpu_ras_post_init(tmp_adev); + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 49c71cfc7fc6..da1dc40b9b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1594,12 +1594,9 @@ recovery_out: } /* do some init work after IP late init as dependence. 
- * TODO - * gpu reset will re-enable ras, need fint out one way to run it again. - * for now, if a gpu reset happened, unless IP enable its ras, the ras state - * will be showed as disabled. + * and it runs in resume/gpu reset/booting up cases. */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1642,6 +1639,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 59994ee00855..c6b34fbd695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -179,6 +179,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); @@ -256,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); -void amdgpu_ras_post_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); From 5e6932fe31624f97e394c4106b4c209db03810d9 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 07:32:54 +0800 Subject: [PATCH 070/178] 
drm/amdgpu: enable ras suspend/resume suspend/resume will change ras state behind us. Let driver get notified. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index da120fe330be..15410fa4a85c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2897,6 +2897,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_amdkfd_suspend(adev); + amdgpu_ras_suspend(adev); + r = amdgpu_device_ip_suspend_phase1(adev); /* evict vram memory */ @@ -3017,6 +3019,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_kms_helper_poll_enable(dev); + amdgpu_ras_resume(adev); + /* * Most of the connector probing functions try to acquire runtime pm * refs to ensure that the GPU is powered on when connector polling is From 6121366b749d037ac5f06df721a1f83dd1cb72ed Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 08:26:02 +0800 Subject: [PATCH 071/178] drm/amdgpu: gmc handle ras resume During S3/S4 bootloader will re-init ras state behind us. Resume might fail or raise a gpu reset. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1e1ef46715da..94d67b7f9217 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -686,8 +686,25 @@ static int gmc_v9_0_ecc_late_init(void *handle) return 0; } /* handle resume path. 
*/ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) From 2c2fc0cd0bd162dde2a30eafb851ff11b1ecd655 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 08:58:56 +0800 Subject: [PATCH 072/178] drm/amdgpu: gfx handle ras resume During S3/S4 bootloader will re-init ras state behind us. Resume might fail or raise a gpu reset. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d07f3dda5b96..7d7d28775f06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3813,8 +3813,26 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; - if (*ras_if) + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. 
*/ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) From 777ac3ba3d85e87c0739ccedad0effd1e6e0ac41 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 9 May 2019 09:00:14 +0800 Subject: [PATCH 073/178] drm/amdgpu: sdma handle ras resume During S3/S4 bootloader will re-init ras state behind us. Resume might fail or raise a gpu reset. Signed-off-by: xinhui pan Reviewed-by: Alex Deucher Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d227215a85bb..7a259c5b6c62 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1523,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle) } /* handle resume path. */ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. 
*/ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) From 120c212518c87ba8b07360a2f42ecdd372817b74 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 8 May 2019 11:05:11 -0400 Subject: [PATCH 074/178] drm/amdgpu: add no_user_fence flag to ring funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we can generalize the no user fence supported engine Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d7fae2676269..cdddce938bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -114,6 +114,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + bool no_user_fence; unsigned vmhub; unsigned extra_dw; From 7ee250b142a3f2b72700ceacfd104543506d2816 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 8 May 2019 11:07:26 -0400 Subject: [PATCH 075/178] drm/amdgpu/UVD: set no_user_fence flag to true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no user fence support for UVD Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 ++ 4 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c4fb58667fd4..bf3385280d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, 
.support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 52bd8a654734..3210a7bd9a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..c61a314c56cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_enc_ring_get_rptr, .get_wptr = uvd_v6_0_enc_ring_get_wptr, .set_wptr = uvd_v6_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 2191d3d0a219..cdb96d4cb424 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, @@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, From f61334b556d527be6351a27bfd91eeb370f0ff7e Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 8 May 2019 11:08:58 -0400 Subject: [PATCH 076/178] drm/amdgpu/VCE: set no_user_fence flag to true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no user fence support for VCE Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 40363ca6c5f1..ab0cb8325796 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf11112..36902ec16dcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs 
vce_v3_0_ring_phys_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index c0ec27991c22..e267b073f525 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, From 474612fc7482d4b88c3012a406803ee6a579a52f Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 8 May 2019 11:10:05 -0400 Subject: [PATCH 077/178] drm/amdgpu/VCN: set no_user_fence flag to true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no user fence support for VCN Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3dbc51f9d3b9..ac2e5a1eb576 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -2054,6 +2054,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = 
vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, @@ -2087,6 +2088,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, @@ -2118,6 +2120,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, From 742b48aed44b7f347e82d2d5c48f68110731ac78 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 8 May 2019 11:13:53 -0400 Subject: [PATCH 078/178] drm/amdgpu: check no_user_fence flag for engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To replace checking ring type and make them generic Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6239b6be6f..8cee09c60c99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1054,11 +1054,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* UVD & VCE fw doesn't support user fences */ + /* MM engine doesn't support user fences */ ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ( - ring->funcs->type == AMDGPU_RING_TYPE_UVD || - ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); From 7d59c41b5150d0641203f91cfcaa0f9af5999cce Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 30 
Apr 2019 16:34:20 +0800 Subject: [PATCH 079/178] drm/amd/powerplay: fix Vega10 mclk/socclk voltage link setup This may affect the Vega10 MCLK OD functionality. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index d5d0db456021..138f9f9ea765 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -5016,7 +5016,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = hwmgr->pptable; struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; - struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table; + struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table; struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk = &data->odn_dpm_table.vdd_dep_on_socclk; @@ -5040,7 +5040,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, break; } if (j == od_vddc_lookup_table->count) { - od_vddc_lookup_table->entries[j-1].us_vdd = + j = od_vddc_lookup_table->count - 1; + od_vddc_lookup_table->entries[j].us_vdd = podn_vdd_dep->entries[i].vddc; data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; } @@ -5048,23 +5049,35 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, } dpm_table = &data->dpm_table.soc_table; for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && - dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { + if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd && +
dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; - dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + for (; (i < dep_table->count) && + (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) { + podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk; + dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + } + break; + } else { + dpm_table->dpm_levels[i].value = dep_table->entries[i].clk; + podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc; + podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd; + podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk; } } if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < - podn_vdd_dep->entries[dep_table->count-1].clk) { + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; - dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk; + dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk; } if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < - podn_vdd_dep->entries[dep_table->count-1].vddInd) { + podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = 
podn_vdd_dep->entries[dep_table->count-1].vddInd; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd; } } } From 36f5f8a74981b857596e7eedd424f4573a81c6e9 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 6 May 2019 16:37:40 +0800 Subject: [PATCH 080/178] drm/amd/powerplay: valid Vega10 DPMTABLE_OD_UPDATE_VDDC settings V2 With user specified voltage(DPMTABLE_OD_UPDATE_VDDC), the AVFS will be disabled. However, the buggy code makes this actually not working as expected. - V2: clear all OD flags except DPMTABLE_OD_UPDATE_VDDC Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 138f9f9ea765..05f6bf7d703e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -2466,11 +2466,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) return; } } - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { - data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; - } } /** @@ -3683,6 +3678,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, vega10_update_avfs(hwmgr); + /* + * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC. + * That will help to keep AVFS disabled. + */ data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; return 0; From 319dd4764ffdbe580631464ae7674b1c7a001ae0 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 6 May 2019 16:45:13 +0800 Subject: [PATCH 081/178] drm/amd/powerplay: avoid repeat AVFS enablement/disablement No need to enable or disable AVFS if it's already in wanted state.
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 05f6bf7d703e..eb9023f02ffa 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -2364,6 +2364,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable) struct vega10_hwmgr *data = hwmgr->backend; if (data->smu_features[GNLD_AVFS].supported) { + /* Already enabled or disabled */ + if (!(enable ^ data->smu_features[GNLD_AVFS].enabled)) + return 0; + if (enable) { PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, true, From fc99f2be06c6e1b27cf97529abd6304374b750f7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 7 May 2019 12:49:03 +0800 Subject: [PATCH 082/178] drm/amd/powerplay: update Vega10 power state on OD Update Vega10 top performance level power state accordingly on OD. 
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index eb9023f02ffa..346cf61d55f6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -5013,6 +5013,63 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, return true; } +static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct pp_power_state *ps = hwmgr->request_ps; + struct vega10_power_state *vega10_ps; + struct vega10_single_dpm_table *gfx_dpm_table = + &data->dpm_table.gfx_table; + struct vega10_single_dpm_table *soc_dpm_table = + &data->dpm_table.soc_table; + struct vega10_single_dpm_table *mem_dpm_table = + &data->dpm_table.mem_table; + int max_level; + + if (!ps) + return; + + vega10_ps = cast_phw_vega10_power_state(&ps->hardware); + max_level = vega10_ps->performance_level_count - 1; + + if (vega10_ps->performance_levels[max_level].gfx_clock != + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].gfx_clock = + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].soc_clock != + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].soc_clock = + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].mem_clock != + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].mem_clock = + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; + + if (!hwmgr->ps) + return; + + ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps 
- 1)); + vega10_ps = cast_phw_vega10_power_state(&ps->hardware); + max_level = vega10_ps->performance_level_count - 1; + + if (vega10_ps->performance_levels[max_level].gfx_clock != + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].gfx_clock = + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].soc_clock != + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].soc_clock = + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].mem_clock != + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].mem_clock = + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; +} + static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type) { @@ -5083,6 +5140,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd; } } + vega10_odn_update_power_state(hwmgr); } static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, @@ -5117,6 +5175,7 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); vega10_odn_initial_default_setting(hwmgr); + vega10_odn_update_power_state(hwmgr); return 0; } else if (PP_OD_COMMIT_DPM_TABLE == type) { vega10_check_dpm_table_updated(hwmgr); From 9995ac560bc3f69aaa12e91065b46cfa03e32e56 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 8 May 2019 13:55:21 +0800 Subject: [PATCH 083/178] drm/amd/powerplay: force to update all clock tables on OD reset On OD reset, the clock tables in SMU need to be reset to default. 
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 346cf61d55f6..b298aba1206b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -5176,6 +5176,10 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); vega10_odn_initial_default_setting(hwmgr); vega10_odn_update_power_state(hwmgr); + /* force to update all clock tables */ + data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK | + DPMTABLE_UPDATE_MCLK | + DPMTABLE_UPDATE_SOCCLK; return 0; } else if (PP_OD_COMMIT_DPM_TABLE == type) { vega10_check_dpm_table_updated(hwmgr); From 4c6097efa8ab7721ab0c1dc577ca5db68de11203 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 8 May 2019 14:38:22 +0800 Subject: [PATCH 084/178] drm/amd/powerplay: update Vega10 ACG Avfs Gb parameters Update Vega10 ACG Avfs GB parameters. 
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index b298aba1206b..9585ba51d853 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -2267,8 +2267,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1; pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2; pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB; - pp_table->AcgAvfsGb.m1_shift = 0; - pp_table->AcgAvfsGb.m2_shift = 0; + pp_table->AcgAvfsGb.m1_shift = 24; + pp_table->AcgAvfsGb.m2_shift = 12; pp_table->AcgAvfsGb.b_shift = 0; } else { From dca29491c37fd9d685bca7da4fbc1f93e7aa552e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 10 May 2019 19:56:30 +0200 Subject: [PATCH 085/178] drm/amdgpu: remove static GDS, GWS and OA allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As far as we know this was never used by userspace and so should be removed. 
Signed-off-by: Christian König Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 21 ++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 24 ++----------- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 32 +++-------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 32 +++-------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 39 ++++----------------- 7 files changed, 28 insertions(+), 137 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5c79da8e1150..d497467b7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = adev->gds.gds_gfx_bo; - list->gws_obj = adev->gds.gws_gfx_bo; - list->oa_obj = adev->gds.oa_gfx_bo; + list->gds_obj = NULL; + list->gws_obj = NULL; + list->oa_obj = NULL; array = amdgpu_bo_list_array_entry(list, 0); memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index f89f5734d985..dad2186f4ed5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -27,26 +27,11 @@ struct amdgpu_ring; struct amdgpu_bo; -struct amdgpu_gds_asic_info { - uint32_t total_size; - uint32_t gfx_partition_size; - uint32_t cs_partition_size; -}; - struct amdgpu_gds { - struct amdgpu_gds_asic_info mem; - struct amdgpu_gds_asic_info gws; - struct amdgpu_gds_asic_info oa; + uint32_t gds_size; + uint32_t gws_size; + uint32_t oa_size; uint32_t gds_compute_max_wave_id; - - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. 
- * Such resource is shared between all gfx clients. - * TODO: move this operation to user space - * */ - struct amdgpu_bo* gds_gfx_bo; - struct amdgpu_bo* gws_gfx_bo; - struct amdgpu_bo* oa_gfx_bo; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b17d0545728e..edb675103bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; - gds_info.gds_total_size = adev->gds.mem.total_size; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size; + gds_info.compute_partition_size = adev->gds.gds_size; + gds_info.gds_total_size = adev->gds.gds_size; + gds_info.gws_per_compute_partition = adev->gds.gws_size; + gds_info.oa_per_compute_partition = adev->gds.oa_size; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? 
-EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0c52d1f9fe0f..af1e218c6a74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1753,44 +1753,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize various on-chip memory pools */ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size); + adev->gds.gds_size); if (r) { DRM_ERROR("Failed initializing GDS heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - 4, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size); + adev->gds.gws_size); if (r) { DRM_ERROR("Failed initializing gws heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size); + adev->gds.oa_size); if (r) { DRM_ERROR("Failed initializing oa heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a59e0fdf5a97..4cd1731d62fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle) static int gfx_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, 
NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 02955e6e9dd9..25400b708722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle) static int gfx_v8_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - 
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7d7d28775f06..c763733619fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1461,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) /* GDS reserve memory: 64 bytes alignment */ adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); @@ -1570,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + + (adev->gds.gds_size + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); @@ -1784,10 +1783,6 @@ static int 
gfx_v9_0_sw_fini(void *handle) kfree(ras_if); } - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -5323,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: - adev->gds.mem.total_size = 0x1000; + adev->gds.gds_size = 0x1000; break; default: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; } @@ -5353,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, From c3b6c6074166499517e7a298f2cdec26648685a2 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 13 May 2019 13:57:29 +0800 Subject: [PATCH 086/178] drm/amdgpu: suppress repeating tmo report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit only report once per TMO job and the timer would be restarted upon the job finished if it's just slow. 
Suggested-by: Christian König Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a17fb1af204..7ab1241bd9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_device_should_recover_gpu(ring->adev)) amdgpu_device_gpu_recover(ring->adev, job); + else + drm_sched_suspend_timeout(&ring->sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, From fe2b5323d2c3cedaa3bf943dc7a0d233c853c914 Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Tue, 14 May 2019 10:03:35 +0800 Subject: [PATCH 087/178] drm/amdgpu/sriov: Need to initialize the HDP_NONSURFACE_BASE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit it requires to initialize HDP_NONSURFACE_BASE, so as to avoid using the value left by a previous VM under sriov scenario.
v2: it should not hurt baremetal, generalize it for both sriov and baremetal Signed-off-by: Emily Deng Signed-off-by: Tiecheng Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 94d67b7f9217..59c580bd5a3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1181,6 +1181,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + /* After HDP is initialized, flush HDP.*/ adev->nbio_funcs->hdp_flush(adev, NULL); From 05eee12dd60ed5f19fefa93a79bacad9dd2c5883 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 13 May 2019 12:15:45 -0400 Subject: [PATCH 088/178] drm/amdgpu: move the VCN DPG mode read and write to VCN Since this is VCN specific and only used by VCN Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15_common.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a0ad19af9080..98bd0982d325 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,27 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + 
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index c634606e64bd..47f74dab365d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,27 +69,6 @@ } \ } while (0) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) - -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ - do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - } while (0) - - #define WREG32_RLC(reg, value) \ do { \ if 
(amdgpu_virt_support_rlc_prg_reg(adev)) { \ From 9dc7b02a3ccc78ca59fc20f466a166ca7e30d9e8 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 13 May 2019 12:41:54 -0400 Subject: [PATCH 089/178] drm/amdgpu: make VCN DPG pause mode detached from general VCN It should be attached to VCN 1.0 Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 130 +---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 132 +++++++++++++++++++++++- 3 files changed, 135 insertions(+), 129 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..118451f5e3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) return 0; } -static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) -{ - int ret_code; - uint32_t reg_data = 0; - uint32_t reg_data2 = 0; - struct amdgpu_ring *ring; - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); - - if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* pause DPG non-jpeg */ - reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - 
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg non-jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.fw_based = new_state->fw_based; - } - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); - - if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* Make sure JPRG 
Snoop is disabled before sending the pause */ - reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); - reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); - - /* pause DPG jpeg */ - reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.jpeg = new_state->jpeg; - } - - return 0; -} - static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); @@ -417,7 +291,7 @@ void 
amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 98bd0982d325..a1ee19251aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -102,6 +102,8 @@ struct amdgpu_vcn { unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index ac2e5a1eb576..bb47f5b24be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); /** * vcn_v1_0_early_init - set function pointers @@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - return r; + adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + + return 0; } /** @@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) return r; } +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != 
new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, 
mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + 
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; From 5bb23532732384e90c876bbd2139fd08644b9847 Mon Sep 17 00:00:00 2001 From: Ori Messinger Date: Mon, 22 Apr 2019 13:52:52 -0400 Subject: [PATCH 090/178] drm/amdgpu: Report firmware versions with sysfs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Firmware versions can be found as separate sysfs files at: /sys/class/drm/cardX/device/fw_version (where X is the card number) The firmware versions are displayed in hexadecimal. v2: Moved sysfs files to subfolder Signed-off-by: Ori Messinger Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 63 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 + 3 files changed, 70 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 15410fa4a85c..6fea2d121537 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2705,6 +2705,10 @@ fence_driver_init: if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); + r = amdgpu_ucode_sysfs_init(adev); + if (r) + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2817,6 +2821,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_device_doorbell_fini(adev); amdgpu_debugfs_regs_cleanup(adev); 
device_remove_file(adev->dev, &dev_attr_pcie_replay_count); + amdgpu_ucode_sysfs_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7b33867036e7..33c1eb76c076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +#define FW_VERSION_ATTR(name, mode, field) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct drm_device *ddev = dev_get_drvdata(dev); \ + struct amdgpu_device *adev = ddev->dev_private; \ + \ + return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \ +} \ +static DEVICE_ATTR(name, mode, show_##name, NULL) + +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); +FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); +FW_VERSION_ATTR(sdma2_fw_version, 0444, 
sdma.instance[1].fw_version); +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); + +static struct attribute *fw_attrs[] = { + &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, + &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr, + &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr, + &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr, + &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr, + &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr, + &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr, + &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, + &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, + &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, + &dev_attr_dmcu_fw_version.attr, NULL +}; + +static const struct attribute_group fw_attr_group = { + .name = "fw_version", + .attrs = fw_attrs +}; + +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) +{ + return sysfs_create_group(&adev->dev->kobj, &fw_attr_group); +} + +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev) +{ + sysfs_remove_group(&adev->dev->kobj, &fw_attr_group); +} + static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, struct amdgpu_firmware_info *ucode, uint64_t mc_addr, void *kptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7ac25a1c7853..ec4c2ea1f05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev); void amdgpu_ucode_free_bo(struct amdgpu_device *adev); +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum 
amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); From d7316ddc610f15dac35eab4f9471cf1f45335dc3 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 14 May 2019 09:05:37 -0400 Subject: [PATCH 091/178] drm/amd/display: Add ASICREV_IS_PICASSO [WHY] We only want to load DMCU FW on Picasso and Raven 2, not on Raven 1. Signed-off-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 1a9b7507784f..072d8d7debf5 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -139,13 +139,14 @@ #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) -#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */ #define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define RAVEN1_F0 0xF0 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) +#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) +#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) +#endif /* DCN1_01 */ #define FAMILY_RV 142 /* DCN 1*/ From a7669aff77649a34b0601aef87879095caed7a5f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 29 Apr 2019 09:39:15 -0400 Subject: [PATCH 092/178] drm/amd/display: Don't load DMCU for Raven 1 (v2) [WHY] Some early Raven boards had a bad SBIOS that doesn't play nicely with the DMCU FW. We thought the issues were fixed by ignoring errors on DMCU load but that doesn't seem to be the case. 
We've still seen reports of users unable to boot their systems at all. [HOW] Disable DMCU load on Raven 1. Only load it for Raven 2 and Picasso. v2: Fix ifdef (Alex) Signed-off-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c6713432935e..8d53aced6c9f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -29,6 +29,7 @@ #include "dm_services_types.h" #include "dc.h" #include "dc/inc/core_types.h" +#include "dal_asic_id.h" #include "vid.h" #include "amdgpu.h" @@ -640,7 +641,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) static int load_dmcu_fw(struct amdgpu_device *adev) { - const char *fw_name_dmcu; + const char *fw_name_dmcu = NULL; int r; const struct dmcu_firmware_header_v1_0 *hdr; @@ -663,7 +664,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case CHIP_VEGA20: return 0; case CHIP_RAVEN: - fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) + if (ASICREV_IS_PICASSO(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; + else if (ASICREV_IS_RAVEN2(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; + else +#endif + return 0; break; default: DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); From 97df424fe7a7497a129f057527f646c91b881fbf Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 14 May 2019 09:12:45 -0400 Subject: [PATCH 093/178] drm/amd/display: Drop DCN1_01 guards [WHY] These were only needed for bringup. They're not needed anymore. 
Signed-off-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 6 ------ .../display/dc/bios/command_table_helper2.c | 5 ----- .../gpu/drm/amd/display/dc/core/dc_resource.c | 4 ---- .../drm/amd/display/dc/dcn10/dcn10_hubbub.c | 2 -- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 19 ------------------- .../gpu/drm/amd/display/dc/gpio/hw_factory.c | 4 ---- .../drm/amd/display/dc/gpio/hw_translate.c | 5 ----- .../gpu/drm/amd/display/include/dal_asic_id.h | 4 ---- .../gpu/drm/amd/display/include/dal_types.h | 2 -- 9 files changed, 51 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 13a6ce9c8e94..ed654a76c76a 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,7 +5,6 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) - select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and @@ -16,11 +15,6 @@ config DRM_AMD_DC_DCN1_0 help RV family support for display engine -config DRM_AMD_DC_DCN1_01 - def_bool n - help - RV2 family for display engine - config DEBUG_KERNEL_DC bool "Enable kgdb break in DC" depends on DRM_AMD_DC diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 8196f3bb10c7..53deba42007a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2( return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: - *h = dal_cmd_tbl_helper_dce112_get_table2(); - return true; -#endif - -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: *h = dal_cmd_tbl_helper_dce112_get_table2(); return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 58ce7a6b914c..d9bfffb2c48d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -93,10 +93,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case FAMILY_RV: dc_version = DCN_VERSION_1_0; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) dc_version = DCN_VERSION_1_01; -#endif break; #endif default: @@ -147,9 +145,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: -#endif res_pool = dcn10_create_resource_pool(init_data, dc); break; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 177247595974..bf978831bb0e 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -927,9 +927,7 @@ void hubbub1_construct(struct hubbub *hubbub, hubbub1->masks = hubbub_mask; hubbub1->debug_test_index_pstate = 0x7; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ctx->dce_version == DCN_VERSION_1_01) hubbub1->debug_test_index_pstate = 0xB; -#endif } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index ddb020a53098..dc7cf3704252 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -152,9 +152,7 @@ enum dcn10_clk_src_array_id { DCN10_CLK_SRC_PLL2, DCN10_CLK_SRC_PLL3, DCN10_CLK_SRC_TOTAL, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 -#endif }; /* begin ********************* @@ -522,7 +520,6 @@ static const struct resource_caps res_cap = { .num_ddc = 4, }; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) static const struct resource_caps rv2_res_cap = { .num_timing_generator = 3, .num_opp = 3, @@ -532,7 +529,6 @@ static const struct resource_caps rv2_res_cap = { .num_pll = 3, .num_ddc = 3, }; -#endif static const struct dc_plane_cap plane_cap = { .type = DC_PLANE_TYPE_DCN_UNIVERSAL, @@ -1270,11 +1266,9 @@ static bool construct( ctx->dc_bios->regs = &bios_regs; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ctx->dce_version == DCN_VERSION_1_01) pool->base.res_cap = &rv2_res_cap; else -#endif pool->base.res_cap = &res_cap; pool->base.funcs = &dcn10_res_pool_funcs; @@ -1291,10 +1285,8 @@ static bool construct( /* max pipe num for ASIC before check pipe fuses */ pool->base.pipe_count = pool->base.res_cap->num_timing_generator; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) pool->base.pipe_count = 3; -#endif dc->caps.max_video_width = 3840; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 100; @@ -1327,26 +1319,17 @@ 
static bool construct( CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); -#ifdef CONFIG_DRM_AMD_DC_DCN1_01 if (dc->ctx->dce_version == DCN_VERSION_1_0) { pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = dcn10_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); } -#else - pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); -#endif pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; -#endif pool->base.dp_clock_source = dcn10_clock_source_create(ctx, ctx->dc_bios, @@ -1386,7 +1369,6 @@ static bool construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) { struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; struct dcn_ip_params *dcn_ip = dc->dcn_ip; @@ -1397,7 +1379,6 @@ static bool construct( dcn_soc->dram_clock_change_latency = 23; dcn_ip->max_num_dpp = 3; } -#endif if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { dc->dcn_soc->urgent_latency = 3; dc->debug.disable_dmcu = true; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c index c2028c4744a6..a610fae16280 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c @@ -84,10 +84,6 @@ bool dal_hw_factory_init( return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: - dal_hw_factory_dcn10_init(factory); - return true; -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: dal_hw_factory_dcn10_init(factory); return true; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c 
b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c index 236ca28784a9..77615146b96e 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c @@ -84,11 +84,6 @@ bool dal_hw_translate_init( dal_hw_translate_dcn10_init(translate); return true; #endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) - case DCN_VERSION_1_01: - dal_hw_translate_dcn10_init(translate); - return true; -#endif default: BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 072d8d7debf5..63c3e77159d9 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -131,11 +131,9 @@ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ #define RAVEN_A0 0x01 #define RAVEN_B0 0x21 -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) /* DCN1_01 */ #define PICASSO_A0 0x41 #define RAVEN2_A0 0x81 -#endif #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF @@ -143,10 +141,8 @@ #define RAVEN1_F0 0xF0 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) #define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */ #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h index f5bd869d4320..dabdbc0999d4 100644 --- a/drivers/gpu/drm/amd/display/include/dal_types.h +++ b/drivers/gpu/drm/amd/display/include/dal_types.h @@ -45,9 +45,7 @@ enum dce_version { DCE_VERSION_12_1, DCE_VERSION_MAX, DCN_VERSION_1_0, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) DCN_VERSION_1_01, -#endif /* DCN1_01 */ DCN_VERSION_MAX }; From 8667eb64a6b2bad2111361839e91209bd16fe92f Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 10 May 2019 16:56:27 
+0800 Subject: [PATCH 094/178] drm/amd/powerplay: drop unnecessary sw smu check There is already sw smu check on IP block adding. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 52d919a8b70a..ed8840026e59 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -436,9 +436,6 @@ static int smu_sw_init(void *handle) struct smu_context *smu = &adev->smu; int ret; - if (!is_support_sw_smu(adev)) - return -EINVAL; - smu->pool_size = adev->pm.smu_prv_buffer_size; smu->smu_feature.feature_num = SMU_FEATURE_MAX; mutex_init(&smu->smu_feature.mutex); @@ -490,9 +487,6 @@ static int smu_sw_fini(void *handle) struct smu_context *smu = &adev->smu; int ret; - if (!is_support_sw_smu(adev)) - return -EINVAL; - ret = smu_smc_table_sw_fini(smu); if (ret) { pr_err("Failed to sw fini smc table!\n"); @@ -818,9 +812,6 @@ static int smu_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if (!is_support_sw_smu(adev)) - return -EINVAL; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { ret = smu_load_microcode(smu); if (ret) @@ -879,9 +870,6 @@ static int smu_hw_fini(void *handle) struct smu_table_context *table_context = &smu->smu_table; int ret = 0; - if (!is_support_sw_smu(adev)) - return -EINVAL; - kfree(table_context->driver_pptable); table_context->driver_pptable = NULL; @@ -936,9 +924,6 @@ static int smu_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if (!is_support_sw_smu(adev)) - return -EINVAL; - ret = smu_system_features_control(smu, false); if (ret) return ret; @@ -954,9 +939,6 @@ static int smu_resume(void *handle) 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if (!is_support_sw_smu(adev)) - return -EINVAL; - pr_info("SMU is resuming...\n"); mutex_lock(&smu->mutex); From e969f29148baee095a6ca5f81e840890578abeac Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 May 2019 11:16:27 +0800 Subject: [PATCH 095/178] drm/amd/powerplay: drop redundant smu call smu_get_clk_info_from_vbios() was called repeatedly. It seems a merge error. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index ed8840026e59..b119a276a977 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -636,10 +636,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, * check if the format_revision in vbios is up to pptable header * version, and the structure size is not 0. */ - ret = smu_get_clk_info_from_vbios(smu); - if (ret) - return ret; - ret = smu_check_pptable(smu); if (ret) return ret; From fe75a323713e9ed254c4e3d27b391b2416cee237 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 May 2019 15:32:21 +0800 Subject: [PATCH 096/178] drm/amd/powerplay: support ppfeatures sysfs interface on sw smu routine Support ppfeatures sysfs interface on Vega20 sw smu routine. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 10 +- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 7 +- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 153 ++++++++++++++++++ drivers/gpu/drm/amd/powerplay/vega20_ppt.h | 44 +++++ 4 files changed, 211 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index bd40d5d72508..adba9ea03e63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -767,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); - if (adev->powerplay.pp_funcs->set_ppfeature_status) { + if (is_support_sw_smu(adev)) { + ret = smu_set_ppfeature_status(&adev->smu, featuremask); + if (ret) + return -EINVAL; + } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) return -EINVAL; @@ -783,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_ppfeature_status) + if (is_support_sw_smu(adev)) { + return smu_get_ppfeature_status(&adev->smu, buf); + } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 89052414e9f1..3a9c253759dc 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -458,6 +458,8 @@ struct pptable_funcs { uint32_t *mclk_mask, uint32_t *soc_mask); int (*set_cpu_power_state)(struct smu_context *smu); + int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures); + int 
(*get_ppfeature_status)(struct smu_context *smu, char *buf); }; struct smu_funcs @@ -727,7 +729,10 @@ struct smu_funcs ((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0) #define smu_set_xgmi_pstate(smu, pstate) \ ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) - +#define smu_set_ppfeature_status(smu, ppfeatures) \ + ((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL) +#define smu_get_ppfeature_status(smu, buf) \ + ((smu)->ppt_funcs->get_ppfeature_status ? (smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL) extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, uint16_t *size, uint8_t *frev, uint8_t *crev, diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 8fafcbdb1dfd..b39f3d439332 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, return ret; } +static int vega20_get_enabled_smc_features(struct smu_context *smu, + uint64_t *features_enabled) +{ + uint32_t feature_mask[2]; + int ret = 0; + + ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); + if (ret) + return ret; + + *features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) | + (((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK)); + + return ret; +} + +static int vega20_enable_smc_features(struct smu_context *smu, + bool enable, uint64_t feature_mask) +{ + uint32_t smu_features_low, smu_features_high; + int ret = 0; + + smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT); + smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT); + + if (enable) { + ret = smu_send_smc_msg_with_param(smu, 
SMU_MSG_EnableSmuFeaturesLow, + smu_features_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, + smu_features_high); + if (ret) + return ret; + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, + smu_features_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, + smu_features_high); + if (ret) + return ret; + } + + return 0; + +} + +static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf) +{ + static const char *ppfeature_name[] = { + "DPM_PREFETCHER", + "GFXCLK_DPM", + "UCLK_DPM", + "SOCCLK_DPM", + "UVD_DPM", + "VCE_DPM", + "ULV", + "MP0CLK_DPM", + "LINK_DPM", + "DCEFCLK_DPM", + "GFXCLK_DS", + "SOCCLK_DS", + "LCLK_DS", + "PPT", + "TDC", + "THERMAL", + "GFX_PER_CU_CG", + "RM", + "DCEFCLK_DS", + "ACDC", + "VR0HOT", + "VR1HOT", + "FW_CTF", + "LED_DISPLAY", + "FAN_CONTROL", + "GFX_EDC", + "GFXOFF", + "CG", + "FCLK_DPM", + "FCLK_DS", + "MP1CLK_DS", + "MP0CLK_DS", + "XGMI", + "ECC"}; + static const char *output_title[] = { + "FEATURES", + "BITMASK", + "ENABLEMENT"}; + uint64_t features_enabled; + int i; + int ret = 0; + int size = 0; + + ret = vega20_get_enabled_smc_features(smu, &features_enabled); + if (ret) + return ret; + + size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sprintf(buf + size, "%-19s %-22s %s\n", + output_title[0], + output_title[1], + output_title[2]); + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + ppfeature_name[i], + 1ULL << i, + (features_enabled & (1ULL << i)) ? 
"Y" : "N"); + } + + return size; +} + +static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks) +{ + uint64_t features_enabled; + uint64_t features_to_enable; + uint64_t features_to_disable; + int ret = 0; + + if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) + return -EINVAL; + + ret = vega20_get_enabled_smc_features(smu, &features_enabled); + if (ret) + return ret; + + features_to_disable = + features_enabled & ~new_ppfeature_masks; + features_to_enable = + ~features_enabled & new_ppfeature_masks; + + pr_debug("features_to_disable 0x%llx\n", features_to_disable); + pr_debug("features_to_enable 0x%llx\n", features_to_enable); + + if (features_to_disable) { + ret = vega20_enable_smc_features(smu, false, features_to_disable); + if (ret) + return ret; + } + + if (features_to_enable) { + ret = vega20_enable_smc_features(smu, true, features_to_enable); + if (ret) + return ret; + } + + return 0; +} + static const struct pptable_funcs vega20_ppt_funcs = { .alloc_dpm_context = vega20_allocate_dpm_context, .store_powerplay_table = vega20_store_powerplay_table, @@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = { .unforce_dpm_levels = vega20_unforce_dpm_levels, .upload_dpm_level = vega20_upload_dpm_level, .get_profiling_clk_mask = vega20_get_profiling_clk_mask, + .set_ppfeature_status = vega20_set_ppfeature_status, + .get_ppfeature_status = vega20_get_ppfeature_status, }; void vega20_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h index 5a0d2af63173..87f3a8303645 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h @@ -36,6 +36,50 @@ #define AVFS_CURVE 0 #define OD8_HOTCURVE_TEMPERATURE 85 +#define SMU_FEATURES_LOW_MASK 0x00000000FFFFFFFF +#define SMU_FEATURES_LOW_SHIFT 0 +#define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000 +#define SMU_FEATURES_HIGH_SHIFT 32 + +enum { 
+ GNLD_DPM_PREFETCHER = 0, + GNLD_DPM_GFXCLK, + GNLD_DPM_UCLK, + GNLD_DPM_SOCCLK, + GNLD_DPM_UVD, + GNLD_DPM_VCE, + GNLD_ULV, + GNLD_DPM_MP0CLK, + GNLD_DPM_LINK, + GNLD_DPM_DCEFCLK, + GNLD_DS_GFXCLK, + GNLD_DS_SOCCLK, + GNLD_DS_LCLK, + GNLD_PPT, + GNLD_TDC, + GNLD_THERMAL, + GNLD_GFX_PER_CU_CG, + GNLD_RM, + GNLD_DS_DCEFCLK, + GNLD_ACDC, + GNLD_VR0HOT, + GNLD_VR1HOT, + GNLD_FW_CTF, + GNLD_LED_DISPLAY, + GNLD_FAN_CONTROL, + GNLD_DIDT, + GNLD_GFXOFF, + GNLD_CG, + GNLD_DPM_FCLK, + GNLD_DS_FCLK, + GNLD_DS_MP1CLK, + GNLD_DS_MP0CLK, + GNLD_XGMI, + GNLD_ECC, + + GNLD_FEATURES_MAX +}; + struct vega20_dpm_level { bool enabled; uint32_t value; From 1de888e8fc5931a25a5979d9156154e92e860f24 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 May 2019 17:31:03 +0800 Subject: [PATCH 097/178] drm/amd/powerplay: honor hw limit on fetching metrics data Request too frequently may get corrupt data. Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 ++ drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 33 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 3a9c253759dc..2cb4cc2a8208 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -403,6 +403,9 @@ struct smu_context uint32_t default_power_profile_mode; uint32_t smc_if_version; + + unsigned long metrics_time; + void *metrics_table; }; struct pptable_funcs { diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index cd36c4272659..c139f5f75464 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -369,6 +369,13 @@ static int smu_v11_0_init_power(struct smu_context *smu) return -ENOMEM; smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context); + smu->metrics_time = 
0; + smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + if (!smu->metrics_table) { + kfree(smu_power->power_context); + return -ENOMEM; + } + return 0; } @@ -379,7 +386,9 @@ static int smu_v11_0_fini_power(struct smu_context *smu) if (!smu_power->power_context || smu_power->power_context_size == 0) return -EINVAL; + kfree(smu->metrics_table); kfree(smu_power->power_context); + smu->metrics_table = NULL; smu_power->power_context = NULL; smu_power->power_context_size = 0; @@ -1093,6 +1102,26 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) return ret; } +static int smu_v11_0_get_metrics_table(struct smu_context *smu, + SmuMetrics_t *metrics_table) +{ + int ret = 0; + + if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) { + ret = smu_update_table(smu, TABLE_SMU_METRICS, + (void *)metrics_table, false); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t)); + smu->metrics_time = jiffies; + } else + memcpy(metrics_table, smu->metrics_table, sizeof(SmuMetrics_t)); + + return ret; +} + static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, uint32_t *value) { @@ -1102,7 +1131,7 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, if (!value) return -EINVAL; - ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + ret = smu_v11_0_get_metrics_table(smu, &metrics); if (ret) return ret; @@ -1139,7 +1168,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value) if (!value) return -EINVAL; - ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + ret = smu_v11_0_get_metrics_table(smu, &metrics); if (ret) return ret; From 986b21b8c88d7c2a862e774dd987120b1d8c89ed Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 14 May 2019 10:38:42 +0800 Subject: [PATCH 098/178] drm/amd/powerplay: support uclk activity retrieve on 
sw smu routine Support realtime uclk activity report. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index c139f5f75464..93ad3997becc 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1123,6 +1123,7 @@ static int smu_v11_0_get_metrics_table(struct smu_context *smu, } static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, + enum amd_pp_sensors sensor, uint32_t *value) { int ret = 0; @@ -1135,7 +1136,17 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, if (ret) return ret; - *value = metrics.AverageGfxActivity; + switch (sensor) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + *value = metrics.AverageGfxActivity; + break; + case AMDGPU_PP_SENSOR_MEM_LOAD: + *value = metrics.AverageUclkActivity; + break; + default: + pr_err("Invalid sensor for retrieving clock activity\n"); + return -EINVAL; + } return 0; } @@ -1210,7 +1221,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu, int ret = 0; switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: + case AMDGPU_PP_SENSOR_MEM_LOAD: ret = smu_v11_0_get_current_activity_percent(smu, + sensor, (uint32_t *)data); *size = 4; break; From 917048d132d736f31b1240d99287b9a2c6e524cd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 14 May 2019 11:46:27 +0800 Subject: [PATCH 099/178] drm/amd/powerplay: support sw smu hotspot and memory temperature retrieval Support hotspot and memory temperature retrieval on sw smu routine. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h | 3 + drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 74 ++++++++++++++++--- 2 files changed, 66 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index aa8d81f4111e..02c965d64256 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -36,6 +36,9 @@ #define smnMP0_FW_INTF 0x30101c0 #define smnMP1_PUB_CTRL 0x3010b14 +#define TEMP_RANGE_MIN (0) +#define TEMP_RANGE_MAX (80 * 1000) + struct smu_11_0_max_sustainable_clocks { uint32_t display_clock; uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 93ad3997becc..da59a0972b8e 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1011,9 +1011,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_ static int smu_v11_0_get_thermal_range(struct smu_context *smu, struct PP_TemperatureRange *range) { + PPTable_t *pptable = smu->smu_table.driver_pptable; memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - range->max = smu->smu_table.software_shutdown_temp * + range->max = pptable->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_crit_max = pptable->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_crit_max = pptable->ThbmLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; @@ -1078,7 +1089,16 @@ 
static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu) static int smu_v11_0_start_thermal_control(struct smu_context *smu) { int ret = 0; - struct PP_TemperatureRange range; + struct PP_TemperatureRange range = { + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX}; struct amdgpu_device *adev = smu->adev; smu_v11_0_get_thermal_range(smu, &range); @@ -1098,6 +1118,13 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; + adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; + adev->pm.dpm.thermal.min_mem_temp = range.mem_min; + adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; return ret; } @@ -1151,22 +1178,45 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, return 0; } -static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value) +static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value) { struct amdgpu_device *adev = smu->adev; + SmuMetrics_t metrics; uint32_t temp = 0; + int ret = 0; if (!value) return -EINVAL; - temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); - temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> - CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; + ret = smu_v11_0_get_metrics_table(smu, &metrics); + if (ret) + return ret; - temp = temp & 0x1ff; - temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; + switch (sensor) { + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + temp = RREG32_SOC15(THM, 0, 
mmCG_MULT_THERMAL_STATUS); + temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; - *value = temp; + temp = temp & 0x1ff; + temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; + + *value = temp; + break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + *value = metrics.TemperatureEdge * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + *value = metrics.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + default: + pr_err("Invalid sensor for retrieving temp\n"); + return -EINVAL; + } return 0; } @@ -1235,8 +1285,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu, ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data); *size = 4; break; - case AMDGPU_PP_SENSOR_GPU_TEMP: - ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data); + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + case AMDGPU_PP_SENSOR_EDGE_TEMP: + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: From 0623fdb061d8572876efe3b075e37086a3f93e34 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 26 Apr 2019 00:29:13 -0400 Subject: [PATCH 100/178] drm/amd/display: define v_total_min and max parameters add these parameters for future use Signed-off-by: Charlene Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index c5b791d158a7..6cc59f138095 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -219,6 +219,9 @@ struct _vcs_dpi_display_pipe_source_params_st { unsigned char xfc_enable; unsigned char xfc_slave; struct 
_vcs_dpi_display_xfc_params_st xfc_params; + //for vstartuplines calculation freesync + unsigned char v_total_min; + unsigned char v_total_max; }; struct writeback_st { int wb_src_height; @@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned char otg_inst; unsigned char odm_combine; unsigned char use_maximum_vstartup; + unsigned int vtotal_max; + unsigned int vtotal_min; }; struct _vcs_dpi_display_pipe_params_st { From 0880d9ffaea57b3ccaf7f6abecf0f9569d9bc18f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 25 Apr 2019 12:11:08 -0400 Subject: [PATCH 101/178] drm/amd/display: Hook up CRC capture support for dce120 [Why] Many IGT tests require CRC capture in order to confirm that the output is visually correct. These skip on dce120 because configure_crc and get_crc aren't set. [How] Hook up is_tg_enabled, configure_crc and get_crc functions on dce120's timing generator. The logic should be the same as DCE and DCN with some minor register naming differences. 
Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../dc/dce120/dce120_timing_generator.c | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 5ebbbda77021..098e56962f2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -1114,6 +1114,92 @@ static bool dce120_arm_vert_intr( return true; } + +static bool dce120_is_tg_enabled(struct timing_generator *tg) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + uint32_t value, field; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL, + tg110->offsets.crtc); + field = get_reg_field_value(value, CRTC0_CRTC_CONTROL, + CRTC_CURRENT_MASTER_EN_STATE); + + return field == 1; +} + +static bool dce120_configure_crc(struct timing_generator *tg, + const struct crc_params *params) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + + /* Cannot configure crc on a CRTC that is disabled */ + if (!dce120_is_tg_enabled(tg)) + return false; + + /* First, disable CRC before we configure it. */ + dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc, 0); + + if (!params->enable) + return true; + + /* Program frame boundaries */ + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. 
*/ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode (CONT_EN selects continuous capture) and selection, and enable. Only using CRC0 */ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + CRTC_CRC0_SELECT, params->selection, + CRTC_CRC_EN, 1); + + return true; +} + +static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, + uint32_t *g_y, uint32_t *b_cb) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + uint32_t value, field; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc); + field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN); + + /* Early return if CRC is not enabled for this CRTC */ + if (!field) + return false; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, + tg110->offsets.crtc); + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + + return true; +} + static const struct timing_generator_funcs dce120_tg_funcs = { .validate_timing = dce120_tg_validate_timing, .program_timing = dce120_tg_program_timing, @@ -1145,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = { .set_static_screen_control = dce120_timing_generator_set_static_screen_control, .set_test_pattern = dce120_timing_generator_set_test_pattern, .arm_vert_intr = dce120_arm_vert_intr, + .is_tg_enabled = dce120_is_tg_enabled, + .configure_crc = 
dce120_configure_crc, + .get_crc = dce120_get_crc, }; 
From be4b289f10ca7ef4272fa0339b167827ba5fd2cd Mon Sep 17 00:00:00 2001 From: SivapiriyanKumarasamy Date: Wed, 24 Apr 2019 10:20:15 -0400 Subject: [PATCH 102/178] drm/amd/display: Remove DPMS state dependency for fast boot [Why] The DPMS state of a display should not impact whether we want to enable fast boot. Currently fast boot is not enabled when resuming from S4 because of this. [How] Remove check for DPMS state when determining if fast boot can be applied. Signed-off-by: SivapiriyanKumarasamy Reviewed-by: Anthony Koo Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 85 ++++++++----------- 1 file changed, 35 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 5919716832a5..d3c821f3899b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1496,10 +1496,11 @@ static void disable_vga_and_power_gate_all_controllers( } } -static struct dc_link *get_link_for_edp(struct dc *dc) +static struct dc_link *get_edp_link(struct dc *dc) { int i; + // report any eDP links, even unconnected DDI's for (i = 0; i < dc->link_count; i++) { if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) return dc->links[i]; @@ -1507,23 +1508,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc) return NULL; } -static struct dc_link *get_link_for_edp_to_turn_off( +static struct dc_link *get_edp_link_with_sink( struct dc *dc, struct dc_state *context) { int i; struct dc_link *link = NULL; - /* check if eDP panel is suppose to be set mode, if yes, no need to disable */ - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { - if (context->streams[i]->dpms_off == true) - return context->streams[i]->sink->link; - else - return NULL; - } - } - /* check if there is an eDP panel not in 
use */ for (i = 0; i < dc->link_count; i++) { if (dc->links[i]->local_sink && @@ -1546,12 +1537,33 @@ static struct dc_link *get_link_for_edp_to_turn_off( void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) { int i; - struct dc_link *edp_link_to_turnoff = NULL; - struct dc_link *edp_link = get_link_for_edp(dc); - bool can_edp_fast_boot_optimize = false; - bool apply_edp_fast_boot_optimization = false; + struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context); + struct dc_link *edp_link = get_edp_link(dc); + bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; + if (dc->hwss.init_pipes) + dc->hwss.init_pipes(dc, context); + + // Check fastboot support, disable on DCE8 because of blank screens + if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 && + dc->ctx->dce_version != DCE_VERSION_8_1 && + dc->ctx->dce_version != DCE_VERSION_8_3) { + + // enable fastboot if backend is enabled on eDP + if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { + /* Find eDP stream and set optimization flag */ + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { + context->streams[i]->apply_edp_fast_boot_optimization = true; + can_apply_edp_fast_boot = true; + break; + } + } + } + } + + // Check seamless boot support for (i = 0; i < context->stream_count; i++) { if (context->streams[i]->apply_seamless_boot_optimization) { can_apply_seamless_boot = true; @@ -1559,46 +1571,19 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) } } - if (dc->hwss.init_pipes) - dc->hwss.init_pipes(dc, context); - - if (edp_link) { - /* this seems to cause blank screens on DCE8 */ - if ((dc->ctx->dce_version == DCE_VERSION_8_0) || - (dc->ctx->dce_version == DCE_VERSION_8_1) || - (dc->ctx->dce_version == DCE_VERSION_8_3)) - can_edp_fast_boot_optimize = false; - else - can_edp_fast_boot_optimize = - 
edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); - } - - if (can_edp_fast_boot_optimize) - edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context); - - /* if OS doesn't light up eDP and eDP link is available, we want to disable - * If resume from S4/S5, should optimization. + /* eDP should not have stream in resume from S4 and so even with VBios post + * it should get turned off */ - if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) { - /* Find eDP stream and set optimization flag */ - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { - context->streams[i]->apply_edp_fast_boot_optimization = true; - apply_edp_fast_boot_optimization = true; - } - } - } - - if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) { - if (edp_link_to_turnoff) { + if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) { + if (edp_link_with_sink) { /*turn off backlight before DP_blank and encoder powered down*/ - dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); + dc->hwss.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); - if (edp_link_to_turnoff) - dc->hwss.edp_power_control(edp_link_to_turnoff, false); + if (edp_link_with_sink) + dc->hwss.edp_power_control(edp_link_with_sink, false); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios); } From f62b09b1345eec45392f725efb756aaf21d5fa5c Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 18 Apr 2019 09:51:12 -0500 Subject: [PATCH 103/178] drm/amd/display: Reuse MPC OGRAM for 1D blender [Why & How] Reuse existent code path and in order to do that apply de gamma in 1D blender LUT and re use MPC OGAM. Follow up is required. 
Signed-off-by: Vitaly Prosyak Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Acked-by: Krunoslav Kovac Acked-by: Vitaly Prosyak Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 56 +++++++++++++------ .../amd/display/modules/color/color_gamma.h | 1 + 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index a1055413bade..8601d371776e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -240,16 +240,27 @@ struct dividers { struct fixed31_32 divider3; }; -static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) +enum gamma_type_index { + gamma_type_index_2_4, + gamma_type_index_2_2, + gamma_type_index_2_2_flat +}; + +static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type) { - static const int32_t numerator01[] = { 31308, 180000}; - static const int32_t numerator02[] = { 12920, 4500}; - static const int32_t numerator03[] = { 55, 99}; - static const int32_t numerator04[] = { 55, 99}; - static const int32_t numerator05[] = { 2400, 2200}; + static const int32_t numerator01[] = { 31308, 180000, 0}; + static const int32_t numerator02[] = { 12920, 4500, 0}; + static const int32_t numerator03[] = { 55, 99, 0}; + static const int32_t numerator04[] = { 55, 99, 0}; + static const int32_t numerator05[] = { 2400, 2200, 2200}; uint32_t i = 0; - uint32_t index = is_2_4 == true ? 
0:1; + uint32_t index = 0; + + if (type == gamma_type_index_2_2) + index = 1; + else if (type == gamma_type_index_2_2_flat) + index = 2; do { coefficients->a0[i] = dc_fixpt_from_fraction( @@ -697,7 +708,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq, static void build_regamma(struct pwl_float_data_ex *rgb_regamma, uint32_t hw_points_num, - const struct hw_x_point *coordinate_x, bool is_2_4) + const struct hw_x_point *coordinate_x, enum gamma_type_index type) { uint32_t i; @@ -705,7 +716,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma, struct pwl_float_data_ex *rgb = rgb_regamma; const struct hw_x_point *coord_x = coordinate_x; - build_coefficients(&coeff, is_2_4); + build_coefficients(&coeff, type); i = 0; @@ -892,13 +903,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, static void build_degamma(struct pwl_float_data_ex *curve, uint32_t hw_points_num, - const struct hw_x_point *coordinate_x, bool is_2_4) + const struct hw_x_point *coordinate_x, enum gamma_type_index type) { uint32_t i; struct gamma_coefficients coeff; uint32_t begin_index, end_index; - build_coefficients(&coeff, is_2_4); + build_coefficients(&coeff, type); i = 0; /* X points is 2^-25 to 2^7 @@ -1614,7 +1625,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, coordinates_x, output_tf->sdr_ref_white_level); } else if (tf == TRANSFER_FUNCTION_GAMMA22 && - fs_params != NULL) { + fs_params != NULL && fs_params->skip_tm == 0) { build_freesync_hdr(rgb_regamma, MAX_HW_POINTS, coordinates_x, @@ -1627,7 +1638,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, build_regamma(rgb_regamma, MAX_HW_POINTS, - coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 : + tf == TRANSFER_FUNCTION_GAMMA22 ? 
+ gamma_type_index_2_2_flat : gamma_type_index_2_2); } map_regamma_hw_to_x_user(ramp, coeff, rgb_user, coordinates_x, axis_x, rgb_regamma, @@ -1832,7 +1845,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, build_degamma(curve, MAX_HW_POINTS, coordinates_x, - tf == TRANSFER_FUNCTION_SRGB ? true : false); + tf == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ? + gamma_type_index_2_2_flat : gamma_type_index_2_2); else if (tf == TRANSFER_FUNCTION_LINEAR) { // just copy coordinates_x into curve i = 0; @@ -1932,7 +1947,10 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, build_regamma(rgb_regamma, MAX_HW_POINTS, - coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, + trans == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? + gamma_type_index_2_2_flat : gamma_type_index_2_2); for (i = 0; i <= MAX_HW_POINTS ; i++) { points->red[i] = rgb_regamma[i].r; points->green[i] = rgb_regamma[i].g; @@ -2002,7 +2020,8 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, kvfree(rgb_degamma); } else if (trans == TRANSFER_FUNCTION_SRGB || - trans == TRANSFER_FUNCTION_BT709) { + trans == TRANSFER_FUNCTION_BT709 || + trans == TRANSFER_FUNCTION_GAMMA22) { rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*rgb_degamma), GFP_KERNEL); @@ -2011,7 +2030,10 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, build_degamma(rgb_degamma, MAX_HW_POINTS, - coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, + trans == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? 
+ gamma_type_index_2_2_flat : gamma_type_index_2_2); for (i = 0; i <= MAX_HW_POINTS ; i++) { points->red[i] = rgb_degamma[i].r; points->green[i] = rgb_degamma[i].g; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index a6e164df090a..369953fafadf 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -79,6 +79,7 @@ struct freesync_hdr_tf_params { unsigned int max_content; // luminance in nits unsigned int min_display; // luminance in 1/10000 nits unsigned int max_display; // luminance in nits + unsigned int skip_tm; // skip tm }; void setup_x_points_distribution(void); From 5408887141baac0ad1a5e6cf514ceadf33090114 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sat, 27 Apr 2019 18:50:43 -0400 Subject: [PATCH 104/178] drm/amd/display: 3.2.30 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6c7658110111..425e2df0809f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.29" +#define DC_VER "3.2.30" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 24c18794946ad127334138669b2aa204b2e60763 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 8 Apr 2019 14:56:29 -0400 Subject: [PATCH 105/178] drm/amd/display: add null checks and set update flags * add plane state null checks * add and set update surface flags Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ .../gpu/drm/amd/display/dc/core/dc_resource.c | 1 - 
drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 13 +++++------- .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 2 ++ .../gpu/drm/amd/display/dc/inc/core_types.h | 20 +++++++++++++++++++ 6 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 700278216424..f469394751c1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1358,6 +1358,9 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) update_flags->bits.global_alpha_change = 1; + if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) + update_flags->bits.sdr_white_level = 1; + if (u->plane_info->dcc.enable != u->surface->dcc.enable || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) @@ -1461,6 +1464,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags + if (u->flip_addr) + update_flags->bits.addr_update = 1; + if (!is_surface_in_context(context, u->surface)) { update_flags->bits.new_plane = 1; return UPDATE_TYPE_FULL; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d9bfffb2c48d..262d07a8cb75 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2497,7 +2497,6 @@ void dc_resource_state_copy_construct( if (cur_pipe->bottom_pipe) cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - } for (i = 0; i < dst_ctx->stream_count; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 425e2df0809f..7d459362601d 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -541,12 +541,14 @@ struct dc_plane_status { union surface_update_flags { struct { + uint32_t addr_update:1; /* Medium updates */ uint32_t dcc_change:1; uint32_t color_space_change:1; uint32_t horizontal_mirror_change:1; uint32_t per_pixel_alpha_change:1; uint32_t global_alpha_change:1; + uint32_t sdr_white_level:1; uint32_t rotation_change:1; uint32_t swizzle_change:1; uint32_t scaling_change:1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c2b60b1e1a25..bf0db874d99f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1755,7 +1755,7 @@ static void dcn10_program_output_csc(struct dc *dc, bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible) return true; if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe)) return true; @@ -1764,7 +1764,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible) return true; if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) return true; @@ -1773,7 +1773,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible) return true; if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) return true; @@ -1919,7 +1919,7 @@ static uint16_t fixed_point_to_int_frac( return result; } -void build_prescale_params(struct dc_bias_and_scale *bias_and_scale, +void dcn10_build_prescale_params(struct 
dc_bias_and_scale *bias_and_scale, const struct dc_plane_state *plane_state) { if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN @@ -1952,7 +1952,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state) plane_state->color_space); //set scale and bias registers - build_prescale_params(&bns_params, plane_state); + dcn10_build_prescale_params(&bns_params, plane_state); if (dpp->funcs->dpp_program_bias_and_scale) dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } @@ -2641,9 +2641,6 @@ static void dcn10_wait_for_mpcc_disconnect( res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; hubp->funcs->set_blank(hubp, true); - /*DC_LOG_ERROR(dc->ctx->logger, - "[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n", - i);*/ } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 4b3b27a5d23b..ef94d6b15843 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream( int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); +void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale, + const struct dc_plane_state *plane_state); void lock_all_pipes(struct dc *dc, struct dc_state *context, bool lock); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 6f5ab05d6467..d61efa068c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -212,6 +212,25 @@ struct plane_resource { struct dcn_fe_bandwidth bw; }; +union pipe_update_flags { + struct { + uint32_t enable : 1; + uint32_t disable : 1; + uint32_t odm : 1; + uint32_t global_sync : 1; + uint32_t opp_changed : 1; + uint32_t tg_changed : 1; + uint32_t 
mpcc : 1; + uint32_t dppclk : 1; + uint32_t hubp_interdependent : 1; + uint32_t hubp_rq_dlg_ttu : 1; + uint32_t gamut_remap : 1; + uint32_t scaler : 1; + uint32_t viewport : 1; + } bits; + uint32_t raw; +}; + struct pipe_ctx { struct dc_plane_state *plane_state; struct dc_stream_state *stream; @@ -234,6 +253,7 @@ struct pipe_ctx { struct _vcs_dpi_display_rq_regs_st rq_regs; struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param; #endif + union pipe_update_flags update_flags; }; struct resource_context { From 6476a7c8f03156883bb52605fb84a8ec0147849f Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Mon, 29 Apr 2019 17:21:19 -0400 Subject: [PATCH 106/178] drm/amd/display: Program VTG params after programming Global Sync [Why] VTG has a parameter FP2, which is defined as: if VSTARTUP is before VSYNC: FP2 = number of lines in between VSTARTUP and VSYNC else FP2 = 0 Currently, FP2 is only programmed during "program_timing". However, the position of VSTARTUP is affected by the prefetching requirements on all pipes, so the position might change when we do memory request control on another pipe, so we need to make sure that FP2 stays up-to-date whenever we adjust VSTARTUP. 
[How] - refactor VTG_CONTROL programming into a new function "set_vtg_params" - call it after calling "program_global_sync" - make sure it's called after because it relies on the cached dlg params Signed-off-by: Joshua Aberback Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Acked-by: Jun Lei Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 + .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 115 ++++++++++-------- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 3 + .../amd/display/dc/inc/hw/timing_generator.h | 2 + 4 files changed, 73 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index bf0db874d99f..66bb0e7db25c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2285,6 +2285,9 @@ static void program_all_pipe_in_tree( pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0afe81ed023b..533b0f3cf6c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -46,9 +46,7 @@ * This is a workaround for a bug that has existed since R5xx and has not been * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. 
*/ -static void optc1_apply_front_porch_workaround( - struct timing_generator *optc, - struct dc_crtc_timing *timing) +static void apply_front_porch_workaround(struct dc_crtc_timing *timing) { if (timing->flags.INTERLACE == 1) { if (timing->v_front_porch < 2) @@ -149,17 +147,14 @@ void optc1_program_timing( bool use_vbios) { struct dc_crtc_timing patched_crtc_timing; - uint32_t vesa_sync_start; uint32_t asic_blank_end; uint32_t asic_blank_start; uint32_t v_total; uint32_t v_sync_end; - uint32_t v_init, v_fp2; uint32_t h_sync_polarity, v_sync_polarity; uint32_t start_point = 0; uint32_t field_num = 0; uint32_t h_div_2; - int32_t vertical_line_start; struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -169,7 +164,7 @@ void optc1_program_timing( optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; patched_crtc_timing = *dc_crtc_timing; - optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + apply_front_porch_workaround(&patched_crtc_timing); /* Load horizontal timing */ @@ -182,24 +177,16 @@ void optc1_program_timing( OTG_H_SYNC_A_START, 0, OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); - /* asic_h_blank_end = HsyncWidth + HbackPorch = - * vesa. usHorizontalTotal - vesa. 
usHorizontalSyncStart - - * vesa.h_left_border - */ - vesa_sync_start = patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right + + /* blank_start = line end - front porch */ + asic_blank_start = patched_crtc_timing.h_total - patched_crtc_timing.h_front_porch; - asic_blank_end = patched_crtc_timing.h_total - - vesa_sync_start - + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.h_border_right - + patched_crtc_timing.h_addressable - patched_crtc_timing.h_border_left; - /* h_blank_start = v_blank_end + v_active */ - asic_blank_start = asic_blank_end + - patched_crtc_timing.h_border_left + - patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right; - REG_UPDATE_2(OTG_H_BLANK_START_END, OTG_H_BLANK_START, asic_blank_start, OTG_H_BLANK_END, asic_blank_end); @@ -231,24 +218,15 @@ void optc1_program_timing( OTG_V_SYNC_A_START, 0, OTG_V_SYNC_A_END, v_sync_end); - vesa_sync_start = patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom + + /* blank_start = frame end - front porch */ + asic_blank_start = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; - asic_blank_end = (patched_crtc_timing.v_total - - vesa_sync_start - - patched_crtc_timing.v_border_top); - - /* v_blank_start = v_blank_end + v_active */ - asic_blank_start = asic_blank_end + - (patched_crtc_timing.v_border_top + - patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom); - - vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; - v_fp2 = 0; - if (vertical_line_start < 0) - v_fp2 = -vertical_line_start; + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; REG_UPDATE_2(OTG_V_BLANK_START_END, OTG_V_BLANK_START, asic_blank_start, @@ -261,10 +239,9 @@ void optc1_program_timing( REG_UPDATE(OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, 
v_sync_polarity); - v_init = asic_blank_start; if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || - optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - optc1->signal == SIGNAL_TYPE_EDP) { + optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc1->signal == SIGNAL_TYPE_EDP) { start_point = 1; if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; @@ -272,13 +249,10 @@ void optc1_program_timing( /* Interlace */ if (REG(OTG_INTERLACE_CONTROL)) { - if (patched_crtc_timing.flags.INTERLACE == 1) { + if (patched_crtc_timing.flags.INTERLACE == 1) REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 1); - v_init = v_init / 2; - if ((optc1->vstartup_start/2)*2 > asic_blank_end) - v_fp2 = v_fp2 / 2; - } else + else REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 0); } @@ -287,21 +261,19 @@ void optc1_program_timing( REG_UPDATE(CONTROL, VTG0_ENABLE, 0); - REG_UPDATE_2(CONTROL, - VTG0_FP2, v_fp2, - VTG0_VCOUNT_INIT, v_init); - /* original code is using VTG offset to address OTG reg, seems wrong */ REG_UPDATE_2(OTG_CONTROL, OTG_START_POINT_CNTL, start_point, OTG_FIELD_NUMBER_CNTL, field_num); - optc1_program_global_sync(optc, + optc->funcs->program_global_sync(optc, vready_offset, vstartup_start, vupdate_offset, vupdate_width); + optc->funcs->set_vtg_params(optc, dc_crtc_timing); + /* TODO * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 * program_horz_count_by_2 @@ -319,6 +291,48 @@ void optc1_program_timing( } +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end; + uint32_t v_init; + uint32_t v_fp2 = 0; + int32_t vertical_line_start; + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + + /* VCOUNT_INIT is the start of blank */ + v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; + + /* end of blank = v_init - active */ + 
asic_blank_end = v_init - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */ + vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; + if (vertical_line_start < 0) + v_fp2 = -vertical_line_start; + + /* Interlace */ + if (REG(OTG_INTERLACE_CONTROL)) { + if (patched_crtc_timing.flags.INTERLACE == 1) { + v_init = v_init / 2; + if ((optc1->vstartup_start/2)*2 > asic_blank_end) + v_fp2 = v_fp2 / 2; + } + } + + REG_UPDATE_2(CONTROL, + VTG0_FP2, v_fp2, + VTG0_VCOUNT_INIT, v_init); +} + void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1443,6 +1457,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .get_crc = optc1_get_crc, .configure_crc = optc1_configure_crc, + .set_vtg_params = optc1_set_vtg_params, }; void dcn10_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 7bb414c35d13..651b8caa4b9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -597,4 +597,7 @@ bool optc1_get_crc(struct timing_generator *optc, bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index e4b0de0089af..0b8c6896581f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -238,6 +238,8 @@ struct timing_generator_funcs { bool (*get_crc)(struct 
timing_generator *tg, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); + void (*set_vtg_params)(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); }; #endif From eed928dcd83ee8c80fa05013bbc019bc9e9e2a0e Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 29 Apr 2019 14:35:01 -0400 Subject: [PATCH 107/178] drm/amd/display: enabling stream after HPD low to high happened [Why] 1. No real HPD plug in/out but HPD happens, the driver notifies OS connection changed. 2. No display in target. When HPD goes low to high, the driver should regard as HPD and enter setmode flow. [How] In this case, even stream didn't change but still retrain. Signed-off-by: Chiawen Huang Signed-off-by: Charlene Liu Reviewed-by: Tony Cheng Acked-by: Anthony Koo Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +++ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 +++- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9fbf926d5bf9..6b8dc72a7861 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -704,6 +704,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) if (new_connection_type != dc_connection_none) { link->type = new_connection_type; + link->link_state_valid = false; /* From Disconnected-to-Connected. 
*/ switch (link->connector_signal) { @@ -2631,6 +2632,8 @@ void core_link_enable_stream( stream->phy_pix_clk, pipe_ctx->stream_res.audio != NULL); + pipe_ctx->stream->link->link_state_valid = true; + if (dc_is_dvi_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute( pipe_ctx->stream_res.stream_enc, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 262d07a8cb75..578503ba0d9f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2642,6 +2642,10 @@ bool pipe_need_reprogram( if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream)) return true; + if (false == pipe_ctx_old->stream->link->link_state_valid && + false == pipe_ctx_old->stream->dpms_off) + return true; + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 7b9429e30d82..094009127e25 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -75,6 +75,7 @@ struct dc_link { enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ bool is_hpd_filter_disabled; bool dp_ss_off; + bool link_state_valid; /* caps is the same as reported_link_cap. link_traing use * reported_link_cap. Will clean up. 
TODO diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index d3c821f3899b..69f215967af3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2023,8 +2023,10 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->stream == NULL) continue; - if (pipe_ctx->stream == pipe_ctx_old->stream) + if (pipe_ctx->stream == pipe_ctx_old->stream && + pipe_ctx->stream->link->link_state_valid) { continue; + } if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) continue; From 78cc70b1e47d660207dabf75ca2751fc1e4edf84 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Wed, 24 Apr 2019 15:25:41 -0400 Subject: [PATCH 108/178] drm/amd/display: Engine-specific encoder allocation [WHY] From DCE110 onward, we have the ability to assign DIG BE and FE separately for any display connector type; before, we could only do this for DP. 
Signed-off-by: Wesley Chalmers Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 42 +----------------- .../amd/display/dc/dce100/dce100_resource.c | 44 ++++++++++++++++++- .../amd/display/dc/dce100/dce100_resource.h | 5 +++ .../amd/display/dc/dce110/dce110_resource.c | 35 ++++++++++++++- .../amd/display/dc/dce110/dce110_resource.h | 5 +++ .../amd/display/dc/dce112/dce112_resource.c | 3 +- .../amd/display/dc/dce120/dce120_resource.c | 3 +- .../drm/amd/display/dc/dce80/dce80_resource.c | 3 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 3 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 5 +++ 10 files changed, 101 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 578503ba0d9f..b7952f39f3fc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1646,46 +1646,6 @@ static int acquire_first_free_pipe( return -1; } -static struct stream_encoder *find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream) -{ - int i; - int j = -1; - struct dc_link *link = stream->link; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (!res_ctx->is_stream_enc_acquired[i] && - pool->stream_enc[i]) { - /* Store first available for MST second display - * in daisy chain use case */ - j = i; - if (pool->stream_enc[i]->id == - link->link_enc->preferred_engine) - return pool->stream_enc[i]; - } - } - - /* - * below can happen in cases when stream encoder is acquired: - * 1) for second MST display in chain, so preferred engine already - * acquired; - * 2) for another link, which preferred engine already acquired by any - * MST configuration. 
- * - * If signal is of DP type and preferred engine not found, return last available - * - * TODO - This is just a patch up and a generic solution is - * required for non DP connectors. - */ - - if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) - return pool->stream_enc[j]; - - return NULL; -} - static struct audio *find_first_free_audio( struct resource_context *res_ctx, const struct resource_pool *pool, @@ -1997,7 +1957,7 @@ enum dc_status resource_map_pool_resources( pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; pipe_ctx->stream_res.stream_enc = - find_first_free_match_stream_enc_for_link( + dc->res_pool->funcs->find_first_free_match_stream_enc_for_link( &context->res_ctx, pool, stream); if (!pipe_ctx->stream_res.stream_enc) diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index e938bf9986d3..d7a531e9700f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -867,13 +867,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s return DC_FAIL_SURFACE_VALIDATE; } +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * below can happen in cases when stream encoder is acquired: + * 1) for second MST display in chain, so preferred engine already + * acquired; + * 2) for another link, which preferred engine already acquired by any + * MST configuration. 
+ * + * If signal is of DP type and preferred engine not found, return last available + * + * TODO - This is just a patch up and a generic solution is + * required for non DP connectors. + */ + + if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) + return pool->stream_enc[j]; + + return NULL; +} + static const struct resource_funcs dce100_res_pool_funcs = { .destroy = dce100_destroy_resource_pool, .link_enc_create = dce100_link_encoder_create, .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce100_validate_global + .validate_global = dce100_validate_global, + .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; static bool construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h index 2f366d66635d..fecab7c560f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h @@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx( struct dc_state *new_ctx, struct dc_stream_state *dc_stream); +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* DCE100_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 35b58a085f5c..f982c8b196cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1134,6 +1134,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct 
dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + static const struct resource_funcs dce110_res_pool_funcs = { .destroy = dce110_destroy_resource_pool, @@ -1142,7 +1174,8 @@ static const struct resource_funcs dce110_res_pool_funcs = { .validate_plane = dce110_validate_plane, .acquire_idle_pipe_for_layer = dce110_acquire_underlay, .add_stream_to_ctx = dce110_add_stream_to_ctx, - .validate_global = dce110_validate_global + .validate_global = dce110_validate_global, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h index e5f168c1f8c8..aa4531e0800e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h @@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool( struct dc *dc, struct hw_asic_id asic_id); +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* __DC_RESOURCE_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index a480b15f6885..cdf759b0f5f9 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -993,7 +993,8 @@ static const struct resource_funcs dce112_res_pool_funcs = { .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx, - .validate_global = dce112_validate_global + .validate_global = dce112_validate_global, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index fa46caee958a..9e6a5d84b0a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -837,7 +837,8 @@ static const struct resource_funcs dce120_res_pool_funcs = { .link_enc_create = dce120_link_encoder_create, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce112_add_stream_to_ctx + .add_stream_to_ctx = dce112_add_stream_to_ctx, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 27d0cc394963..2c21135a8510 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -880,7 +880,8 @@ static const struct resource_funcs dce80_res_pool_funcs = { .validate_bandwidth = dce80_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce80_validate_global + .validate_global = dce80_validate_global, + .find_first_free_match_stream_enc_for_link = 
dce100_find_first_free_match_stream_enc_for_link }; static bool dce80_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index dc7cf3704252..749fb44cfc4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1243,7 +1243,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_plane = dcn10_validate_plane, .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, - .get_default_swizzle_mode = dcn10_get_default_swizzle_mode + .get_default_swizzle_mode = dcn10_get_default_swizzle_mode, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d61efa068c9a..539d34d3439c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -123,6 +123,11 @@ struct resource_funcs { enum dc_status (*get_default_swizzle_mode)( struct dc_plane_state *plane_state); + struct stream_encoder *(*find_first_free_match_stream_enc_for_link)( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + }; struct audio_support{ From 2da4605dce38b84cd2e5b86686f43adae1b2cacb Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Wed, 24 Apr 2019 15:29:06 -0400 Subject: [PATCH 109/178] drm/amd/display: Use DCN functions instead of DCE [WHY] DCN code should make as few references to DCE as possible [HOW] Copy DCE110 implementation of find_first_free_match_stream_enc_for_link into DCN10 Signed-off-by: Wesley Chalmers Reviewed-by: Tony Cheng Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 34 ++++++++++++++++++- 
.../drm/amd/display/dc/dcn10/dcn10_resource.h | 5 +++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 749fb44cfc4b..bfddd51294a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1231,6 +1231,38 @@ static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan return result; } +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + static const struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; @@ -1244,7 +1276,7 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, .get_default_swizzle_mode = dcn10_get_default_swizzle_mode, - .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h index 999c684a0b36..633025ccb870 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h @@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* __DC_RESOURCE_DCN10_H__ */ From b33171ccaabeffa40df9744656d8d523ceb2b612 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 1 May 2019 17:05:05 -0400 Subject: [PATCH 110/178] drm/amd/display: Use VCP for extended colorimetry DPRX should send the VCP extended colorimetry packet if the sink supports DPCD rev1.4 and reports the extended colorimetry bit. Signed-off-by: Aric Cyr Reviewed-by: Anthony Koo Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index db06fab2ad5c..bc13c552797f 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, if (stream->psr_version != 0) vscPacketRevision = 2; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + /* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */ + if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) vscPacketRevision = 5; /* VSC packet not needed based on the features From a634913ed24aaef9389be17d87dd215529f81f35 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 2 May 2019 17:32:14 -0400 Subject: [PATCH 111/178] drm/amd/display: Rename EDID_BLOCK_SIZE 
to DC_EDID_BLOCK_SIZE Signed-off-by: Joshua Aberback Reviewed-by: Abdoulaye Berthe Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 6 +++--- drivers/gpu/drm/amd/display/dc/dc_types.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6b8dc72a7861..e7236539f867 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -907,10 +907,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; /* Connectivity log: detection */ - for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) { + for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) { CONN_DATA_DETECT(link, - &sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE], - EDID_BLOCK_SIZE, + &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE], + DC_EDID_BLOCK_SIZE, "%s: [Block %d] ", sink->edid_caps.display_name, i); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6c2a3d9a4c2e..92a670894c05 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -104,7 +104,7 @@ struct dc_context { #define DC_MAX_EDID_BUFFER_SIZE 1024 -#define EDID_BLOCK_SIZE 128 +#define DC_EDID_BLOCK_SIZE 128 #define MAX_SURFACE_NUM 4 #define NUM_PIXEL_FORMATS 10 From aa5fdb1ab5b6559ce6ebe11dd78177c8487f02e0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 2 May 2019 13:21:48 -0400 Subject: [PATCH 112/178] drm/amd/display: Explicitly specify update type per plane info change [Why] The bit for flip addr is being set causing the determination for FAST vs MEDIUM to always return MEDIUM when plane info is provided as a surface update. This causes extreme stuttering for the typical atomic update path on Linux. 
[How] Don't use update_flags->raw for determining FAST vs MEDIUM. It's too fragile to changes like this. Explicitly specify the update type per update flag instead. It's not as clever as checking the bits itself but at least it's correct. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Acked-by: Bhawanpreet Lakha Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 68 +++++++++++++++--------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f469394751c1..257e632a3a1a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1333,74 +1333,94 @@ static bool is_surface_in_context( static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; + enum surface_update_type update_type = UPDATE_TYPE_FAST; if (!u->plane_info) return UPDATE_TYPE_FAST; - if (u->plane_info->color_space != u->surface->color_space) + if (u->plane_info->color_space != u->surface->color_space) { update_flags->bits.color_space_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) + if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) { update_flags->bits.horizontal_mirror_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->rotation != u->surface->rotation) + if (u->plane_info->rotation != u->surface->rotation) { update_flags->bits.rotation_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->format != u->surface->format) + if (u->plane_info->format != u->surface->format) { update_flags->bits.pixel_format_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->stereo_format != 
u->surface->stereo_format) + if (u->plane_info->stereo_format != u->surface->stereo_format) { update_flags->bits.stereo_format_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) + if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) { update_flags->bits.per_pixel_alpha_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) + if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) { update_flags->bits.global_alpha_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) + if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) { update_flags->bits.sdr_white_level = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (u->plane_info->dcc.enable != u->surface->dcc.enable || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks - || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) + || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) { update_flags->bits.dcc_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (resource_pixel_format_to_bpp(u->plane_info->format) != - resource_pixel_format_to_bpp(u->surface->format)) + resource_pixel_format_to_bpp(u->surface->format)) { /* different bytes per element will require full bandwidth * and DML calculation */ update_flags->bits.bpp_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch || u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch - || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) + || 
u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) { update_flags->bits.plane_size_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, sizeof(union dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + /* todo: below are HW dependent, we should add a hook to * DCE/N resource and validated there. */ - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) + if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { /* swizzled mode requires RQ to be setup properly, * thus need to run DML to calculate RQ settings */ update_flags->bits.bandwidth_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } } - if (update_flags->bits.rotation_change - || update_flags->bits.stereo_format_change - || update_flags->bits.pixel_format_change - || update_flags->bits.bpp_change - || update_flags->bits.bandwidth_change - || update_flags->bits.output_tf_change) - return UPDATE_TYPE_FULL; - - return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST; + /* This should be UPDATE_TYPE_FAST if nothing has changed. 
*/ + return update_type; } static enum surface_update_type get_scaling_info_update_type( @@ -1464,9 +1484,6 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags - if (u->flip_addr) - update_flags->bits.addr_update = 1; - if (!is_surface_in_context(context, u->surface)) { update_flags->bits.new_plane = 1; return UPDATE_TYPE_FULL; @@ -1483,6 +1500,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, type = get_scaling_info_update_type(u); elevate_update_type(&overall_type, type); + if (u->flip_addr) + update_flags->bits.addr_update = 1; + if (u->in_transfer_func) update_flags->bits.in_transfer_func_change = 1; From ccd76ebce4e8e1e19df19bc7024355a1f82a9363 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Tue, 30 Apr 2019 16:22:38 -0400 Subject: [PATCH 113/178] drm/amd/display: dont set otg offset move the update of otg instance outside of hw programming logic, since this is sw state, it should always be updated and should never be optimized away. 
Signed-off-by: Jun Lei Reviewed-by: Eric Yang Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 257e632a3a1a..4e17af2b63dc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1141,7 +1141,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* Program all planes within new context*/ for (i = 0; i < context->stream_count; i++) { const struct dc_link *link = context->streams[i]->link; - struct dc_stream_status *status; if (!context->streams[i]->mode_changed) continue; @@ -1166,9 +1165,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } } - status = dc_stream_get_status_from_state(context, context->streams[i]); - context->streams[i]->out.otg_offset = status->primary_otg_inst; - CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", context->streams[i]->timing.h_addressable, context->streams[i]->timing.v_addressable, From 1090d58d4815b1fcd95a80987391006c86398b4c Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Fri, 3 May 2019 23:50:10 +0800 Subject: [PATCH 114/178] drm/amd/display: Disable ABM before destroy ABM struct [Why] When the driver is being disabled, the OS sets backlight optimization and then stops the device. But this flag causes the driver to enable ABM while the driver is disabled.
[How] Send the ABM disable command before destroying the ABM struct Signed-off-by: Paul Hsieh Reviewed-by: Anthony Koo Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index da96229db53a..2959c3c9390b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -473,6 +473,8 @@ void dce_abm_destroy(struct abm **abm) { struct dce_abm *abm_dce = TO_DCE_ABM(*abm); + abm_dce->base.funcs->set_abm_immediate_disable(*abm); + kfree(abm_dce); *abm = NULL; } From 0cd8afaef19927803b7bb0808e09d59a3e405505 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 6 May 2019 10:16:12 -0400 Subject: [PATCH 115/178] drm/amd/display: 3.2.31 Signed-off-by: Aric Cyr Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7d459362601d..566111ff463e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.30" +#define DC_VER "3.2.31" #define MAX_SURFACES 3 #define MAX_PLANES 6 From e74609cb425371cf6a5baba0a76073b3be34625f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 May 2019 13:53:14 -0500 Subject: [PATCH 116/178] drm/amdgpu/vega20: use mode1 reset for RAS and XGMI If RAS or XGMI are enabled, you have to use mode1 reset rather than BACO.
Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f9c9cacc5c50..32dc5a128249 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -65,6 +65,8 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" #include #define mmMP0_MISC_CGTT_CTRL0 0x01b9 @@ -485,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev) soc15_asic_get_baco_capability(adev, &baco_reset); else baco_reset = false; + if (baco_reset) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (hive || (ras && ras->supported)) + baco_reset = false; + } break; default: baco_reset = false; From e70a26b303d5b31a5d467eb7e0573302babc5cb7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 10 May 2019 08:07:34 +0100 Subject: [PATCH 117/178] drm/amdgpu: fix spelling mistake "retrived" -> "retrieved" There is a spelling mistake in a DRM_ERROR error message. Fix this. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 24e25ae0ca83..8c09bf994acd 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -194,7 +194,7 @@ static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, return; if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { - DRM_ERROR("DF PMC addressing not retrived! Lo: %x, Hi: %x", + DRM_ERROR("DF PMC addressing not retrieved! 
Lo: %x, Hi: %x", *lo_base_addr, *hi_base_addr); return; } From 1b9557fcaa42a45920821b0c6455088bb341f477 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Wed, 15 May 2019 16:17:05 -0400 Subject: [PATCH 118/178] drm/amd/powerplay: Fix maybe-uninitialized in get_ppfeature_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the warning below error: ‘feature_mask’ may be used uninitialized in this function [-Werror=maybe-uninitialized] *features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ (((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Reviewed-by: Alex Deucher Signed-off-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index b39f3d439332..4aa8f5a69c4c 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -2377,7 +2377,7 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, static int vega20_get_enabled_smc_features(struct smu_context *smu, uint64_t *features_enabled) { - uint32_t feature_mask[2]; + uint32_t feature_mask[2] = {0, 0}; int ret = 0; ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); From a254bfa2c5fbd46165010cd6c29a4ad7638b8772 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Wed, 8 May 2019 14:38:55 +0800 Subject: [PATCH 119/178] drm/amd/powerplay: Enable "disable dpm" feature to support swSMU debug (v2) add pm_enabled to control the dpm off/on. v2: Directly return 0 to replace return ret and merge some check code. 
Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 24 +++++++++++-- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 34 ++++++++++++++++--- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index b119a276a977..3a47130f8150 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -198,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf; int ret = 0; + if (!smu->pm_enabled) + return -EINVAL; if (header->usStructureSize != size) { pr_err("pp table size not matched !\n"); return -EIO; @@ -233,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu) int ret = 0; uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32]; + if (!smu->pm_enabled) + return ret; mutex_lock(&feature->mutex); bitmap_fill(feature->allowed, SMU_FEATURE_MAX); mutex_unlock(&feature->mutex); @@ -344,6 +348,7 @@ static int smu_early_init(void *handle) struct smu_context *smu = &adev->smu; smu->adev = adev; + smu->pm_enabled = amdgpu_dpm; mutex_init(&smu->mutex); return smu_set_funcs(adev); @@ -353,6 +358,9 @@ static int smu_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; + + if (!smu->pm_enabled) + return 0; mutex_lock(&smu->mutex); smu_handle_task(&adev->smu, smu->smu_dpm.dpm_level, @@ -736,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu, */ ret = smu_set_tool_table_location(smu); + if (!smu_is_dpm_running(smu)) + pr_info("dpm has been disabled\n"); + return ret; } @@ -848,7 +859,10 @@ static int smu_hw_init(void *handle) mutex_unlock(&smu->mutex); - adev->pm.dpm_enabled = true; + if (!smu->pm_enabled) + adev->pm.dpm_enabled = 
false; + else + adev->pm.dpm_enabled = true; pr_info("SMU is initialized successfully!\n"); @@ -963,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu, int index = 0; int num_of_active_display = 0; - if (!is_support_sw_smu(smu->adev)) + if (!smu->pm_enabled || !is_support_sw_smu(smu->adev)) return -EINVAL; if (!display_config) @@ -1091,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle, struct smu_context *smu = (struct smu_context*)(handle); struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - if (!smu_dpm_ctx->dpm_context) + if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context) return -EINVAL; if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { @@ -1134,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, long workload; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + if (!smu->pm_enabled) + return -EINVAL; if (!skip_display_settings) { ret = smu_display_config_changed(smu); if (ret) { @@ -1142,6 +1158,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, } } + if (!smu->pm_enabled) + return -EINVAL; ret = smu_apply_clocks_adjust_rules(smu); if (ret) { pr_err("Failed to apply clocks adjust rules!"); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 2cb4cc2a8208..3eb1de9ecf73 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -401,6 +401,7 @@ struct smu_context uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; + bool pm_enabled; uint32_t smc_if_version; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index da59a0972b8e..940b519686d5 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -360,6 +360,8 @@ static int smu_v11_0_init_power(struct smu_context *smu) { struct smu_power_context *smu_power = 
&smu->smu_power; + if (!smu->pm_enabled) + return 0; if (smu_power->power_context || smu_power->power_context_size != 0) return -EINVAL; @@ -383,6 +385,8 @@ static int smu_v11_0_fini_power(struct smu_context *smu) { struct smu_power_context *smu_power = &smu->smu_power; + if (!smu->pm_enabled) + return 0; if (!smu_power->power_context || smu_power->power_context_size == 0) return -EINVAL; @@ -650,6 +654,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) { struct smu_table_context *table_context = &smu->smu_table; + if (!smu->pm_enabled) + return 0; if (!table_context) return -EINVAL; @@ -678,6 +684,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu) static int smu_v11_0_init_display(struct smu_context *smu) { int ret = 0; + + if (!smu->pm_enabled) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0); return ret; } @@ -687,6 +696,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32 uint32_t feature_low = 0, feature_high = 0; int ret = 0; + if (!smu->pm_enabled) + return ret; if (feature_id >= 0 && feature_id < 31) feature_low = (1 << feature_id); else if (feature_id > 31 && feature_id < 63) @@ -793,10 +804,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu, uint32_t feature_mask[2]; int ret = 0; - ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures : - SMU_MSG_DisableAllSmuFeatures)); - if (ret) - return ret; + if (smu->pm_enabled) { + ret = smu_send_smc_msg(smu, (en ? 
SMU_MSG_EnableAllSmuFeatures : + SMU_MSG_DisableAllSmuFeatures)); + if (ret) + return ret; + } + ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); if (ret) return ret; @@ -813,6 +827,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu) { int ret = 0; + if (!smu->pm_enabled) + return ret; if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1); @@ -825,6 +841,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock, { int ret = 0; + if (!smu->pm_enabled) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq, clock_select << 16); if (ret) { @@ -1101,6 +1119,8 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) TEMP_RANGE_MAX}; struct amdgpu_device *adev = smu->adev; + if (!smu->pm_enabled) + return ret; smu_v11_0_get_thermal_range(smu, &range); if (smu->smu_table.thermal_controller_type) { @@ -1336,6 +1356,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu, PPCLK_e clk_select = 0; uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; + if (!smu->pm_enabled) + return -EINVAL; if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { switch (clk_type) { case amd_pp_dcef_clock: @@ -1619,7 +1641,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf) "PD_Data_error_rate_coeff"}; int result = 0; - if (!buf) + if (!smu->pm_enabled || !buf) return -EINVAL; size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", @@ -1706,6 +1728,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input smu->power_profile_mode = input[size]; + if (!smu->pm_enabled) + return ret; if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); return -EINVAL; From d6ee400e793f0ae6c9f5926bea9fbb362a950d96 Mon Sep 17 00:00:00 2001 From: Slava Abramov Date: Thu, 16 May 2019 
16:17:53 -0400 Subject: [PATCH 120/178] drm/amdgpu: use div64_ul for 32-bit compatibility v1 v1: replace casting to unsigned long with div64_ul Acked-by: Alex Deucher Signed-off-by: Slava Abramov Tested-by: Slava Abramov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index da1dc40b9b14..d5719b0fb82c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -764,8 +764,8 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, struct amdgpu_device *adev = con->adev; const unsigned int element_size = sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; - unsigned int start = (ppos + element_size - 1) / element_size; - unsigned int end = (ppos + count - 1) / element_size; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); ssize_t s = 0; struct ras_badpage *bps = NULL; unsigned int bps_count = 0; From 17a839135f7356d9e1cff653ea861b1b890c9d6c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 17 May 2019 13:39:36 +0800 Subject: [PATCH 121/178] drm/amd/powerplay: fix sw SMU wrong UVD/VCE powergate setting The UVD/VCE bits are set wrongly. This causes the UVD/VCE clocks to not be brought back correctly when needed.
Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 940b519686d5..d2eeb6240484 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1834,17 +1834,6 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu, } static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable) -{ - if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) - return 0; - - if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) - return 0; - - return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); -} - -static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) { if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) return 0; @@ -1855,6 +1844,17 @@ static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); } +static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) +{ + if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) + return 0; + + if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) + return 0; + + return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); +} + static int smu_v11_0_get_current_rpm(struct smu_context *smu, uint32_t *current_rpm) { From 5208e6a3b25d0fe7acd812957b4a5e745b86e957 Mon Sep 17 00:00:00 2001 From: Weitao Hou Date: Sat, 18 May 2019 21:41:41 +0800 Subject: [PATCH 122/178] gpu: fix typos in code comments fix eror to error Signed-off-by: Weitao Hou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f70437aae8e0..df422440845b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -183,8 +183,8 @@ static bool calculate_fb_and_fractional_fb_divider( *RETURNS: * It fills the PLLSettings structure with PLL Dividers values * if calculated values are within required tolerance -* It returns - true if eror is within tolerance -* - false if eror is not within tolerance +* It returns - true if error is within tolerance +* - false if error is not within tolerance */ static bool calc_fb_divider_checking_tolerance( struct calc_pll_clock_source *calc_pll_cs, From dbaa922b5706b1aff4572c280e15bbea2d04afe6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Apr 2019 08:58:22 -0500 Subject: [PATCH 123/178] drm/amdgpu: use pcie_bandwidth_available rather than open coding it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It does the same thing we were doing already. I thought it needed work for gen3/4 speeds, but that seems to be covered already. Reviewed-by: Evan Quan Acked-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 41 ++-------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6fea2d121537..a7ff8d6dbc6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3707,43 +3707,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive.
*/ return r; } -static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, - enum pci_bus_speed *speed, - enum pcie_link_width *width) -{ - struct pci_dev *pdev = adev->pdev; - enum pci_bus_speed cur_speed; - enum pcie_link_width cur_width; - u32 ret = 1; - - *speed = PCI_SPEED_UNKNOWN; - *width = PCIE_LNK_WIDTH_UNKNOWN; - - while (pdev) { - cur_speed = pcie_get_speed_cap(pdev); - cur_width = pcie_get_width_cap(pdev); - ret = pcie_bandwidth_available(adev->pdev, NULL, - NULL, &cur_width); - if (!ret) - cur_width = PCIE_LNK_WIDTH_RESRV; - - if (cur_speed != PCI_SPEED_UNKNOWN) { - if (*speed == PCI_SPEED_UNKNOWN) - *speed = cur_speed; - else if (cur_speed < *speed) - *speed = cur_speed; - } - - if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { - if (*width == PCIE_LNK_WIDTH_UNKNOWN) - *width = cur_width; - else if (cur_width < *width) - *width = cur_width; - } - pdev = pci_upstream_bridge(pdev); - } -} - /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3777,8 +3740,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, - &platform_link_width); + pcie_bandwidth_available(adev->pdev, NULL, + &platform_speed_cap, &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ From bcae18c2f54252b7278e58da0663ee6b0bda3285 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 29 Apr 2019 11:35:42 +0800 Subject: [PATCH 124/178] drm/amd/powerplay: enable ppfeaturemask module parameter support on Vega20 Support DPM/DS/ULV related bitmasks of ppfeaturemask module parameter. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index eb7002401587..d18f34d4a51e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) if (hwmgr->smu_version < 0x282100) data->registry_data.disallowed_features |= FEATURE_ECC_MASK; + if (!(hwmgr->feature_mask & PP_PCIE_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK; + + if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK; + + if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK; + + if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_UCLK_MASK; + + if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK; + + if (!(hwmgr->feature_mask & PP_ULV_MASK)) + data->registry_data.disallowed_features |= FEATURE_ULV_MASK; + + if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK)) + data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK; + data->registry_data.od_state_in_dc_support = 0; data->registry_data.thermal_support = 1; data->registry_data.skip_baco_hardware = 0; From cb77ee7cae96c4db5d7dfe127d3e0cf9d6056875 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 1 Nov 2018 11:06:25 -0400 Subject: [PATCH 125/178] drm/amdkfd: Use 64 bit sdma_bitmap Support a maximum of 64 SDMA queues Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
| 10 +++++----- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c6c9530e704e..0de8cf45c130 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -891,7 +891,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; return 0; } @@ -929,8 +929,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, if (dqm->sdma_bitmap == 0) return -ENOMEM; - bit = ffs(dqm->sdma_bitmap) - 1; - dqm->sdma_bitmap &= ~(1 << bit); + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); *sdma_queue_id = bit; return 0; @@ -941,7 +941,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, { if (sdma_queue_id >= get_num_sdma_queues(dqm)) return; - dqm->sdma_bitmap |= (1 << sdma_queue_id); + dqm->sdma_bitmap |= (1ULL << sdma_queue_id); } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -1047,7 +1047,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 70e38a2e23b9..2770f3ece89f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -188,7 +188,7 @@ struct device_queue_manager { 
unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; - unsigned int sdma_bitmap; + uint64_t sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; From 96eb5f9dd3d85f3204b271b208b36dfe4df14101 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 8 Nov 2018 10:40:41 -0500 Subject: [PATCH 126/178] drm/amdkfd: Add sdma allocation debug message Add debug messages during SDMA queue allocation. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0de8cf45c130..775f20eb4f68 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1181,6 +1181,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); + pr_debug("SDMA id is: %d\n", q->sdma_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); } retval = allocate_doorbell(qpd, q); From 323c71df9447f277dd151b65711bc9ba51fcc4c0 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 3 Dec 2018 09:20:20 -0600 Subject: [PATCH 127/178] drm/amdkfd: Differentiate b/t sdma_id and sdma_queue_id sdma_queue_id is sdma queue index inside one sdma engine. sdma_id is sdma queue index among all sdma engines. Use those two names properly. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 775f20eb4f68..426d3a46f501 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -922,7 +922,7 @@ static int stop_nocpsch(struct device_queue_manager *dqm) } static int allocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int *sdma_queue_id) + unsigned int *sdma_id) { int bit; @@ -931,17 +931,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, bit = __ffs64(dqm->sdma_bitmap); dqm->sdma_bitmap &= ~(1ULL << bit); - *sdma_queue_id = bit; + *sdma_id = bit; return 0; } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id) + unsigned int sdma_id) { - if (sdma_queue_id >= get_num_sdma_queues(dqm)) + if (sdma_id >= get_num_sdma_queues(dqm)) return; - dqm->sdma_bitmap |= (1ULL << sdma_queue_id); + dqm->sdma_bitmap |= (1ULL << sdma_id); } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, From c6fd980ab154455df62adc89d81898d24654ab72 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 4 Dec 2018 16:08:33 -0600 Subject: [PATCH 128/178] drm/amdkfd: Shift sdma_engine_id and sdma_queue_id in mqd FW of some new ASICs requires sdma mqd size to be not more than 128 dwords. 
Repurpose the last 2 reserved fields of sdma mqd for driver internal use, so the total mqd size is no bigger than 128 dwords Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/cik_structs.h | 3 +-- drivers/gpu/drm/amd/include/v9_structs.h | 3 +-- drivers/gpu/drm/amd/include/vi_structs.h | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h index 749eab94e335..699e658c3cec 100644 --- a/drivers/gpu/drm/amd/include/cik_structs.h +++ b/drivers/gpu/drm/amd/include/cik_structs.h @@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index ceaf4932258d..8b383dbe1cda 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -151,8 +151,7 @@ struct v9_sdma_mqd { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h index 717fbae1d362..c17613287cd0 100644 --- a/drivers/gpu/drm/amd/include/vi_structs.h +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -151,8 +151,7 @@ struct vi_sdma_mqd { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; From 
f756e6319ca07e369494a921b9935eb8625cc40b Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 25 Jan 2019 16:35:35 -0500 Subject: [PATCH 129/178] drm/amdkfd: Fix compute profile switching Fix compute profile switching on process termination. Add a dedicated reference counter to keep track of entry/exit to/from compute profile. This enables switching compute profiles for other reasons than process creation or termination. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 11 ++++++----- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c1e4d44d6137..8202a5db3a35 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -462,6 +462,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; + atomic_set(&kfd->compute_profile, 0); mutex_init(&kfd->doorbell_mutex); memset(&kfd->doorbell_available_index, 0, @@ -1036,6 +1037,21 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) atomic_inc(&kfd->sram_ecc_flag); } +void kfd_inc_compute_active(struct kfd_dev *kfd) +{ + if (atomic_inc_return(&kfd->compute_profile) == 1) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); +} + +void kfd_dec_compute_active(struct kfd_dev *kfd) +{ + int count = atomic_dec_return(&kfd->compute_profile); + + if (count == 0) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + WARN_ONCE(count < 0, "Compute profile ref. 
count error"); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 426d3a46f501..d6fe75245d05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -811,8 +811,8 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); - if (dqm->processes_count++ == 0) - amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false); + dqm->processes_count++; + kfd_inc_compute_active(dqm->dev); dqm_unlock(dqm); @@ -835,9 +835,8 @@ static int unregister_process(struct device_queue_manager *dqm, if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - if (--dqm->processes_count == 0) - amdgpu_amdkfd_set_compute_idle( - dqm->dev->kgd, true); + dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); goto out; } } @@ -1542,6 +1541,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); break; } } @@ -1629,6 +1629,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); break; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9c68ae5093de..eac687b79ad8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -279,6 +279,9 @@ struct kfd_dev { /* SRAM ECC flag */ atomic_t sram_ecc_flag; + + /* Compute Profile ref. 
count */ + atomic_t compute_profile; }; enum kfd_mempool { @@ -978,6 +981,10 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); bool kfd_is_locked(void); +/* Compute profile */ +void kfd_inc_compute_active(struct kfd_dev *dev); +void kfd_dec_compute_active(struct kfd_dev *dev); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) From 2db2f259594ddf3d9d0053c2aa265e6ae48ee6db Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 14 Nov 2018 22:23:25 -0600 Subject: [PATCH 130/178] drm/amdkfd: Fix gfx8 MEM_VIOL exception handler When MEM_VIOL is asserted the context save handler rewinds the program counter. This is incorrect for any source of the exception. MEM_VIOL may be raised in normal operation by out-of-bounds access to LDS or GDS and does not require special handling. Remove PC adjustment when MEM_VIOL has been raised. Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 9 ++------- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 13 ------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 3621efbd5759..ec9a9a99f808 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,7 +21,7 @@ */ static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf82012b, + 0xbf820001, 0xbf820121, 0xb8f4f802, 0x89748674, 0xb8f5f803, 0x8675ff75, 0x00000400, 0xbf850017, @@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8671ff71, 0x0000ffff, 0x8f728374, 0xb972e0c2, 0xbf800002, 0xb9740002, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, + 0xbe801f70, 0xbefa0080, 0xb97a0283, 0xbef60068, 0xbef70069, 0xb8fa1c07, 0x8e7a9c7a, 0x87717a71, diff 
--git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index abe1a5da29fb..a47f5b933120 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? end - //check whether there is mem_viol - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_NO_PC_REWIND - - //if so, need rewind PC assuming GDS operation gets NACKed - s_mov_b32 s_save_tmp, 0 //clear mem_viol bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 - s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc - -L_NO_PC_REWIND: s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit From 157e586dc98fcc9c6ec4cefa7d893e3a0992bd1a Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 31 Jan 2019 11:38:18 -0600 Subject: [PATCH 131/178] drm/amdkfd: Preserve wave state after instruction fetch MEM_VIOL If instruction fetch fails the wave cannot be halted and returned to the shader without raising MEM_VIOL again. Currently the wave is terminated if this occurs, but this loses information about the cause of the fault. The debugger would prefer the faulting wave state to be context-saved. Poll inside the trap handler until TRAPSTS.SAVECTX indicates context save is ready. Exit the poll loop and complete the remainder of the exception handler, then return to the shader. The next instruction fetch will be from the trap handler and not the faulting PC. Context save will then deschedule the wave and save its state. 
Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 10 ++++++---- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 10 ++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index ec9a9a99f808..097da0dd3b04 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -274,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015d, + 0xbf820001, 0xbf820161, 0xb8f8f802, 0x89788678, 0xb8f1f803, 0x866eff71, - 0x00000400, 0xbf850037, + 0x00000400, 0xbf85003b, 0x866eff71, 0x00000800, 0xbf850003, 0x866eff71, - 0x00000100, 0xbf840008, + 0x00000100, 0xbf84000c, 0x866eff78, 0x00002000, - 0xbf840001, 0xbf810000, + 0xbf840005, 0xbf8e0010, + 0xb8eef803, 0x866eff6e, + 0x00000400, 0xbf84fffb, 0x8778ff78, 0x00002000, 0x80ec886c, 0x82ed806d, 0xb8eef807, 0x866fff6e, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 0bb9c577b3a2..6a010c9e55de 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -266,10 +266,16 @@ if (!EMU_RUN_HACK) L_HALT_WAVE: // If STATUS.HALT is set then this fault must come from SQC instruction fetch. - // We cannot prevent further faults so just terminate the wavefront. + // We cannot prevent further faults. Spin wait until context saved. 
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK s_cbranch_scc0 L_NOT_ALREADY_HALTED - s_endpgm + +L_WAIT_CTX_SAVE: + s_sleep 0x10 + s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc0 L_WAIT_CTX_SAVE + L_NOT_ALREADY_HALTED: s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK From 58836009015c6e1139f227ce827655b7e9639c2d Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Tue, 19 Feb 2019 14:51:56 -0600 Subject: [PATCH 132/178] drm/amdkfd: Fix gfx9 XNACK state save/restore SQ_WAVE_IB_STS.RCNT grew from 4 bits to 5 in gfx9. Do not truncate when saving in the high bits of TTMP1. Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 12 ++++++------ .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 097da0dd3b04..eed845b4e9a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -310,8 +310,8 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xbe801f6c, 0x866dff6d, 0x0000ffff, 0xbef00080, 0xb9700283, 0xb8f02407, - 0x8e709c70, 0x876d706d, - 0xb8f003c7, 0x8e709b70, + 0x8e709b70, 0x876d706d, + 0xb8f003c7, 0x8e709a70, 0x876d706d, 0xb8f0f807, 0x8670ff70, 0x00007fff, 0xb970f807, 0xbeee007e, @@ -549,11 +549,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x00000048, 0xc0031e77, 0x00000058, 0xc0071eb7, 0x0000005c, 0xbf8cc07f, - 0x866fff6d, 0xf0000000, - 0x8f6f9c6f, 0x8e6f906f, + 0x866fff6d, 0xf8000000, + 0x8f6f9b6f, 0x8e6f906f, 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x08000000, - 0x8f6f9b6f, 0x8e6f8f6f, + 0x866fff6d, 0x04000000, + 0x8f6f9a6f, 0x8e6f8f6f, 0x876e6f6e, 0x866fff70, 0x00800000, 0x8f6f976f, 0xb96ef807, 0x866dff6d, diff --git 
a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 6a010c9e55de..e1ac34517642 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME +var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi From fa722f0d9820a84952a3cbaccb888c31f9bfca2a Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Tue, 2 Apr 2019 11:43:30 -0500 Subject: [PATCH 133/178] drm/amdkfd: Preserve ttmp[4:5] instead of ttmp[14:15] ttmp[4:5] is initialized by the SPI with SPI_GDBG_TRAP_DATA* values. These values are more useful to the debugger than ttmp[14:15], which carries dispatch_scratch_base*. There are too few registers to preserve both. 
Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 486 +++++++++--------- .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 45 +- 2 files changed, 263 insertions(+), 268 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index eed845b4e9a7..e413d4a71fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -274,12 +274,12 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf820161, + 0xbf820001, 0xbf82015e, 0xb8f8f802, 0x89788678, - 0xb8f1f803, 0x866eff71, + 0xb8fbf803, 0x866eff7b, 0x00000400, 0xbf85003b, - 0x866eff71, 0x00000800, - 0xbf850003, 0x866eff71, + 0x866eff7b, 0x00000800, + 0xbf850003, 0x866eff7b, 0x00000100, 0xbf84000c, 0x866eff78, 0x00002000, 0xbf840005, 0xbf8e0010, @@ -292,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8977ff77, 0xfc000000, 0x87776f77, 0x896eff6e, 0x001f8000, 0xb96ef807, - 0xb8f0f812, 0xb8f1f813, - 0x8ef08870, 0xc0071bb8, + 0xb8faf812, 0xb8fbf813, + 0x8efa887a, 0xc0071bbd, 0x00000000, 0xbf8cc07f, - 0xc0071c38, 0x00000008, + 0xc0071ebd, 0x00000008, 0xbf8cc07f, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, - 0xb8f1f803, 0x8671ff71, + 0xb8fbf803, 0x867bff7b, 0x000001ff, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, @@ -308,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, - 0x0000ffff, 0xbef00080, - 0xb9700283, 0xb8f02407, - 0x8e709b70, 0x876d706d, - 0xb8f003c7, 0x8e709a70, - 0x876d706d, 0xb8f0f807, - 0x8670ff70, 0x00007fff, - 0xb970f807, 0xbeee007e, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8fa2407, + 0x8e7a9b7a, 0x876d7a6d, + 0xb8fa03c7, 0x8e7a9a7a, + 0x876d7a6d, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbeee007e, 
0xbeef007f, 0xbefe0180, - 0xbf900004, 0x87708478, - 0xb970f802, 0xbf8e0002, - 0xbf88fffe, 0xb8f02a05, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x87777a77, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x87777a77, 0xbef1007c, + 0xbef00080, 0xb8f02a05, 0x80708170, 0x8e708a70, - 0xb8f11605, 0x80718171, - 0x8e718671, 0x80707170, - 0x80707e70, 0x8271807f, - 0x8671ff71, 0x0000ffff, - 0xc0471cb8, 0x00000040, - 0xbf8cc07f, 0xc04b1d38, - 0x00000048, 0xbf8cc07f, - 0xc0431e78, 0x00000058, - 0xbf8cc07f, 0xc0471eb8, - 0x0000005c, 0xbf8cc07f, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 
0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb4306, 0x867bc17b, + 0xbf84002c, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840028, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0x8e76887b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf840015, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200da, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x8670ff7f, 0x08000000, - 0x8f708370, 0x87777077, - 
0x8670ff7f, 0x70000000, - 0x8f708170, 0x87777077, - 0xbefb007c, 0xbefa0080, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f01605, - 0x80708170, 0x8e708670, - 0x807a707a, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc007a, - 0xc0611efa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611b3a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611bba, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611e3a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8f1f803, 0xbefe007c, - 0xbefc007a, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8fbf801, 0xbefe007c, - 0xbefc007a, 0xc0611efa, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0x8670ff7f, 0x04000000, - 0xbeef0080, 0x876f6f70, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f11605, - 0x80718171, 0x8e718471, - 0x8e768271, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747a74, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a717c, - 0xbf85ffe7, 0xbef40172, - 0xbefa0080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0xe0724000, - 0x7a1d0000, 0xe0724100, - 0x7a1d0100, 0xe0724200, - 0x7a1d0200, 0xe0724300, - 0x7a1d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f14306, - 0x8671c171, 0xbf84002c, - 0xbf8a0000, 0x8670ff6f, 
- 0x04000000, 0xbf840028, - 0x8e718671, 0x8e718271, - 0xbef60071, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8f01605, 0x80708170, - 0x8e708670, 0x807a707a, - 0x807aff7a, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x7a1d0002, 0x68040702, - 0xd0c9006a, 0x0000e302, - 0xbf87fff7, 0xbef70000, - 0xbefa00ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8f12a05, 0x80718171, - 0x8e718271, 0x8e768871, - 0xbef600ff, 0x01000000, - 0xbefc0084, 0xbf0a717c, - 0xbf840015, 0xbf11017c, - 0x8071ff71, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x7a1d0000, - 0xe0724100, 0x7a1d0100, - 0xe0724200, 0x7a1d0200, - 0xe0724300, 0x7a1d0300, - 0x807c847c, 0x807aff7a, - 0x00000400, 0xbf0a717c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200dc, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x08000000, 0x8f6e836e, - 0x87776e77, 0x866eff7f, - 0x70000000, 0x8f6e816e, - 0x87776e77, 0x866eff7f, - 0x04000000, 0xbf84001e, - 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf840019, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82a05, 0x80788178, - 0x8e788a78, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, - 0xbef80080, 0xbefe00c1, - 0xbeff00c1, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x8e76886f, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x806fff6f, 0x00008000, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 
0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xb8f82a05, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0x80f8c078, 0xb8ef1605, - 0x806f816f, 0x8e6f846f, - 0x8e76826f, 0xbef600ff, - 0x01000000, 0xbefc006f, - 0xc031003a, 0x00000078, - 0x80f8c078, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe802d00, 0xbe822d02, - 0xbe842d04, 0xbe862d06, - 0xbe882d08, 0xbe8a2d0a, - 0xbe8c2d0c, 0xbe8e2d0e, - 0xbf06807c, 0xbf84fff0, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, 
+ 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe007a, 0xbeff007b, - 0x866f71ff, 0x000003ff, - 0xb96f4803, 0x866f71ff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0xb8ef1605, - 0x806f816f, 0x8e6f866f, - 0x806e6f6e, 0x806e746e, - 0x826f8075, 0x866fff6f, - 0x0000ffff, 0xc0071cb7, - 0x00000040, 0xc00b1d37, - 0x00000048, 0xc0031e77, - 0x00000058, 0xc0071eb7, - 0x0000005c, 0xbf8cc07f, - 0x866fff6d, 0xf8000000, - 0x8f6f9b6f, 0x8e6f906f, - 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x04000000, - 0x8f6f9a6f, 0x8e6f8f6f, - 0x876e6f6e, 0x866fff70, - 0x00800000, 0x8f6f976f, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8370, - 0xb96ee0c2, 0xbf800002, - 0xb9700002, 0xbf8a0000, - 0x95806f6c, 0xbf810000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x866fff6d, + 0xf8000000, 0x8f6f9b6f, + 
0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x04000000, 0x8f6f9a6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff7a, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0x95806f6c, + 0xbf810000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index e1ac34517642..6bae2e022c6e 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 -var s_save_tmp = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_tmp = ttmp14 +var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine var s_save_xnack_mask_lo = ttmp6 var s_save_xnack_mask_hi = ttmp7 var s_save_buf_rsrc0 = ttmp8 @@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9 var s_save_buf_rsrc2 = ttmp10 var s_save_buf_rsrc3 = ttmp11 var s_save_status = ttmp12 -var s_save_mem_offset = ttmp14 +var s_save_mem_offset = ttmp4 var s_save_alloc_size = s_save_trapsts //conflict -var s_save_m0 = ttmp15 +var s_save_m0 = ttmp5 var s_save_ttmps_lo = s_save_tmp //no conflict var s_save_ttmps_hi = s_save_trapsts //no conflict @@ -207,10 +207,10 @@ var s_restore_mode = ttmp7 var s_restore_pc_lo = ttmp0 var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = ttmp14 -var s_restore_exec_hi = ttmp15 -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 +var s_restore_exec_lo = ttmp4 +var s_restore_exec_hi = ttmp5 +var s_restore_status = ttmp14 +var s_restore_trapsts = ttmp15 var s_restore_xnack_mask_lo = xnack_mask_lo var s_restore_xnack_mask_hi = xnack_mask_hi var s_restore_buf_rsrc0 = ttmp8 @@ -299,12 +299,12 @@ L_FETCH_2ND_TRAP: // Read 
second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS - s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) - s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) - s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 - s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) - s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set @@ -411,7 +411,7 @@ end else end - // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_save_ttmps_lo) get_sgpr_size_bytes(s_save_ttmps_hi) @@ -419,13 +419,11 @@ end s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF - s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 + s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1 ack_sqc_store_workaround() - s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1 ack_sqc_store_workaround() - s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 - 
ack_sqc_store_workaround() - s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1 ack_sqc_store_workaround() /* setup Resource Contants */ @@ -1099,7 +1097,7 @@ end //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_restore_ttmps_lo) get_sgpr_size_bytes(s_restore_ttmps_hi) @@ -1107,10 +1105,9 @@ end s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF - s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 - s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 - s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 - s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) //reuse s_restore_m0 as a temp register From e14ba95b908f049dc98915e3452705dec5e506c6 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 25 Oct 2018 15:40:51 -0400 Subject: [PATCH 134/178] drm/amdgpu: Use heavy weight for tlb invalidation on xgmi configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a bug found in vml2 xgmi 
logic: mtype is always sent as NC on the VMC to TC interface for a page walk, regardless of whether the request is being sent to local or remote GPU. NC means non-coherent and will cause the VMC return data to be cached in the TCC (versus UC – uncached, which will not cache the data). Since the page table updates are being done by SDMA/HDP, then TCC will never be updated and the GC VML2 will continue to hit on the TCC and never get the updated page tables and result in a fault. Heavy-weight TLB invalidation does a WB/INVAL of the L1/L2 GL data caches so TCC will not be hit on the next request. Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 53 +++++++++---------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 6574814d2ade..d5af41143d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB.
- * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type) { signed long r; uint32_t seq; @@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) PACKET3_INVALIDATE_TLBS_DST_SEL(1) | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); @@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + uint32_t flush_type = 0; if (adev->in_gpu_reset) return -EIO; + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); + return invalidate_tlbs_with_kiq(adev, pasid, flush_type); for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) @@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - write_vmid_invalidate_request(kgd, vmid); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + flush_type); break; } } @@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } - write_vmid_invalidate_request(kgd, vmid); + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. 
A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); return 0; } From 2c5a51f57042f9d686d72b96a41eb81dbfb86a64 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 23 Jul 2018 17:45:46 -0400 Subject: [PATCH 135/178] drm/amdgpu: use HMM callback to replace mmu notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in DRM_AMDGPU_USERPTR Kconfig. It supports both KFD userptr and gfx userptr paths. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 164 +++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +- 4 files changed, 74 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 9221e5489069..960a63355705 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - select MMU_NOTIFIER + select HMM_MIRROR help - This option selects CONFIG_MMU_NOTIFIER if it isn't already - selected to enabled full userptr support. + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it + isn't already selected to enabled full userptr support. 
config DRM_AMDGPU_GART_DEBUGFS bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9ca3b4b261b5..57ce44cc3226 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -173,7 +173,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 58ed401c5996..f000704f984d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include #include @@ -58,14 +58,12 @@ * * @adev: amdgpu device pointer * @mm: process address space - * @mn: MMU notifier structure * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion + * @mirror: HMM mirror function support * * Data for each amdgpu device and process address space. 
*/ @@ -73,7 +71,6 @@ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; - struct mmu_notifier mn; enum amdgpu_mn_type type; /* only used on destruction */ @@ -85,8 +82,9 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; struct rb_root_cached objects; - struct mutex read_lock; - atomic_t recursion; + + /* HMM mirror */ + struct hmm_mirror mirror; }; /** @@ -103,7 +101,7 @@ struct amdgpu_mn_node { }; /** - * amdgpu_mn_destroy - destroy the MMU notifier + * amdgpu_mn_destroy - destroy the HMM mirror * * @work: previously sheduled work item * @@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work) } up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + + hmm_mirror_unregister(&amn->mirror); kfree(amn); } /** - * amdgpu_mn_release - callback to notify about mm destruction + * amdgpu_hmm_mirror_release - callback to notify about mm destruction * - * @mn: our notifier - * @mm: the mm this callback is about + * @mirror: the HMM mirror (mm) this callback is about * - * Shedule a work item to lazy destroy our notifier. + * Shedule a work item to lazy destroy HMM mirror. 
*/ -static void amdgpu_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); INIT_WORK(&amn->work, amdgpu_mn_destroy); schedule_work(&amn->work); } - /** * amdgpu_mn_lock - take the write side lock for this notifier * @@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { if (blockable) - mutex_lock(&amn->read_lock); - else if (!mutex_trylock(&amn->read_lock)) + down_read(&amn->lock); + else if (!down_read_trylock(&amn->lock)) return -EAGAIN; - if (atomic_inc_return(&amn->recursion) == 1) - down_read_non_owner(&amn->lock); - mutex_unlock(&amn->read_lock); - return 0; } @@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) */ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&amn->recursion) == 0) - up_read_non_owner(&amn->lock); + up_read(&amn->lock); } /** @@ -235,143 +226,128 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change * - * @mn: our notifier - * @range: mmu notifier context + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. 
*/ -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; /* TODO we should be able to split locking for interval tree and * amdgpu_mn_invalidate_node */ - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); - amdgpu_mn_invalidate_node(node, range->start, end); + amdgpu_mn_invalidate_node(node, start, end); } + amdgpu_mn_read_unlock(amn); + return 0; } /** - * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * We temporarily evict all BOs between start and end. This * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. 
*/ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); list_for_each_entry(bo, &node->bos, mn_list) { struct kgd_mem *mem = bo->kfd_bo; if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - range->start, - end)) - amdgpu_amdkfd_evict_userptr(mem, range->mm); + start, end)) + amdgpu_amdkfd_evict_userptr(mem, amn->mm); } } + amdgpu_mn_read_unlock(amn); + return 0; } -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. 
- */ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - - amdgpu_mn_read_unlock(amn); -} - -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, - }, - [AMDGPU_MN_TYPE_HSA] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, - }, -}; - /* Low bits of any reasonable mm pointer will be unused due to struct * alignment. Use these bits to make a unique key from the mm pointer * and notifier type. */ #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, + .release = amdgpu_hmm_mirror_release + }, + [AMDGPU_MN_TYPE_HSA] = { + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, + .release = amdgpu_hmm_mirror_release + }, +}; + /** - * amdgpu_mn_get - create notifier context + * amdgpu_mn_get - create HMM mirror context * * @adev: amdgpu device pointer * @type: type of MMU notifier context * - * Creates a notifier context for current->mm. + * Creates a HMM mirror context for current->mm. 
*/ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type) @@ -401,12 +377,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, amn->mm = mm; init_rwsem(&amn->lock); amn->type = type; - amn->mn.ops = &amdgpu_mn_ops[type]; amn->objects = RB_ROOT_CACHED; - mutex_init(&amn->read_lock); - atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&amn->mn, mm); + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; + r = hmm_mirror_register(&amn->mirror, mm); if (r) goto free_amn; @@ -432,7 +406,7 @@ free_amn: * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an MMU notifier for the given BO at the specified address. + * Registers an HMM mirror for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -488,11 +462,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) } /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates * * @bo: amdgpu buffer object * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of HMM mirror updates from the buffer object. 
*/ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..0a51fd00021c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -34,7 +34,7 @@ enum amdgpu_mn_type { AMDGPU_MN_TYPE_HSA, }; -#if defined(CONFIG_MMU_NOTIFIER) +#if defined(CONFIG_HMM_MIRROR) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, From 89cd9d23e9a74d94f0db5bbbaf2ef1f6ede36ae5 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 5 Dec 2018 14:03:43 -0500 Subject: [PATCH 136/178] drm/amdkfd: avoid HMM change cause circular lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a circular lock dependency between the gfx and kfd paths with the HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve.
The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d6fe75245d05..cf6b57627842 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1161,21 +1161,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1189,6 +1185,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1196,6 +1195,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1204,9 +1204,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 
0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1214,6 +1212,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1241,9 +1241,7 @@ out_deallocate_doorbell: out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1406,8 +1404,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1418,6 +1414,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1641,7 +1640,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
+ */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1655,7 +1658,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } From 899fbde1464639e3d12eaffdad8481a59b367fcb Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 13 Dec 2018 15:35:28 -0500 Subject: [PATCH 137/178] drm/amdgpu: replace get_user_pages with HMM mirror helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the HMM helper function hmm_vma_fault() to get the physical pages backing a userptr and start tracking CPU page table updates of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch; for kfd, the restore worker is rescheduled to retry. HMM simplifies the concurrent CPU page table update check, so remove the guptasklock, mmu_invalidations and last_set_pages fields from the amdgpu_ttm_tt struct. HMM does not pin pages (increase the page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty().
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++------ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 136 +++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 182 +++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- 9 files changed, 183 insertions(+), 279 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 3369017d9f4f..f57f29763769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 123eb0d7e2e9..9142a355da28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -492,28 +492,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, 
ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -526,11 +510,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -589,7 +569,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -653,7 +632,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -1268,15 +1246,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1750,25 +1719,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - 
release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; + bo->tbo.ttm->pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1777,12 +1732,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. */ } - - /* Mark the BO as valid unless it was invalidated - * again concurrently - */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; } return 0; @@ -1812,7 +1761,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1836,7 +1786,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1852,10 +1802,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1864,16 +1812,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. Put this BO back on - * the userptr_valid_list. 
If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); + /* Stop HMM track the userptr update. We dont check the return + * value for concurrent CPU page table update because we will + * reschedule the restore worker if process_info->evicted_bos + * is updated. + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1903,8 +1851,15 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8cee09c60c99..d72cc583ebd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser 
*p, if (usermm && usermm != current->mm) return -EPERM; - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; + /* Get userptr backing pages. If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but 
somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; - } - - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. 
*/ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -1326,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; @@ -1336,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ + /* No memory allocation is allowed while holding the mn lock. + * p->mn is hold until amdgpu_cs_submit is finished and fence is added + * to BOs. + */ amdgpu_mn_lock(p->mn); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. 
+ */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -EAGAIN; + goto error_abort; } job->owner = p->filp; @@ -1440,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d4fcf5475464..7b840367004c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index f000704f984d..41ccee49a224 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -220,8 +220,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - 
amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } @@ -502,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ + if (range) { + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->pfns = NULL; + INIT_LIST_HEAD(&range->list); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 0a51fd00021c..4803e216e174 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,9 +25,10 @@ #define __AMDGPU_MN_H__ /* - * MMU Notifier + * HMM mirror */ struct amdgpu_mn; +struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index af1e218c6a74..da2efcffed16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -703,101 +704,105 @@ static unsigned long 
amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; + struct hmm_range range; }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct hmm_range *range = &gtt->range; + int r = 0, i; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; + amdgpu_hmm_init_range(range); down_read(&mm->mmap_sem); - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + range->vma = find_vma(mm, gtt->userptr); + if (!range_in_vma(range->vma, gtt->userptr, end)) + r = -EFAULT; + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + range->vma->vm_file) + r = -EPERM; + if (r) +
goto out; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; - } + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), + GFP_KERNEL); + if (range->pfns == NULL) { + r = -ENOMEM; + goto out; } + range->start = gtt->userptr; + range->end = end; - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + range->pfns[0] = range->flags[HMM_PFN_VALID]; + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : range->flags[HMM_PFN_WRITE]; + for (i = 1; i < ttm->num_pages; i++) + range->pfns[i] = range->pfns[0]; - guptask.task = current; - spin_lock(&gtt->guptasklock); - list_add(&guptask.list, &gtt->guptasks); - spin_unlock(&gtt->guptasklock); - - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); - - spin_lock(&gtt->guptasklock); - list_del(&guptask.list); - spin_unlock(&gtt->guptasklock); - - if (r < 0) - goto release_pages; - - pinned += r; - - } while (pinned < ttm->num_pages); + /* This may trigger page table update */ + r = hmm_vma_fault(range, true); + if (r) + goto out_free_pfns; up_read(&mm->mmap_sem); + + for (i = 0; i < ttm->num_pages; i++) + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); + return 0; -release_pages: - release_pages(pages, pinned); +out_free_pfns: + kvfree(range->pfns); + range->pfns = NULL; +out: up_read(&mm->mmap_sem); return r; } +/** + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated + * + * Returns: true if pages are still valid + */ +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + bool r
= false; + + if (!gtt || !gtt->userptr) + return false; + + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); + if (gtt->range.pfns) { + r = hmm_vma_range_done(&gtt->range); + kvfree(gtt->range.pfns); + gtt->range.pfns = NULL; + } + + return r; +} + /** * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * @@ -807,16 +812,10 @@ release_pages: */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); - + for (i = 0; i < ttm->num_pages; ++i) ttm->pages[i] = pages ? pages[i] : NULL; - } } /** @@ -901,10 +900,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + + if (gtt->range.pfns && + ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1254,11 +1254,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(&gtt->guptasklock); - INIT_LIST_HEAD(&gtt->guptasks); - atomic_set(&gtt->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1287,7 +1282,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1300,48 +1294,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks
that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(&gtt->guptasklock); - list_for_each_entry(entry, &gtt->guptasks, list) { - if (entry->task == current) { - spin_unlock(&gtt->guptasklock); - return false; - } - } - spin_unlock(&gtt->guptasklock); - - atomic_inc(&gtt->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(&gtt->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? - */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..8988c87fff9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt
*ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, From ad595b8634f36f04bf69bef4eff854091d94f8b3 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 21 Feb 2019 12:39:21 -0500 Subject: [PATCH 138/178] drm/amdgpu: fix HMM config dependency issue Only select HMM_MIRROR will get kernel config dependency warnings if CONFIG_HMM is missing in the config. Add depends on HMM will solve the issue. Add conditional compilation to fix compilation errors if HMM_MIRROR is not enabled as HMM config is not enabled. Remove unused function amdgpu_ttm_tt_mark_user_pages. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 +++++-------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 13 ++++++++++- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 960a63355705..67553effb649 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,6 +26,7 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU + depends on ARCH_HAS_HMM select HMM_MIRROR help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index da2efcffed16..f9d4a5726929 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -710,7 +710,9 @@ struct amdgpu_ttm_tt { uint64_t 
userptr; struct task_struct *usertask; uint32_t userflags; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) struct hmm_range range; +#endif }; /** @@ -720,6 +722,7 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -802,6 +805,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) return r; } +#endif /** * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. @@ -818,29 +822,6 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) ttm->pages[i] = pages ? pages[i] : NULL; } -/** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty - * - * Called while unpinning userptr pages - */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; - - if (!page) - continue; - - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } -} - /** * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages * @@ -902,9 +883,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) if (gtt->range.pfns && ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 8988c87fff9d..c2b7669004ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -101,10 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int 
amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + return false; +} +#endif + void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); From 6c55d6e90e68a4789cbd72a0287026d4dfb4a9f9 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 4 Mar 2019 14:10:12 -0500 Subject: [PATCH 139/178] drm/amdkfd: support concurrent userptr update for HMM Userptr restore may have concurrent userptr invalidation after hmm_vma_fault adds the range to the hmm->ranges list, needs call hmm_vma_range_done to remove the range from hmm->ranges list first, then reschedule the restore worker. Otherwise hmm_vma_fault will add same range to the list, this will cause loop in the list because range->next point to range itself. Add function untrack_invalid_user_pages to reduce code duplication. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9142a355da28..e1cae4a37113 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1737,6 +1737,23 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return 0; } +/* Remove invalid userptr BOs from hmm track list + * + * Stop HMM track the userptr update + */ +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } +} + /* Validate invalid userptr BOs * * Validates BOs on the userptr_inval_list, and moves them back to the @@ -1854,12 +1871,6 @@ unreserve_out: out_free: kfree(pd_bo_list_entries); out_no_mem: - list_for_each_entry_safe(mem, tmp_mem, - &process_info->userptr_inval_list, - validate_list.head) { - bo = mem->bo; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - } return ret; } @@ -1924,7 +1935,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * hanging. No point trying again. 
*/ } + unlock_out: + untrack_invalid_user_pages(process_info); mutex_unlock(&process_info->lock); mmput(mm); put_task_struct(usertask); From 6826cb3b92a3ae1f696334f2069391642f8caf9f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 4 Mar 2019 14:41:03 -0500 Subject: [PATCH 140/178] drm/amdgpu: support userptr cross VMAs case with HMM userptr may cross two VMAs if the forked child process (not call exec after fork) malloc buffer, then free it, and then malloc larger size buf, kerenl will create new VMA adjacent to old VMA which was cloned from parent process, some pages of userptr are in the first VMA, the rest pages are in the second VMA. HMM expects range only have one VMA, loop over all VMAs in the address range, create multiple ranges to handle this case. See is_mergeable_anon_vma in mm/mmap.c for details. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 130 +++++++++++++++++------- 1 file changed, 93 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f9d4a5726929..b25922e3d1ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -711,7 +711,8 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range range; + struct hmm_range *ranges; + int nr_ranges; #endif }; @@ -723,62 +724,108 @@ struct amdgpu_ttm_tt { * once afterwards to stop HMM tracking */ #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + +/* Support Userptr pages cross max 16 vmas */ +#define MAX_NR_VMAS (16) + int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct hmm_range *range = >t->range; - int r = 0, i; + unsigned long start = gtt->userptr; 
+ unsigned long end = start + ttm->num_pages * PAGE_SIZE; + struct hmm_range *ranges; + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; + uint64_t *pfns, f; + int r = 0, i, nr_pages; if (!mm) /* Happens during process shutdown */ return -ESRCH; - amdgpu_hmm_init_range(range); - down_read(&mm->mmap_sem); - range->vma = find_vma(mm, gtt->userptr); - if (!range_in_vma(range->vma, gtt->userptr, end)) - r = -EFAULT; - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - range->vma->vm_file) - r = -EPERM; - if (r) - goto out; + /* user pages may cross multiple VMAs */ + gtt->nr_ranges = 0; + do { + unsigned long vm_start; - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), - GFP_KERNEL); - if (range->pfns == NULL) { + if (gtt->nr_ranges >= MAX_NR_VMAS) { + DRM_ERROR("Too many VMAs in userptr range\n"); + r = -EFAULT; + goto out; + } + + vm_start = vma ? vma->vm_end : start; + vma = find_vma(mm, vm_start); + if (unlikely(!vma || vm_start < vma->vm_start)) { + r = -EFAULT; + goto out; + } + vmas[gtt->nr_ranges++] = vma; + } while (end > vma->vm_end); + + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", + start, gtt->nr_ranges, ttm->num_pages); + + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vmas[0]->vm_file)) { + r = -EPERM; + goto out; + } + + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); + if (unlikely(!ranges)) { r = -ENOMEM; goto out; } - range->start = gtt->userptr; - range->end = end; - range->pfns[0] = range->flags[HMM_PFN_VALID]; - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - for (i = 1; i < ttm->num_pages; i++) - range->pfns[i] = range->pfns[0]; + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_ranges; + } + + for (i = 0; i < gtt->nr_ranges; i++) + amdgpu_hmm_init_range(&ranges[i]); + + f = ranges[0].flags[HMM_PFN_VALID]; + f |= amdgpu_ttm_tt_is_readonly(ttm) ? 
+ 0 : ranges[0].flags[HMM_PFN_WRITE]; + memset64(pfns, f, ttm->num_pages); + + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { + ranges[i].vma = vmas[i]; + ranges[i].start = max(start, vmas[i]->vm_start); + ranges[i].end = min(end, vmas[i]->vm_end); + ranges[i].pfns = pfns + nr_pages; + nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; + + r = hmm_vma_fault(&ranges[i], true); + if (unlikely(r)) + break; + } + if (unlikely(r)) { + while (i--) + hmm_vma_range_done(&ranges[i]); - /* This may trigger page table update */ - r = hmm_vma_fault(range, true); - if (r) goto out_free_pfns; + } up_read(&mm->mmap_sem); for (i = 0; i < ttm->num_pages; i++) - pages[i] = hmm_pfn_to_page(range, range->pfns[i]); + pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); + gtt->ranges = ranges; return 0; out_free_pfns: - kvfree(range->pfns); - range->pfns = NULL; + kvfree(pfns); +out_free_ranges: + kvfree(ranges); out: up_read(&mm->mmap_sem); + return r; } @@ -792,15 +839,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; bool r = false; + int i; if (!gtt || !gtt->userptr) return false; - WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); - if (gtt->range.pfns) { - r = hmm_vma_range_done(>t->range); - kvfree(gtt->range.pfns); - gtt->range.pfns = NULL; + DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", + gtt->userptr, gtt->nr_ranges, ttm->num_pages); + + WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, + "No user pages to check\n"); + + if (gtt->ranges) { + for (i = 0; i < gtt->nr_ranges; i++) + r |= hmm_vma_range_done(>t->ranges[i]); + kvfree(gtt->ranges[0].pfns); + kvfree(gtt->ranges); + gtt->ranges = NULL; } return r; @@ -884,8 +939,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range.pfns && - ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) + if (gtt->ranges && + ttm->pages[0] == 
hmm_pfn_to_page(>t->ranges[0], + gtt->ranges[0].pfns[0])) WARN_ONCE(1, "Missing get_user_page_done\n"); #endif } From b9c5eb5b801a76df47e12a5754dcc1d3c4b34625 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 4 Mar 2019 10:37:55 -0500 Subject: [PATCH 141/178] drm/amdgpu: more descriptive message if HMM not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If using old kernel config file, CONFIG_ZONE_DEVICE is not selected, so CONFIG_HMM and CONFIG_HMM_MIRROR is not enabled, the current driver error message "Failed to register MMU notifier" is not clear. Inform user with more descriptive message on how to fix the missing kernel config option. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109808 Signed-off-by: Philip Yang Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 4803e216e174..f5b67c63ed6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -53,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, } static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { + DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " + "add CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} From 1986a3b022bee7ad34d0c539b32a81575bf3c9a3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 7 May 2019 17:46:14 -0400 Subject: [PATCH 142/178] drm/amdgpu: Improve error handling for HMM Use unsigned long for number of pages. Check that pfns are valid after hmm_vma_fault. If they are not, return an error instead of continuing with invalid page pointers and PTEs. 
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b25922e3d1ed..7138dc1dd1f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -734,10 +734,11 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; unsigned long end = start + ttm->num_pages * PAGE_SIZE; - struct hmm_range *ranges; struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; + struct hmm_range *ranges; + unsigned long nr_pages, i; uint64_t *pfns, f; - int r = 0, i, nr_pages; + int r = 0; if (!mm) /* Happens during process shutdown */ return -ESRCH; @@ -813,8 +814,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) up_read(&mm->mmap_sem); - for (i = 0; i < ttm->num_pages; i++) + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); + if (!pages[i]) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, pfns[i]); + goto out_invalid_pfn; + } + } gtt->ranges = ranges; return 0; @@ -827,6 +834,13 @@ out: up_read(&mm->mmap_sem); return r; + +out_invalid_pfn: + for (i = 0; i < gtt->nr_ranges; i++) + hmm_vma_range_done(&ranges[i]); + kvfree(pfns); + kvfree(ranges); + return -ENOMEM; } /** @@ -871,7 +885,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - unsigned i; + unsigned long i; for (i = 0; i < ttm->num_pages; ++i) ttm->pages[i] = pages ? 
pages[i] : NULL; From 972fcdb52fe865a2f639e3200b97e648f34a0f41 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 3 Dec 2018 13:56:14 -0600 Subject: [PATCH 143/178] drm/amdkfd: Introduce asic-specific mqd_manager_init function Global function mqd_manager_init just calls asic-specific functions and it is not necessary. Delete it and introduce a mqd_manager_init interface in dqm for asic-specific mqd manager init. Call mqd_manager_init interface directly to initialize mqd manager Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 ++ .../amd/amdkfd/kfd_device_queue_manager_cik.c | 2 ++ .../amd/amdkfd/kfd_device_queue_manager_v9.c | 1 + .../amd/amdkfd/kfd_device_queue_manager_vi.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 29 ------------------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 -- 7 files changed, 8 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index cf6b57627842..a5cc64559bf1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -583,7 +583,7 @@ static struct mqd_manager *get_mqd_manager( mqd_mgr = dqm->mqd_mgrs[type]; if (!mqd_mgr) { - mqd_mgr = mqd_manager_init(type, dqm->dev); + mqd_mgr = dqm->asic_ops.mqd_manager_init(type, dqm->dev); if (!mqd_mgr) pr_err("mqd manager is NULL"); dqm->mqd_mgrs[type] = mqd_mgr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 2770f3ece89f..a5d83ec1c6a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -158,6 +158,8 @@ struct device_queue_manager_asic_ops { void (*init_sdma_vm)(struct device_queue_manager *dqm, 
struct queue *q, struct qcm_process_device *qpd); + struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); }; /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index aed4c21417bf..0d26506798cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -48,6 +48,7 @@ void device_queue_manager_init_cik( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_cik; } void device_queue_manager_init_cik_hawaii( @@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik_hawaii; asic_ops->init_sdma_vm = init_sdma_vm_hawaii; + asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 417515332c35..e9fe39382371 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -37,6 +37,7 @@ void device_queue_manager_init_v9( { asic_ops->update_qpd = update_qpd_v9; asic_ops->init_sdma_vm = init_sdma_vm_v9; + asic_ops->mqd_manager_init = mqd_manager_init_v9; } static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index c3a5dcfe877a..3a7cb2f88366 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -54,6 +54,7 @@ void 
device_queue_manager_init_vi( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; asic_ops->update_qpd = update_qpd_vi; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_vi; } void device_queue_manager_init_vi_tonga( @@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; asic_ops->update_qpd = update_qpd_vi_tonga; asic_ops->init_sdma_vm = init_sdma_vm_tonga; + asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index aed9b9b82213..eeb2b60a36b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -24,35 +24,6 @@ #include "kfd_mqd_manager.h" #include "amdgpu_amdkfd.h" -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev) -{ - switch (dev->device_info->asic_family) { - case CHIP_KAVERI: - return mqd_manager_init_cik(type, dev); - case CHIP_HAWAII: - return mqd_manager_init_cik_hawaii(type, dev); - case CHIP_CARRIZO: - return mqd_manager_init_vi(type, dev); - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - return mqd_manager_init_vi_tonga(type, dev); - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - return mqd_manager_init_v9(type, dev); - default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - } - - return NULL; -} - void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, uint32_t *se_mask) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index eac687b79ad8..2aebcc8219d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ 
-819,8 +819,6 @@ void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, From 59f650a06f8f530e2412e5c47106cd8b8895bd89 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 3 Dec 2018 20:38:43 -0600 Subject: [PATCH 144/178] drm/amdkfd: Introduce DIQ type mqd manager With introduction of new mqd allocation scheme for HIQ, DIQ and HIQ use different mqd allocation scheme, DIQ can't reuse HIQ mqd manager Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 11 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 11 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 11 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 - 6 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index f1596881f20a..58bb3ad233a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -58,6 +58,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->nop_packet = nop.u32all; switch (type) { case KFD_QUEUE_TYPE_DIQ: + kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, + KFD_MQD_TYPE_DIQ); + break; case KFD_QUEUE_TYPE_HIQ: kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, KFD_MQD_TYPE_HIQ); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ae90a99909ef..e69bb4d3c3a9 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -413,6 +413,17 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->is_occupied = is_occupied; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 9dbba609450e..75866b4d5726 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -472,6 +472,17 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->is_occupied = is_occupied; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 7f1cff3de4eb..15d2aafff16d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -472,6 +472,17 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->is_occupied = is_occupied; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + 
mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2aebcc8219d1..10bd1abe1646 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -475,6 +475,7 @@ enum KFD_MQD_TYPE { KFD_MQD_TYPE_HIQ, /* for hiq */ KFD_MQD_TYPE_CP, /* for cp queues and diq */ KFD_MQD_TYPE_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_DIQ, /* for diq */ KFD_MQD_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index fcaaf93681ac..7671658ef1f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -470,7 +470,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", pqn->kq->dev->id); - mqd_type = KFD_MQD_TYPE_HIQ; break; default: seq_printf(m, From fdfa090bc90f34543b8efd05b05a143ae6d52406 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Wed, 5 Dec 2018 10:15:27 -0600 Subject: [PATCH 145/178] drm/amdkfd: Init mqd managers in device queue manager init Previously mqd managers was initialized on demand. As there are only a few type of mqd managers, the on demand initialization doesn't save too much memory. Initialize them on device queue initialization instead and delete the get_mqd_manager interface. This makes codes more organized for future changes. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 127 ++++++------------ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 6 - drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 6 +- .../amd/amdkfd/kfd_process_queue_manager.c | 3 +- 4 files changed, 47 insertions(+), 95 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index a5cc64559bf1..7e79fb32eb2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -368,9 +368,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; retval = allocate_hqd(dqm, q); if (retval) @@ -425,10 +423,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd_mgr; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { deallocate_hqd(dqm, q); @@ -501,12 +497,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = -ENODEV; goto out_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_unlock; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -571,27 +563,6 @@ out_unlock: return retval; } -static struct mqd_manager 
*get_mqd_manager( - struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) -{ - struct mqd_manager *mqd_mgr; - - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; - - pr_debug("mqd type %d\n", type); - - mqd_mgr = dqm->mqd_mgrs[type]; - if (!mqd_mgr) { - mqd_mgr = dqm->asic_ops.mqd_manager_init(type, dqm->dev); - if (!mqd_mgr) - pr_err("mqd manager is NULL"); - dqm->mqd_mgrs[type] = mqd_mgr; - } - - return mqd_mgr; -} - static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -612,13 +583,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_active) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot evict queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = true; q->properties.is_active = false; retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, @@ -717,13 +683,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot restore queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = false; q->properties.is_active = true; retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, @@ -950,9 +911,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = 
dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) @@ -1185,17 +1144,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; - /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: - * lock(dqm) -> bo::reserve - */ - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_deallocate_doorbell; - } - + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1380,12 +1330,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto failed; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; deallocate_doorbell(qpd, q); @@ -1419,7 +1365,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, return retval; -failed: failed_try_destroy_debugged_queue: dqm_unlock(dqm); @@ -1566,11 +1511,7 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) { - r = -ENOMEM; - goto dqm_unlock; - } + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; if (!mqd_mgr->get_wave_state) { r = -EINVAL; @@ -1646,21 +1587,40 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
*/ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; list_del(&q->list); qpd->queue_count--; mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); } -out: return retval; } +static int init_mqd_managers(struct device_queue_manager *dqm) +{ + int i, j; + struct mqd_manager *mqd_mgr; + + for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { + mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); + if (!mqd_mgr) { + pr_err("mqd manager [%d] initialization failed\n", i); + goto out_free; + } + dqm->mqd_mgrs[i] = mqd_mgr; + } + + return 0; + +out_free: + for (j = 0; j < i; j++) { + kfree(dqm->mqd_mgrs[j]); + dqm->mqd_mgrs[j] = NULL; + } + + return -ENOMEM; +} struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; @@ -1698,7 +1658,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.stop = stop_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.uninitialize = uninitialize; @@ -1718,7 +1677,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = initialize_nocpsch; @@ -1769,6 +1727,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) goto out_free; } + if (init_mqd_managers(dqm)) + goto out_free; + if 
(!dqm->ops.initialize(dqm)) return dqm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a5d83ec1c6a8..a5ef7a6650a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -48,8 +48,6 @@ struct device_process_node { * * @update_queue: Queue update routine. * - * @get_mqd_manager: Returns the mqd manager according to the mqd type. - * * @exeute_queues: Dispatches the queues list to the H/W. * * @register_process: This routine associates a specific process with device. @@ -97,10 +95,6 @@ struct device_queue_manager_ops { int (*update_queue)(struct device_queue_manager *dqm, struct queue *q); - struct mqd_manager * (*get_mqd_manager) - (struct device_queue_manager *dqm, - enum KFD_MQD_TYPE type); - int (*register_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 58bb3ad233a1..7a737b50bed4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -58,12 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->nop_packet = nop.u32all; switch (type) { case KFD_QUEUE_TYPE_DIQ: - kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_DIQ); + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; break; case KFD_QUEUE_TYPE_HIQ: - kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_HIQ); + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; break; default: pr_err("Invalid queue type %d\n", type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7671658ef1f1..f18d9cdf9aac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
@@ -461,8 +461,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q->properties.type, q->device->id); continue; } - mqd_mgr = q->device->dqm->ops.get_mqd_manager( - q->device->dqm, mqd_type); + mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; } else if (pqn->kq) { q = pqn->kq->queue; mqd_mgr = pqn->kq->mqd_mgr; From 6c6cde557a8ee2400b169b37ec146cb67518befd Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Wed, 5 Dec 2018 10:56:41 -0600 Subject: [PATCH 146/178] drm/amdkfd: Add mqd size in mqd manager struct Also initialize mqd size on mqd manager initialization Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 4 ++++ 4 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index f8261313ae7b..009d232fb60b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -99,6 +99,7 @@ struct mqd_manager { struct mutex mqd_mutex; struct kfd_dev *dev; + uint32_t mqd_size; }; void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index e69bb4d3c3a9..eec131b801b0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -400,6 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -411,6 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 
mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -422,6 +424,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -433,6 +436,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 75866b4d5726..2f3fb3ddc266 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -459,6 +459,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -470,6 +471,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -481,6 +483,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = 
debugfs_show_mqd; #endif @@ -492,6 +495,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct v9_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 15d2aafff16d..c95568036457 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -459,6 +459,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -470,6 +471,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -481,6 +483,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -492,6 +495,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct vi_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif From 11614c36bc8f4fd22ff91e6150ac63e8bfce33b5 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 27 Nov 2018 21:58:54 -0600 Subject: [PATCH 147/178] drm/amdkfd: 
Allocate MQD trunk for HIQ and SDMA MEC FW for some new asic requires all SDMA MQDs to be in a continuous trunk of memory right after HIQ MQD. Add a field in device queue manager to hold the HIQ/SDMA MQD memory object and allocate MQD trunk on device queue manager initialization. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 +++++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + 2 files changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7e79fb32eb2b..9f159aa67f0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1621,6 +1621,25 @@ out_free: return -ENOMEM; } + +/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ +static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) +{ + int retval; + struct kfd_dev *dev = dqm->dev; + struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; + uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * + dev->device_info->num_sdma_engines * + dev->device_info->num_sdma_queues_per_engine + + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), + (void *)&(mem_obj->cpu_ptr), true); + + return retval; +} + struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; @@ -1730,6 +1749,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (init_mqd_managers(dqm)) goto out_free; + if (allocate_hiq_sdma_mqd(dqm)) { + pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); + goto out_free; + } + if (!dqm->ops.initialize(dqm)) return dqm; @@ -1738,9 +1762,17 @@ out_free: return NULL; } +void deallocate_hiq_sdma_mqd(struct kfd_dev 
*dev, struct kfd_mem_obj *mqd) +{ + WARN(!mqd, "No hiq sdma mqd trunk to free"); + + amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +} + void device_queue_manager_uninit(struct device_queue_manager *dqm) { dqm->ops.uninitialize(dqm); + deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); kfree(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a5ef7a6650a5..3742fd340ec3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -197,6 +197,7 @@ struct device_queue_manager { /* hw exception */ bool is_hws_hang; struct work_struct hw_exception_work; + struct kfd_mem_obj hiq_sdma_mqd; }; void device_queue_manager_init_cik( From e73390d181103a19e1111ec2f25559a0570e9fe0 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 27 Nov 2018 22:08:25 -0600 Subject: [PATCH 148/178] drm/amdkfd: Fix a potential memory leak Free mqd_mem_obj if GTT buffer allocation for MQD+control stack fails. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 2f3fb3ddc266..15274a880ea2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -76,6 +76,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, struct v9_mqd *m; struct kfd_dev *kfd = mm->dev; + *mqd_mem_obj = NULL; /* From V9, for CWSR, the control stack is located on the next page * boundary after the mqd, we will use the gtt allocation function * instead of sub-allocation function.
@@ -93,8 +94,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, } else retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), mqd_mem_obj); - if (retval != 0) + if (retval) { + kfree(*mqd_mem_obj); return -ENOMEM; + } m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; addr = (*mqd_mem_obj)->gpu_addr; From d1f8f0d17d40f05d45c6c15b8fabbec978d92380 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 27 Nov 2018 22:55:50 -0600 Subject: [PATCH 149/178] drm/amdkfd: Move non-sdma mqd allocation out of init_mqd This is preparation work to introduce more mqd allocation schemes. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 20 +++++-- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 57 ++++++++++++------- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 18 +++++- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index eec131b801b0..a00402077e34 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -66,6 +66,19 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -73,11 +86,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, uint64_t addr; struct cik_mqd *m; int retval; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q);
+ if (!*mqd_mem_obj) return -ENOMEM; m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 15274a880ea2..8f8166189fd5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -67,6 +67,40 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + int retval; + struct kfd_mem_obj *mqd_mem_obj = NULL; + + /* From V9, for CWSR, the control stack is located on the next page + * boundary after the mqd, we will use the gtt allocation function + * instead of sub-allocation function. + */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!mqd_mem_obj) + return NULL; + retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &(mqd_mem_obj->gtt_mem), + &(mqd_mem_obj->gpu_addr), + (void *)&(mqd_mem_obj->cpu_ptr), true); + } else { + retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), + &mqd_mem_obj); + } + + if (retval) { + kfree(mqd_mem_obj); + return NULL; + } + + return mqd_mem_obj; + +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -76,28 +110,9 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, struct v9_mqd *m; struct kfd_dev *kfd = mm->dev; - *mqd_mem_obj = NULL; - /* From V9, for CWSR, the control stack is located on the next page - * boundary after the mqd, we will use the gtt allocation function - * instead of sub-allocation function. 
- */ - if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); - if (!*mqd_mem_obj) - return -ENOMEM; - retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, - ALIGN(q->ctl_stack_size, PAGE_SIZE) + - ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), - &((*mqd_mem_obj)->gtt_mem), - &((*mqd_mem_obj)->gpu_addr), - (void *)&((*mqd_mem_obj)->cpu_ptr), true); - } else - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), - mqd_mem_obj); - if (retval) { - kfree(*mqd_mem_obj); + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; - } m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; addr = (*mqd_mem_obj)->gpu_addr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index c95568036457..7f0b10e46358 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -68,6 +68,18 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -75,10 +87,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, int retval; uint64_t addr; struct vi_mqd *m; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), - mqd_mem_obj); - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; From 0803e7a9e850f9d6397c594d6c6deac9b2b6d696 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 27 Nov 2018 23:50:12 -0600 Subject: [PATCH 150/178] 
drm/amdkfd: Allocate hiq and sdma mqd from mqd trunk Instead of allocating hiq and sdma mqd from the sub-allocator, allocate them from a mqd trunk pool. This is done for all asics. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 49 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 7 +++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 20 +++----- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 22 +++------ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 22 +++------ 5 files changed, 80 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index eeb2b60a36b5..9307811bc427 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -23,6 +23,55 @@ #include "kfd_mqd_manager.h" #include "amdgpu_amdkfd.h" +#include "kfd_device_queue_manager.h" + +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev) +{ + struct kfd_mem_obj *mqd_mem_obj = NULL; + + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem; + mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr; + mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr; + + return mqd_mem_obj; +} + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj = NULL; + uint64_t offset; + + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + offset = (q->sdma_engine_id * + dev->device_info->num_sdma_queues_per_engine + + q->sdma_queue_id) * + dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; + + offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem + + offset); + mqd_mem_obj->gpu_addr
= dev->dqm->hiq_sdma_mqd.gpu_addr + offset; + mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t) + dev->dqm->hiq_sdma_mqd.cpu_ptr + offset); + + return mqd_mem_obj; +} + +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + WARN_ON(!mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); +} void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 009d232fb60b..56af256a191b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -102,6 +102,13 @@ struct mqd_manager { uint32_t mqd_size; }; +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev); + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q); +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj); + void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, uint32_t *se_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index a00402077e34..6e8509ec29d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -71,6 +71,9 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, { struct kfd_mem_obj *mqd_mem_obj; + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd), &mqd_mem_obj)) return NULL; @@ -148,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct cik_sdma_rlc_registers *m; + struct kfd_dev *dev = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct cik_sdma_rlc_registers), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) 
return -ENOMEM; m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; @@ -175,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, @@ -419,7 +415,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; - mqd->uninit_mqd = uninit_mqd; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; @@ -443,7 +439,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 8f8166189fd5..4750338199b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -73,6 +73,9 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, int retval; struct kfd_mem_obj *mqd_mem_obj = NULL; + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + /* From V9, for CWSR, the control stack is located on the next page * boundary after the mqd, we will use the gtt allocation function * instead of sub-allocation function. 
@@ -346,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct v9_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct v9_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -368,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) @@ -484,7 +478,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; - mqd->uninit_mqd = uninit_mqd; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; @@ -508,7 +502,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 7f0b10e46358..b550dea9b10a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -73,6 +73,9 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, { struct kfd_mem_obj *mqd_mem_obj; + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd), &mqd_mem_obj)) return NULL; @@ -341,13 +344,10 @@ static int 
init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct vi_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct vi_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -363,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) @@ -478,7 +472,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; - mqd->uninit_mqd = uninit_mqd; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; @@ -502,7 +496,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; From e78579aab7c3e112b85bf92d76432b917c841b5b Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 14 Jan 2019 17:36:26 -0500 Subject: [PATCH 151/178] drm/amdkfd: Move sdma_queue_id calculation into allocate_sdma_queue() This avoids duplicated code. 
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 29 +++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9f159aa67f0c..bac1f36d38a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -882,7 +882,7 @@ static int stop_nocpsch(struct device_queue_manager *dqm) } static int allocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int *sdma_id) + struct queue *q) { int bit; @@ -891,7 +891,14 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, bit = __ffs64(dqm->sdma_bitmap); dqm->sdma_bitmap &= ~(1ULL << bit); - *sdma_id = bit; + q->sdma_id = bit; + + q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); + + pr_debug("SDMA id is: %d\n", q->sdma_id); + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); return 0; } @@ -913,21 +920,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; - retval = allocate_sdma_queue(dqm, &q->sdma_id); + retval = allocate_sdma_queue(dqm, q); if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - pr_debug("SDMA id is: %d\n", q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, 
&q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); @@ -1128,16 +1128,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - retval = allocate_sdma_queue(dqm, &q->sdma_id); + retval = allocate_sdma_queue(dqm, q); if (retval) goto out; - q->properties.sdma_queue_id = - q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = - q->sdma_id % get_num_sdma_engines(dqm); - pr_debug("SDMA id is: %d\n", q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); } retval = allocate_doorbell(qpd, q); From 065e4bdfa1f3ab2884c110394d8b7e7ebe3b988c Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 8 Feb 2019 15:44:35 -0600 Subject: [PATCH 152/178] drm/amdkfd: Fix sdma queue map issue Previous codes assumes there are two sdma engines. This is not true e.g., Raven only has 1 SDMA engine. Fix the issue by using sdma engine number info in device_info. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index bac1f36d38a2..d41045d3fc3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1212,12 +1212,17 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, return 0; } -static int unmap_sdma_queues(struct device_queue_manager *dqm, - unsigned int sdma_engine) +static int unmap_sdma_queues(struct device_queue_manager *dqm) { - return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, - sdma_engine); + int i, retval = 0; + + for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) { + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); + if (retval) + return retval; + } + return retval; } /* dqm->lock mutex has to be locked before calling this function */ @@ -1256,10 +1261,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, pr_debug("Before destroying queues, sdma queue count is : %u\n", dqm->sdma_queue_count); - if (dqm->sdma_queue_count > 0) { - unmap_sdma_queues(dqm, 0); - unmap_sdma_queues(dqm, 1); - } + if (dqm->sdma_queue_count > 0) + unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); From 1b4670f6983156526c286723465fdf805070b45d Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 7 Feb 2019 14:02:27 -0600 Subject: [PATCH 153/178] drm/amdkfd: Introduce XGMI SDMA queue type Existing QUEUE_TYPE_SDMA means PCIe optimized SDMA queues. 
Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized for non-PCIe transfer such as XGMI. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 +++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 123 +++++++++++++----- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 3 + .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 + .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +- .../amd/amdkfd/kfd_process_queue_manager.c | 10 +- include/uapi/linux/kfd_ioctl.h | 7 +- 10 files changed, 132 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3ccaa38779ea..38ae53fe8182 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_COMPUTE; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) q_properties->type = KFD_QUEUE_TYPE_SDMA; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) + q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; else return -ENOTSUPP; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8202a5db3a35..1368b41cb92b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + 
.num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; #endif @@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = { .needs_iommu_device = false, 
.needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d41045d3fc3a..1562590d837e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id); + struct queue *q); static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - if (type == KFD_QUEUE_TYPE_SDMA) + if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } @@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) return 
dqm->dev->device_info->num_sdma_engines; } +static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines; +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines * dqm->dev->device_info->num_sdma_queues_per_engine; } +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) * preserve the user mode ABI. */ q->doorbell_id = q->properties.queue_id; - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { /* For SDMA queues on SOC15 with 8-byte doorbell, use static * doorbell assignments based on the engine and queue id. 
* The doobell index distance between RLC (2*i) and (2*i+1) @@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, struct kfd_dev *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev->device_info->asic_family) || - q->properties.type == KFD_QUEUE_TYPE_SDMA) + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) retval = create_sdma_queue_nocpsch(dqm, q, qpd); else retval = -EINVAL; @@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's @@ -430,7 +446,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); @@ -521,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { retval = 
mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -548,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { if (WARN(q->process->mm != current->mm, "should only run in user thread")) retval = -EFAULT; @@ -840,6 +861,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -852,6 +874,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; return 0; } @@ -886,17 +909,34 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, { int bit; - if (dqm->sdma_bitmap == 0) - return -ENOMEM; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + q->properties.sdma_engine_id = q->sdma_id % + get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_sdma_engines(dqm); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (dqm->xgmi_sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + /* sdma_engine_id is sdma id including + * both PCIe-optimized SDMAs and XGMI- + * optimized SDMAs. 
The calculation below + * assumes the first N engines are always + * PCIe-optimized ones + */ + q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + + q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_xgmi_sdma_engines(dqm); + } - bit = __ffs64(dqm->sdma_bitmap); - dqm->sdma_bitmap &= ~(1ULL << bit); - q->sdma_id = bit; - - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); - - pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); @@ -904,11 +944,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_id) + struct queue *q) { - if (sdma_id >= get_num_sdma_queues(dqm)) - return; - dqm->sdma_bitmap |= (1ULL << sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->sdma_id >= get_num_sdma_queues(dqm)) + return; + dqm->sdma_bitmap |= (1ULL << q->sdma_id); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return; + dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + } } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -946,7 +992,7 @@ out_uninit_mqd: out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); return retval; } @@ -1004,8 +1050,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; 
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1127,7 +1175,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { retval = allocate_sdma_queue(dqm, q); if (retval) goto out; @@ -1167,6 +1216,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -1182,8 +1233,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - deallocate_sdma_queue(dqm, q->sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); out: return retval; } @@ -1216,7 +1268,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm) { int i, retval = 0; - for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) { + for (i = 0; i < dqm->dev->device_info->num_sdma_engines + + dqm->dev->device_info->num_xgmi_sdma_engines; i++) { retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); if (retval) @@ -1258,10 +1311,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - pr_debug("Before destroying queues, sdma queue count is : %u\n", - dqm->sdma_queue_count); + pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", + dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); - if 
(dqm->sdma_queue_count > 0) + if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, @@ -1333,7 +1386,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } list_del(&q->list); @@ -1550,7 +1606,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } if (q->properties.is_active) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3742fd340ec3..88b4c007696e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -181,10 +181,12 @@ struct device_queue_manager { unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; + unsigned int xgmi_sdma_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; uint64_t sdma_bitmap; + uint64_t xgmi_sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; @@ -216,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager 
*dqm); +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 33830b1a5a54..604570bea6bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -175,6 +175,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -221,6 +222,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index bf20c6d32ef3..3cdb19826927 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; 
break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 045a229436a0..077c47fd4fee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; - compute_queue_count = queue_count - pm->dqm->sdma_queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count - + pm->dqm->xgmi_sdma_queue_count; /* check if there is over subscription * Note: the arbitration between the number of VMIDs and diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 10bd1abe1646..8f02d7817162 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -188,6 +188,7 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; unsigned int num_sdma_engines; + unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; @@ -329,7 +330,8 @@ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, - KFD_QUEUE_TYPE_DIQ + KFD_QUEUE_TYPE_DIQ, + KFD_QUEUE_TYPE_SDMA_XGMI }; enum kfd_queue_format { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f18d9cdf9aac..e652e25ede75 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -186,8 +186,13 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { - pr_err("Over-subscription is not allowed for SDMA.\n"); + case KFD_QUEUE_TYPE_SDMA_XGMI: + if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count + >= get_num_sdma_queues(dev->dqm)) || + (type == 
KFD_QUEUE_TYPE_SDMA_XGMI && + dev->dqm->xgmi_sdma_queue_count + >= get_num_xgmi_sdma_queues(dev->dqm))) { + pr_debug("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; } @@ -446,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q = pqn->q; switch (q->properties.type) { case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: seq_printf(m, " SDMA queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_SDMA; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e7d5f3376b0..20917c59f39c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args { }; /* For kfd_ioctl_create_queue_args.queue_type. */ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0 -#define KFD_IOC_QUEUE_TYPE_SDMA 1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 +#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 From 14568cf6583e9d82ee21129293dda68ef6a7af9e Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 8 Feb 2019 10:56:42 -0600 Subject: [PATCH 154/178] drm/amdkfd: Expose sdma engine numbers to topology Expose available numbers of both SDMA queue types in the topology. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 769dbc7be8cb..381d09e0d0f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -476,6 +476,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.drm_render_minor); sysfs_show_64bit_prop(buffer, "hive_id", dev->node_props.hive_id); + sysfs_show_32bit_prop(buffer, "num_sdma_engines", + dev->node_props.num_sdma_engines); + sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines", + dev->node_props.num_sdma_xgmi_engines); if (dev->gpu) { log_max_watch_addr = @@ -1281,6 +1285,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; + dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_xgmi_engines = + gpu->device_info->num_xgmi_sdma_engines; kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 84710cfd23c2..949e885dfb53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -78,6 +78,8 @@ struct kfd_node_properties { uint32_t max_engine_clk_fcompute; uint32_t max_engine_clk_ccompute; int32_t drm_render_minor; + uint32_t num_sdma_engines; + uint32_t num_sdma_xgmi_engines; uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; From 16631afff224475134124fb1982ba80365ec5640 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 14 Feb 2019 10:25:42 -0600 Subject: [PATCH 155/178] drm/amdkfd: Delete alloc_format field from map_queue struct Alloc 
format was never really supported by MEC FW. FW always does one per pipe allocation. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 7 +------ drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 7 +------ 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 604570bea6bd..3dd731c69b5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -153,8 +153,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index 3cdb19826927..2adaf40027eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index f2bcf5c092ea..0661339071f0 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -255,11 +255,6 @@ enum mes_map_queues_queue_type_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_enum { engine_sel__mes_map_queues__compute_vi = 0, engine_sel__mes_map_queues__sdma0_vi = 2, @@ -279,7 +274,7 @@ struct pm4_mes_map_queues { enum mes_map_queues_queue_sel_enum queue_sel:2; uint32_t reserved2:15; enum mes_map_queues_queue_type_enum queue_type:3; - enum mes_map_queues_alloc_format_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 7c8d9b357749..5466cfe1c3cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_vi_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_vi_enum { engine_sel__mes_map_queues__compute_vi = 0, engine_sel__mes_map_queues__sdma0_vi = 2, @@ -240,7 +235,7 @@ struct pm4_mes_map_queues { enum mes_map_queues_queue_sel_vi_enum queue_sel:2; uint32_t reserved2:15; enum mes_map_queues_queue_type_vi_enum queue_type:3; - enum mes_map_queues_alloc_format_vi_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_vi_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; From 32cce8bc86032cc0c24086d6ce3ccf8e05e686cf Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 23 Apr 2019 23:32:56 -0400 Subject: 
[PATCH 156/178] drm/amdkfd: Fix a circular lock dependency Fix a circular lock dependency exposed under userptr memory pressure. The DQM lock is the only one taken inside the MMU notifier. We need to make sure that no reclaim is done under this lock, and that no other locks are taken under which reclaim is possible. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1562590d837e..0bfdb141b6e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -794,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); dqm->processes_count++; - kfd_inc_compute_active(dqm->dev); dqm_unlock(dqm); + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + kfd_inc_compute_active(dqm->dev); + return retval; } @@ -818,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); goto out; } } @@ -826,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm, retval = 1; out: dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. 
+ */ + if (!retval) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -1519,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct queue *q, *next; struct device_process_node *cur, *next_dpn; int retval = 0; + bool found = false; dqm_lock(dqm); @@ -1537,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); + found = true; break; } } dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -1588,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + bool found = false; retval = 0; @@ -1624,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); + found = true; break; } } @@ -1638,6 +1657,12 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + /* Lastly, free mqd resources. * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
*/ From ed81cd6e0e9f37c6aca6d96194307b995f3b5f30 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 21 Mar 2019 08:08:17 -0400 Subject: [PATCH 157/178] drm/amdkfd: Add VegaM support Add the VegaM information to KFD Signed-off-by: Kent Russell Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 +++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 +++++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 + 7 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1714900035d7..59f8ca4297db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info #define polaris12_cache_info carrizo_cache_info +#define vegam_cache_info carrizo_cache_info /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info @@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris12_cache_info; num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); break; + case CHIP_VEGAM: + pcache_info = vegam_cache_info; + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); + break; case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1368b41cb92b..a53dda9071b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -232,6 +232,23 @@ static const struct kfd_device_info 
polaris12_device_info = { .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info vegam_device_info = { + .asic_family = CHIP_VEGAM, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + static const struct kfd_device_info vega10_device_info = { .asic_family = CHIP_VEGA10, .max_pasid_bits = 16, @@ -387,6 +404,9 @@ static const struct kfd_deviceid supported_devices[] = { { 0x6995, &polaris12_device_info }, /* Polaris12 */ { 0x6997, &polaris12_device_info }, /* Polaris12 */ { 0x699F, &polaris12_device_info }, /* Polaris12 */ + { 0x694C, &vegam_device_info }, /* VegaM */ + { 0x694E, &vegam_device_info }, /* VegaM */ + { 0x694F, &vegam_device_info }, /* VegaM */ { 0x6860, &vega10_device_info }, /* Vega10 */ { 0x6861, &vega10_device_info }, /* Vega10 */ { 0x6862, &vega10_device_info }, /* Vega10 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0bfdb141b6e7..ece35c7a77b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1811,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index c2a22f6acf9a..22a8e88b6a67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -398,6 +398,7 @@ int 
kfd_init_apertures(struct kfd_process *process) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); break; case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 7a737b50bed4..1cc03b3ddbb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -315,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kernel_queue_init_vi(&kq->ops_asic_specific); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 077c47fd4fee..808194663a7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -228,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: pm->pmf = &kfd_vi_pm_funcs; break; case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 381d09e0d0f7..478b5daace4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1305,6 +1305,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: pr_debug("Adding doorbell packet type capability\n"); dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & From 460960587ae80802c76384b0580ed9c89e6e4bb7 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Thu, 14 Feb 2019 16:02:26 -0500 Subject: [PATCH 158/178] drm/amdkfd: Add domain number into gpu_id A multi-socket server can have multiple PCIe segments so BDF is not enough to distinguish each GPU.
Also add domain number into account when generating gpu_id. Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 478b5daace4b..592cc6acffd9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1082,8 +1082,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; - buf[1] = gpu->pdev->subsystem_vendor; - buf[2] = gpu->pdev->subsystem_device; + buf[1] = gpu->pdev->subsystem_vendor | + (gpu->pdev->subsystem_device << 16); + buf[2] = pci_domain_nr(gpu->pdev->bus); buf[3] = gpu->pdev->device; buf[4] = gpu->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); From fb2dbfd2427e82ae63742f667cda19f1af6b77c2 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Wed, 15 May 2019 08:35:29 -0400 Subject: [PATCH 159/178] drm/amdgpu: Add Unique Identifier sysfs file unique_id v2 Add a file that provides a Unique ID for the GPU. This will persist across machines and is guaranteed to be unique. 
This is only available for GFX9 and newer, so older ASICs will not have this file in the sysfs pool v2: Store it in adev for ASICs that don't have a hwmgr Reviewed-by: Alex Deucher Signed-off-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 32 +++++++++++++++++++ .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 9 ++++++ .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 10 ++++++ .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 ++++++ 5 files changed, 63 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 38e782dee478..58f8f132904d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -957,6 +957,8 @@ struct amdgpu_device { long sdma_timeout; long video_timeout; long compute_timeout; + + uint64_t unique_id; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index adba9ea03e63..a73e1903d29b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1368,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, count0, count1, pcie_get_mps(adev->pdev)); } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. + * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. 
This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->unique_id) + return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + + return 0; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1418,6 +1441,7 @@ static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, amdgpu_get_ppfeature_status, amdgpu_set_ppfeature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -2814,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->unique_id) + ret = device_create_file(adev->dev, &dev_attr_unique_id); + if (ret) { + DRM_ERROR("failed to create device file unique_id\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2875,6 +2905,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_mem_busy_percent); if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); + if (adev->unique_id) + device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_ppfeatures); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 9585ba51d853..ce6aeb5a0362 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -356,6 +356,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) struct vega10_hwmgr *data = hwmgr->backend; int i; uint32_t sub_vendor_id, hw_revision; + uint32_t top32, bottom32; struct amdgpu_device *adev = hwmgr->adev; vega10_initialize_power_tune_defaults(hwmgr); @@ -499,6 +500,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) (hw_revision == 0) && (sub_vendor_id != 0x1002)) data->smu_features[GNLD_PCC_LIMIT].supported = true; + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } #ifdef PPLIB_VEGA10_EVV_SUPPORT diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 1a909dda37c7..efb6d3762feb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr) static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t top32, bottom32; int i; data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr) ((data->registry_data.disallowed_features >> i) & 1) ? 
false : true; } + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index d18f34d4a51e..f27c6fbb192e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -324,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr) static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t top32, bottom32; int i; data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -393,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) ((data->registry_data.disallowed_features >> i) & 1) ? false : true; } + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) From a7517677085413ff69d0c9038b0aadc2e7954143 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Tue, 21 May 2019 15:57:21 +0800 Subject: [PATCH 160/178] drm/amd/powerplay: Fix code error for translating int type to bool type correctly Fix code error to support value < 0 or > 1. 
Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 3a47130f8150..d5f03b962539 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -348,7 +348,7 @@ static int smu_early_init(void *handle) struct smu_context *smu = &adev->smu; smu->adev = adev; - smu->pm_enabled = amdgpu_dpm; + smu->pm_enabled = !!amdgpu_dpm; mutex_init(&smu->mutex); return smu_set_funcs(adev); From 1825fd34e8ed026911c6de6d7be7bd2d1ff8101a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 22 May 2019 12:00:54 -0400 Subject: [PATCH 161/178] drm/amd/display: Switch the custom "max bpc" property to the DRM prop [Why] The custom "max bpc" property was added to limit color depth while the DRM one was still being merged. It's been a few kernel versions since then and this TODO was still sticking around. [How] Attach the DRM max bpc property to the connector and drop all of our custom property management. Set the max bpc to 8 by default since DRM defaults to the max in the range which would be 16 in this case. No behavioral changes are intended with this patch, it should just be a refactor. 
v2: Don't force 8bpc when no state is given Cc: Leo Li Cc: Harry Wentland Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 -- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 ++++++++----------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 - 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b083b219b1a9..30e6ad8a90bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) amdgpu_dither_enum_list, sz); if (amdgpu_device_has_dc_support(adev)) { - adev->mode_info.max_bpc_property = - drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); - if (!adev->mode_info.max_bpc_property) - return -ENOMEM; adev->mode_info.abm_level_property = drm_property_create_range(adev->ddev, 0, "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e9e3db778c6..eb9975f4decb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -331,8 +331,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* maximum number of bits per channel for monitor color */ - struct drm_property *max_bpc_property; /* Adaptive Backlight Modulation (power feature) */ struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8d53aced6c9f..acb894ae6013 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ 
-2969,14 +2969,14 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode, static enum dc_color_depth convert_color_depth_from_display_info(const struct drm_connector *connector) { - struct dm_connector_state *dm_conn_state = - to_dm_connector_state(connector->state); uint32_t bpc = connector->display_info.bpc; - /* TODO: Remove this when there's support for max_bpc in drm */ - if (dm_conn_state && bpc > dm_conn_state->max_bpc) - /* Round down to nearest even number. */ - bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1); + /* TODO: Use passed in state instead of the current state. */ + if (connector->state) { + bpc = connector->state->max_bpc; + /* Round down to the nearest even number. */ + bpc = bpc - (bpc & 1); + } switch (bpc) { case 0: @@ -3618,9 +3618,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { dm_new_state->underscan_enable = val; ret = 0; - } else if (property == adev->mode_info.max_bpc_property) { - dm_new_state->max_bpc = val; - ret = 0; } else if (property == adev->mode_info.abm_level_property) { dm_new_state->abm_level = val; ret = 0; @@ -3666,9 +3663,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { *val = dm_state->underscan_enable; ret = 0; - } else if (property == adev->mode_info.max_bpc_property) { - *val = dm_state->max_bpc; - ret = 0; } else if (property == adev->mode_info.abm_level_property) { *val = dm_state->abm_level; ret = 0; @@ -3725,7 +3719,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->underscan_enable = false; state->underscan_hborder = 0; state->underscan_vborder = 0; - state->max_bpc = 8; __drm_atomic_helper_connector_reset(connector, &state->base); } @@ -3751,7 +3744,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) new_state->underscan_enable = 
state->underscan_enable; new_state->underscan_hborder = state->underscan_hborder; new_state->underscan_vborder = state->underscan_vborder; - new_state->max_bpc = state->max_bpc; return &new_state->base; } @@ -4672,9 +4664,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, drm_object_attach_property(&aconnector->base.base, adev->mode_info.underscan_vborder_property, 0); - drm_object_attach_property(&aconnector->base.base, - adev->mode_info.max_bpc_property, - 0); + + drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); + + /* This defaults to the max in the range, but we want 8bpc. */ + aconnector->base.state->max_bpc = 8; + aconnector->base.state->max_requested_bpc = 8; if (connector_type == DRM_MODE_CONNECTOR_eDP && dc_is_dmcu_initialized(adev->dm.dc)) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 978ff14a7d45..b0ce44422e90 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -304,7 +304,6 @@ struct dm_connector_state { enum amdgpu_rmx_type scaling; uint8_t underscan_vborder; uint8_t underscan_hborder; - uint8_t max_bpc; bool underscan_enable; bool freesync_capable; uint8_t abm_level; From 42ba01fc30e6f84e7433879052474e716237ab33 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 22 May 2019 12:00:55 -0400 Subject: [PATCH 162/178] drm/amd/display: Use new connector state when getting color depth [Why] The current state on the connector is queried when getting the max bpc rather than the new state. This means that a new max bpc value can only currently take effect on the commit *after* it changes. The new state should be passed in instead. [How] Pass down the dm_state as drm state to where we do color depth lookup. The passed in state can still be NULL when called from amdgpu_dm_connector_mode_valid, so make sure that we have reasonable defaults in place. 
That should probably be addressed at some point. This change now (correctly) causes a modeset to occur when changing the max bpc for a connector. v2: Drop extra TODO. Cc: Leo Li Cc: Harry Wentland Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index acb894ae6013..340404f78034 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2967,13 +2967,13 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode, } static enum dc_color_depth -convert_color_depth_from_display_info(const struct drm_connector *connector) +convert_color_depth_from_display_info(const struct drm_connector *connector, + const struct drm_connector_state *state) { uint32_t bpc = connector->display_info.bpc; - /* TODO: Use passed in state instead of the current state. */ - if (connector->state) { - bpc = connector->state->max_bpc; + if (state) { + bpc = state->max_bpc; /* Round down to the nearest even number. 
*/ bpc = bpc - (bpc & 1); } @@ -3094,11 +3094,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_ } -static void -fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, - const struct drm_display_mode *mode_in, - const struct drm_connector *connector, - const struct dc_stream_state *old_stream) +static void fill_stream_properties_from_drm_display_mode( + struct dc_stream_state *stream, + const struct drm_display_mode *mode_in, + const struct drm_connector *connector, + const struct drm_connector_state *connector_state, + const struct dc_stream_state *old_stream) { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; @@ -3121,7 +3122,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE; timing_out->display_color_depth = convert_color_depth_from_display_info( - connector); + connector, connector_state); timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; @@ -3318,6 +3319,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, { struct drm_display_mode *preferred_mode = NULL; struct drm_connector *drm_connector; + const struct drm_connector_state *con_state = + dm_state ? 
&dm_state->base : NULL; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -3390,10 +3393,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ if (!scale || mode_refresh != preferred_refresh) fill_stream_properties_from_drm_display_mode(stream, - &mode, &aconnector->base, NULL); + &mode, &aconnector->base, con_state, NULL); else fill_stream_properties_from_drm_display_mode(stream, - &mode, &aconnector->base, old_stream); + &mode, &aconnector->base, con_state, old_stream); update_stream_scaling_settings(&mode, dm_state, stream); From f1e5e913028669c2bbac6664b8f01cb124349692 Mon Sep 17 00:00:00 2001 From: Yogesh Mohan Marimuthu Date: Fri, 17 May 2019 15:46:58 +0530 Subject: [PATCH 163/178] drm/amdgpu: sort probed modes before adding common modes [Why] There are monitors which can have more than one preferred mode set. There are chances in these monitors that if common modes are added in function amdgpu_dm_connector_add_common_modes(), these common modes can be calculated with different preferred mode than the one used in function decide_crtc_timing_for_drm_display_mode(). The preferred mode can be different because after common modes are added, the mode list is sorted and this changes the order of preferred modes in the list. The first mode in the list with preferred flag set is selected as preferred mode. Due to this the preferred mode selected varies. If same preferred mode is not selected in common mode calculation and crtc timing, then during mode set instead of setting preferred timing, common mode timing will be applied which can cause "out of range" message in the monitor with monitor blanking out. [How] Sort the modes before adding common modes. The same sorting function is called during common mode addition and deciding crtc timing. 
Signed-off-by: Yogesh Mohan Marimuthu Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 340404f78034..c3e78039d1e6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4588,6 +4588,15 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, amdgpu_dm_connector->num_modes = drm_add_edid_modes(connector, edid); + /* sorting the probed modes before calling function + * amdgpu_dm_get_native_mode() since EDID can have + * more than one preferred mode. The modes that are + * later in the probed mode list could be of higher + * and preferred resolution. For example, 3840x2160 + * resolution in base EDID preferred timing and 4096x2160 + * preferred resolution in DID extension block later. + */ + drm_mode_sort(&connector->probed_modes); amdgpu_dm_get_native_mode(connector); } else { amdgpu_dm_connector->num_modes = 0; From 1c1e53f7f2ce191e6787d3d0648fe8ce7088ceaa Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 24 May 2019 09:15:17 -0400 Subject: [PATCH 164/178] drm/amd/doc: Add XGMI sysfs documentation Acked-by: Slava Abramov Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 9 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index a740e491dfcc..cacfcfad2356 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -70,6 +70,15 @@ Interrupt Handling .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c :internal: +AMDGPU XGMI Support +=================== + +.. 
kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c + :doc: AMDGPU XGMI Support + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c + :internal: + GPU Power/Thermal Controls and Monitoring ========================================= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index e48e9394f1e4..d11eba09eadd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) return &hive->device_list; } +/** + * DOC: AMDGPU XGMI Support + * + * XGMI is a high speed interconnect that joins multiple GPU cards + * into a homogeneous memory space that is organized by a collective + * hive ID and individual node IDs, both of which are 64-bit numbers. + * + * The file xgmi_device_id contains the unique per GPU device ID and + * is stored in the /sys/class/drm/card${cardno}/device/ directory. + * + * Inside the device directory a sub-directory 'xgmi_hive_info' is + * created which contains the hive ID and the list of nodes. + * + * The hive ID is stored in: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id + * + * The node information is stored in numbered directories: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id + * + * Each device has its own xgmi_hive_info directory with a mirror + * set of node sub-directories. + * + * The XGMI memory space is built by contiguously adding the power of + * two padded VRAM space from each node to each other. 
+ * + */ + + static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, struct device_attribute *attr, char *buf) { From 74abc2210e105f0fffe59c35d2329201f1b4310e Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 24 May 2019 09:21:54 -0400 Subject: [PATCH 165/178] drm/amd/doc: Add RAS documentation to guide Acked-by: Slava Abramov Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu.rst | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index cacfcfad2356..86138798128f 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -79,6 +79,17 @@ AMDGPU XGMI Support .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c :internal: +AMDGPU RAS debugfs control interface +==================================== + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :doc: AMDGPU RAS debugfs control interface + + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :internal: + + GPU Power/Thermal Controls and Monitoring ========================================= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d5719b0fb82c..7c8a4aedf07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -244,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -/* - * DOC: ras debugfs control interface +/** + * DOC: AMDGPU RAS debugfs control interface * * It accepts struct ras_debug_if who has two members. 
* From c8bdf2b63e5b6b31b3b4826b8e87c0c2f6b650ff Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 27 May 2019 11:12:51 +0800 Subject: [PATCH 166/178] drm/amdgpu: fix unload driver fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dc_destroy should be called before amdgpu_cgs_destroy_device, as it will use cgs context to read or write registers. Signed-off-by: Emily Deng Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c3e78039d1e6..53b76e0de940 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -616,6 +616,10 @@ error: static void amdgpu_dm_fini(struct amdgpu_device *adev) { amdgpu_dm_destroy_drm_device(&adev->dm); + + /* DC Destroy TODO: Replace destroy DAL */ + if (adev->dm.dc) + dc_destroy(&adev->dm.dc); /* * TODO: pageflip, vlank interrupt * @@ -630,9 +634,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) mod_freesync_destroy(adev->dm.freesync_module); adev->dm.freesync_module = NULL; } - /* DC Destroy TODO: Replace destroy DAL */ - if (adev->dm.dc) - dc_destroy(&adev->dm.dc); mutex_destroy(&adev->dm.dc_lock); From d33ea570bd20f7ac7abea4f56f1df299e179356b Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 23 May 2019 17:12:41 -0500 Subject: [PATCH 167/178] drm/amdkfd: Use kfd fd to mmap mmio TTM doesn't support CPU mapping of sg type bo (under which mmio bo is created). Switch mmaping of mmio page to kfd device file. 
Signed-off-by: Oak Zeng Acked-by: Christian Konig Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 38ae53fe8182..c92e931ceb27 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1309,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); args->mmap_offset = offset; + /* MMIO is mapped through kfd device + * Generate a kfd mmap offset + */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id); + args->mmap_offset <<= PAGE_SHIFT; + } + return 0; err_free: @@ -1853,6 +1861,39 @@ err_i1: return retcode; } +static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) +{ + phys_addr_t address; + int ret; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | + VM_DONTDUMP | VM_PFNMAP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + pr_debug("Process %d mapping mmio page\n" + " target user address == 0x%08llX\n" + " physical address == 0x%08llX\n" + " vm_flags == 0x%04lX\n" + " size == 0x%04lX\n", + process->pasid, (unsigned long long) vma->vm_start, + address, vma->vm_flags, PAGE_SIZE); + + ret = io_remap_pfn_range(vma, + vma->vm_start, + address >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + return ret; +} + + static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; @@ -1883,6 +1924,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) if (!dev) 
return -ENODEV; return kfd_reserved_mem_mmap(dev, process, vma); + case KFD_MMAP_TYPE_MMIO: + if (!dev) + return -ENODEV; + return kfd_mmio_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8f02d7817162..b44ea00ded9d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -59,6 +59,7 @@ #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ From 29e764621bbcd4a93745cf1b9fd5a2f5f45c3f72 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 3 May 2019 09:10:38 -0500 Subject: [PATCH 168/178] drm/amdkfd: Add gws number to kfd topology node properties Add amdgpu_amdkfd interface to get num_gws and add num_gws to /sys/class/kfd/kfd/topology/nodes/x/properties. Only report num_gws if MEC FW support GWS barriers. Currently it is determined by a module parameter which will be replaced with MEC FW version check when firmware is ready. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 5 +++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 + 6 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 98326e3b5619..a4780d5532be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -544,6 +544,13 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) return adev->rmmio_remap.bus_addr; } +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gds.gws_size; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f57f29763769..57006432a36e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -169,6 +169,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); #define read_user_wptr(mmptr, wptr, dst) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8fd8807272a7..78706dfa753a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -666,6 +666,16 @@ MODULE_PARM_DESC(noretry, int halt_if_hws_hang; module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + +/** + * DOC: hws_gws_support(bool) + * Whether HWS support gws barriers. Default value: false (not supported) + * This will be replaced with a MEC firmware version check once firmware + * is ready + */ +bool hws_gws_support; +module_param(hws_gws_support, bool, 0444); +MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); #endif /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b44ea00ded9d..b6a60fc3094b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -161,6 +161,11 @@ extern int noretry; */ extern int halt_if_hws_hang; +/* + * Whether MEC FW support GWS barriers + */ +extern bool hws_gws_support; + enum cache_policy { cache_policy_coherent, cache_policy_noncoherent diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 592cc6acffd9..d241a8672599 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.lds_size_in_kb); sysfs_show_32bit_prop(buffer, "gds_size_in_kb", dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "num_gws", + dev->node_props.num_gws); sysfs_show_32bit_prop(buffer, "wave_front_size", dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, "array_count", @@ -1289,6 +1291,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; dev->node_props.num_sdma_xgmi_engines = gpu->device_info->num_xgmi_sdma_engines; + dev->node_props.num_gws = (hws_gws_support 
&& + dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? + amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 949e885dfb53..276354aa0fcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -65,6 +65,7 @@ struct kfd_node_properties { uint32_t max_waves_per_simd; uint32_t lds_size_in_kb; uint32_t gds_size_in_kb; + uint32_t num_gws; uint32_t wave_front_size; uint32_t array_count; uint32_t simd_arrays_per_engine; From ca66fb8fbb9b9690591b4e85707a4f31cb042adf Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 09:48:37 -0500 Subject: [PATCH 169/178] drm/amdgpu: Add interface to alloc gws from amdgpu Add amdgpu_amdkfd interface to alloc and free gws from amdgpu Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 34 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a4780d5532be..4af3989e4a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -339,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, + void **mem_obj) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = 1; + bp.domain = AMDGPU_GEM_DOMAIN_GWS; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) { + dev_err(adev->dev, + "failed to 
allocate gws BO for amdkfd (%d)\n", r); + return r; + } + + *mem_obj = bo; + return 0; +} + +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +{ + struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; + + amdgpu_bo_unref(&bo); +} + uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 57006432a36e..c00c9749406e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -153,6 +153,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, From e09d4fc8db949715848b2d4567ad47e6b8cf0a1b Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 13:52:12 -0500 Subject: [PATCH 170/178] drm/amdkfd: Allocate gws on device initialization On device initialization, KFD allocates all (64) gws which is shared by all KFD processes. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 14 +++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a53dda9071b1..b08dc264d4db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -552,6 +552,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } else kfd->max_proc_per_quantum = hws_max_conc_proc; + /* Allocate global GWS that is shared by all KFD processes */ + if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) { + dev_err(kfd_device, "Could not allocate %d gws\n", + amdgpu_amdkfd_get_num_gws(kfd->kgd)); + goto out; + } /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -575,7 +582,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); - goto out; + goto alloc_gtt_mem_failure; } dev_info(kfd_device, "Allocated %d bytes on gart\n", size); @@ -645,6 +652,9 @@ kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +alloc_gtt_mem_failure: + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -662,6 +672,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); } kfree(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b6a60fc3094b..57e87de34ab1 100644 
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -289,6 +289,9 @@ struct kfd_dev { /* Compute Profile ref. count */ atomic_t compute_profile; + + /* Global GWS resource shared b/t processes*/ + void *gws; }; enum kfd_mempool { From 71efab6a30ab392af6a3fe1ce06bcf25c82ff3cb Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Wed, 8 May 2019 16:14:45 -0500 Subject: [PATCH 171/178] drm/amdgpu: Add function to add/remove gws to kfd process GWS bo is shared between all kfd processes. Add function to add gws to kfd process's bo list so gws can be evicted from and restored for process. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 103 +++++++++++++++++- 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c00c9749406e..f968bf147c5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -155,6 +155,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e1cae4a37113..87177ed37dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -457,6 +457,17 @@ 
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, mutex_unlock(&process_info->lock); } +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *bo_list_entry; + + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); +} + /* Initializes user pages. It registers the MMU notifier and validates * the userptr BO in the GTT domain. * @@ -1183,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (user_addr) { ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); + if (ret) goto allocate_init_user_pages_failed; - } } if (offset) @@ -1197,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -2104,3 +2112,88 @@ ttm_reserve_fail: kfree(pd_bo_list); return ret; } + +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) +{ + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; + int ret; + + if (!info || !gws) + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + mutex_init(&(*mem)->lock); + (*mem)->bo = amdgpu_bo_ref(gws_bo); + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; + (*mem)->process_info = process_info; + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + + /* Validate gws bo the first time it is added to process */ + mutex_lock(&(*mem)->process_info->lock); + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo 
failed %d\n", ret); + goto bo_reservation_failure; + } + + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); + if (ret) { + pr_err("GWS BO validate failed %d\n", ret); + goto bo_validation_failure; + } + /* GWS resource is shared b/t amdgpu and amdkfd + * Add process eviction fence to bo so they can + * evict each other. + */ + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + amdgpu_bo_unreserve(gws_bo); + mutex_unlock(&(*mem)->process_info->lock); + + return ret; + +bo_validation_failure: + amdgpu_bo_unreserve(gws_bo); +bo_reservation_failure: + mutex_unlock(&(*mem)->process_info->lock); + amdgpu_sync_free(&(*mem)->sync); + remove_kgd_mem_from_kfd_bo_list(*mem, process_info); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&(*mem)->lock); + kfree(*mem); + *mem = NULL; + return ret; +} + +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) +{ + int ret; + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; + struct amdgpu_bo *gws_bo = kgd_mem->bo; + + /* Remove BO from process's validate list so restore worker won't touch + * it anymore + */ + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); + + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + //TODO add BO back to validate_list? + return ret; + } + amdgpu_amdkfd_remove_eviction_fence(gws_bo, + process_info->eviction_fence); + amdgpu_bo_unreserve(gws_bo); + amdgpu_sync_free(&kgd_mem->sync); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&kgd_mem->lock); + kfree(mem); + return 0; +} From eb82da1dc4429c026a165f419f8c76d7bbdcf26f Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 21:31:52 -0500 Subject: [PATCH 172/178] drm/amdkfd: Add function to set queue gws Add functions in process queue manager to set/unset queue gws. Also set process's number of gws used. 
Currently only one queue in process can use and use all gws. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++ .../amd/amdkfd/kfd_process_queue_manager.c | 57 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 57e87de34ab1..b61dc53f42d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -455,6 +455,9 @@ struct queue_properties { * * @device: The kfd device that created this queue. * + * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL + * otherwise. + * * This structure represents user mode compute queues. * It contains all the necessary data to handle such queues. * @@ -476,6 +479,7 @@ struct queue { struct kfd_process *process; struct kfd_dev *device; + void *gws; }; /* @@ -869,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_get_wave_state(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index e652e25ede75..c2c570e6e54f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -26,6 +26,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_priv.h" #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h" static inline struct process_queue_node *get_queue_by_qid( struct process_queue_manager *pqm, unsigned int qid) @@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct 
kfd_process_device *pdd) pdd->already_dequeued = true; } +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws) +{ + struct kfd_dev *dev = NULL; + struct process_queue_node *pqn; + struct kfd_process_device *pdd; + struct kgd_mem *mem = NULL; + int ret; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_err("Queue id does not match any known queue\n"); + return -EINVAL; + } + + if (pqn->q) + dev = pqn->q->device; + if (WARN_ON(!dev)) + return -ENODEV; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -EINVAL; + } + + /* Only allow one queue per process can have GWS assigned */ + if (gws && pdd->qpd.num_gws) + return -EINVAL; + + if (!gws && pdd->qpd.num_gws == 0) + return -EINVAL; + + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, &mem); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + + pqn->q->gws = mem; + pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + + return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); +} + void kfd_process_dequeue_from_all_devices(struct kfd_process *p) { struct kfd_process_device *pdd; @@ -330,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + + if (pqn->q->gws) { + amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, + pqn->q->gws); + pdd->qpd.num_gws = 0; + } + kfree(pqn->q->properties.cu_mask); pqn->q->properties.cu_mask = NULL; uninit_queue(pqn->q); From 1a058c3376765ee31d65e28cbbb9d4ff15120056 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 22:11:14 -0500 Subject: [PATCH 173/178] drm/amdkfd: New IOCTL to allocate queue GWS Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on queue destroy. 
Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 ++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 20 +++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index c92e931ceb27..aab2aa6c1dee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1567,6 +1567,31 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, + struct kfd_process *p, void *data) +{ + int retval; + struct kfd_ioctl_alloc_queue_gws_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + return -EINVAL; + } + /* Check the policy only after dev is looked up; it is read via dev */ + if (!hws_gws_support || + dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return -EINVAL; + + mutex_lock(&p->mutex); + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? 
dev->gws : NULL); + mutex_unlock(&p->mutex); + + args->first_gws = 0; + return retval; +} + static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1769,6 +1794,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, + kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 20917c59f39c..070d1bc7e725 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,6 +410,21 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @gpu_id: device identifier + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 gpu_id; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -529,7 +544,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif From 023509737140b9518c36fb77887e2e22be22a150 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 10 May 2019 09:44:17 -0500 Subject: [PATCH 174/178] drm/amdkfd: PM4 packets change to support GWS Add a field in map_queues packet to indicate whether this is a gws control queue. Only one queue per process can be gws control queue. 
Change num_gws field in map_process packet to 7 bits Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 3dd731c69b5d..07f02f8e4fe4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -159,6 +159,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; packet->bitfields2.queue_type = queue_type__mes_map_queues__normal_compute_vi; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 0661339071f0..49ab66b703fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -176,8 +176,7 @@ struct pm4_mes_map_process { union { struct { - uint32_t num_gws:6; - uint32_t reserved7:1; + uint32_t num_gws:7; uint32_t sdma_enable:1; uint32_t num_oac:4; uint32_t reserved8:4; @@ -272,7 +271,9 @@ struct pm4_mes_map_queues { struct { uint32_t reserved1:4; enum mes_map_queues_queue_sel_enum queue_sel:2; - uint32_t reserved2:15; + uint32_t reserved5:6; + uint32_t gws_control_queue:1; + uint32_t reserved2:8; enum mes_map_queues_queue_type_enum queue_type:3; uint32_t reserved3:2; enum mes_map_queues_engine_sel_enum engine_sel:3; From 8d8a5a64a8904ea32bbf7292b89c11156d64f9a1 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 28 May 2019 10:46:04 +0800 Subject: [PATCH 175/178] drm/amdgpu: add DRIVER_SYNCOBJ_TIMELINE to amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian 
König Signed-off-by: Chunming Zhou Reviewed-by: Flora Cui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 78706dfa753a..1f38d6fc1fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1307,7 +1307,8 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, From d55f33da541324c7f41156dd6d045b8b450de230 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 May 2019 09:21:13 -0500 Subject: [PATCH 176/178] drm/amdgpu/soc15: skip reset on init Not necessary on soc15 and breaks driver reload on server cards. Acked-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 32dc5a128249..78bd4fc07bab 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -761,6 +761,11 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; + /* Just return false for soc15 GPUs. Reset does not seem to + * be necessary. + */ + return false; + if (adev->flags & AMD_IS_APU) return false; From 394e9a14c63d58e0f45323629a3f9ce1e5bf0215 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Tue, 28 May 2019 10:17:04 +0800 Subject: [PATCH 177/178] drm/amdgpu: Need to set the baco cap before baco reset For passthrough, after rebooted the VM, driver will do a baco reset before doing other driver initialization during loading driver. 
To do the baco reset, the driver first checks the baco reset capability, so the cap must be set from the vbios information beforehand or baco reset won't be enabled. Signed-off-by: Emily Deng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 +++++++++---------- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 4 ++++ .../powerplay/hwmgr/vega10_processpptables.c | 24 +++++++++++++++++++ .../powerplay/hwmgr/vega10_processpptables.h | 1 + 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a7ff8d6dbc6c..fe2708295867 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + /* Read BIOS */ + if (!amdgpu_get_bios(adev)) + return -EINVAL; + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto fence_driver_init; } - /* Read BIOS */ - if (!amdgpu_get_bios(adev)) { - r = -EINVAL; - goto failed; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - goto failed; - } - /* detect if we are with an SRIOV vbios */ amdgpu_device_detect_sriov_bios(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 78bd4fc07bab..d9fdd95fd6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ 
b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) /* Just return false for soc15 GPUs. Reset does not seem to * be necessary. */ - return false; + if (!amdgpu_passthrough(adev)) + return false; if (adev->flags & AMD_IS_APU) return false; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index ce6aeb5a0362..1d9bb29adaef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; + hwmgr->hwmgr_func = &vega10_hwmgr_funcs; hwmgr->pptable_func = &vega10_pptable_funcs; + if (amdgpu_passthrough(adev)) + return vega10_baco_set_cap(hwmgr); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b6767d74dc85..83d22cdeaa29 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, return result; } + +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + const ATOM_Vega10_POWERPLAYTABLE *powerplay_table; + + powerplay_table = get_powerplay_table(hwmgr); + + PP_ASSERT_WITH_CODE((powerplay_table != NULL), + "Missing PowerPlay Table!", return -1); + + result = check_powerplay_tables(hwmgr, powerplay_table); + + PP_ASSERT_WITH_CODE((result == 0), + "check_powerplay_tables failed", return result); + + set_hw_cap( + hwmgr, + 0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO), + PHM_PlatformCaps_BACO); + return result; +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h index d83ed2af7aa3..da5fbec9b0cd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h @@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr); extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index, struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *, struct pp_power_state *, void *, uint32_t)); +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr); #endif From cf401e2856b27b2deeada498eab864e2a50cf219 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 29 May 2019 16:07:34 +0100 Subject: [PATCH 178/178] drm/amdkfd: fix null pointer dereference on dev The pointer dev is set to null yet it is being dereferenced when checking dev->dqm->sched_policy. Fix this by performing the check on dev->dqm->sched_policy after dev has been assigned and null checked. Also remove the redundant null assignment to dev. 
Addresses-Coverity: ("Explicit null dereference") Fixes: 1a058c337676 ("drm/amdkfd: New IOCTL to allocate queue GWS") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index aab2aa6c1dee..ea82828fdc76 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1572,10 +1572,9 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, { int retval; struct kfd_ioctl_alloc_queue_gws_args *args = data; - struct kfd_dev *dev = NULL; + struct kfd_dev *dev; - if (!hws_gws_support || - dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + if (!hws_gws_support) return -EINVAL; dev = kfd_device_by_id(args->gpu_id); @@ -1583,6 +1582,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); return -EINVAL; } + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return -EINVAL; mutex_lock(&p->mutex); retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);