Thermald thermal state cleanup (#21959)

* dumb controller 'tuned' on a desk

* reduce Ki

* fix prev_time bug

* reset integral on going onroad

* cleanup thermal detection logic

* more minor tuning

* use controls' PI controller

* get rid of bat_temp

* change if once more

* cleanup

* cleanup import order

* add back bat temp in this PR

* cleanup thermal status logic

* use offroad danger temp

* revert that

* use ordered dict for prev/next bands

Co-authored-by: Comma Device <device@comma.ai>
Co-authored-by: Willem Melching <willem.melching@gmail.com>
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
pull/22124/head
Robbe Derks 2021-09-04 02:19:11 +02:00 committed by GitHub
parent 2b62b5a117
commit 86bceed57d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 33 additions and 30 deletions

View File

@ -4,6 +4,7 @@ import os
import time
from pathlib import Path
from typing import Dict, Optional, Tuple
from collections import namedtuple, OrderedDict
import psutil
from smbus2 import SMBus
@ -11,7 +12,7 @@ from smbus2 import SMBus
import cereal.messaging as messaging
from cereal import log
from common.filter_simple import FirstOrderFilter
from common.numpy_fast import clip, interp
from common.numpy_fast import interp
from common.params import Params, ParamKeyType
from common.realtime import DT_TRML, sec_since_boot
from common.dict_helpers import strip_deprecated_keys
@ -30,16 +31,27 @@ ThermalStatus = log.DeviceState.ThermalStatus
NetworkType = log.DeviceState.NetworkType
NetworkStrength = log.DeviceState.NetworkStrength
CURRENT_TAU = 15. # 15s time constant
CPU_TEMP_TAU = 5. # 5s time constant
TEMP_TAU = 5. # 5s time constant
DAYS_NO_CONNECTIVITY_MAX = 7 # do not allow to engage after a week without internet
DAYS_NO_CONNECTIVITY_PROMPT = 4 # send an offroad prompt after 4 days with no internet
DISCONNECT_TIMEOUT = 5. # wait 5 seconds before going offroad after disconnect so you get an alert
ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
# List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
THERMAL_BANDS = OrderedDict({
ThermalStatus.green: ThermalBand(None, 80.0),
ThermalStatus.yellow: ThermalBand(75.0, 96.0),
ThermalStatus.red: ThermalBand(80.0, 107.),
ThermalStatus.danger: ThermalBand(94.0, None),
})
# Override to highest thermal band when offroad and above this temp
OFFROAD_DANGER_TEMP = 70.0
prev_offroad_states: Dict[str, Tuple[bool, Optional[str]]] = {}
last_eon_fan_val = None
def read_tz(x):
if x is None:
return 0
@ -65,6 +77,7 @@ def setup_eon_fan():
os.system("echo 2 > /sys/module/dwc3_msm/parameters/otg_switch")
last_eon_fan_val = None
def set_eon_fan(val):
global last_eon_fan_val
@ -122,7 +135,7 @@ def handle_fan_tici(controller, max_cpu_temp, fan_speed, ignition):
controller.pos_limit = -(30 if ignition else 0)
fan_pwr_out = -int(controller.update(
setpoint=(75 if ignition else 68),
setpoint=(75 if ignition else (OFFROAD_DANGER_TEMP - 2)),
measurement=max_cpu_temp,
feedforward=interp(max_cpu_temp, [60.0, 100.0], [0, -80])
))
@ -167,7 +180,7 @@ def thermald_thread():
registered_count = 0
current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
cpu_temp_filter = FirstOrderFilter(0., CPU_TEMP_TAU, DT_TRML)
temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
pandaState_prev = None
should_start_prev = False
handle_fan = None
@ -294,36 +307,26 @@ def thermald_thread():
current_filter.update(msg.deviceState.batteryCurrent / 1e6)
# TODO: add car battery voltage check
max_cpu_temp = cpu_temp_filter.update(max(msg.deviceState.cpuTempC))
max_comp_temp = max(max_cpu_temp, msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
bat_temp = msg.deviceState.batteryTempC
max_comp_temp = temp_filter.update(
max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC, max(msg.deviceState.gpuTempC))
)
if handle_fan is not None:
fan_speed = handle_fan(controller, max_cpu_temp, fan_speed, startup_conditions["ignition"])
fan_speed = handle_fan(controller, max_comp_temp, fan_speed, startup_conditions["ignition"])
msg.deviceState.fanSpeedPercentDesired = fan_speed
# If device is offroad we want to cool down before going onroad
# since going onroad increases load and can make temps go over 107
# We only do this if there is a relay that prevents the car from faulting
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (sec_since_boot() - off_ts > 60 * 5))
if max_cpu_temp > 107. or bat_temp >= 63. or (is_offroad_for_5_min and max_cpu_temp > 70.0):
# onroad not allowed
if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
# If device is offroad we want to cool down before going onroad
# since going onroad increases load and can make temps go over 107
thermal_status = ThermalStatus.danger
elif max_comp_temp > 96.0 or bat_temp > 60.:
# hysteresis between onroad not allowed and engage not allowed
thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger)
elif max_cpu_temp > 94.0:
# hysteresis between engage not allowed and uploader not allowed
thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red)
elif max_cpu_temp > 80.0:
# uploader not allowed
thermal_status = ThermalStatus.yellow
elif max_cpu_temp > 75.0:
# hysteresis between uploader not allowed and all good
thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow)
else:
thermal_status = ThermalStatus.green # default to good condition
current_band = THERMAL_BANDS[thermal_status]
band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]
# **** starting logic ****