Add alert for per-core CPU usage (#21983)

* thermald: per-core CPU usage

* add alert

* better alert text

* list

* fix process replay
pull/21988/head
Adeeb Shihadeh 2021-08-19 19:14:51 -07:00 committed by GitHub
parent fc56ae70ee
commit fc8b571a84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 4 deletions

2
cereal

@ -1 +1 @@
Subproject commit dbdab72167c83ee12968e44df646436bd0a4727e
Subproject commit 9c23923d4840c622d88c0408f72776db363dd22f

View File

@ -23,7 +23,7 @@ from selfdrive.controls.lib.events import Events, ET
from selfdrive.controls.lib.alertmanager import AlertManager
from selfdrive.controls.lib.vehicle_model import VehicleModel
from selfdrive.locationd.calibrationd import Calibration
from selfdrive.hardware import HARDWARE, TICI
from selfdrive.hardware import HARDWARE, TICI, EON
from selfdrive.manager.process_config import managed_processes
LDW_MIN_SPEED = 31 * CV.MPH_TO_MS
@ -201,6 +201,9 @@ class Controls:
# TODO: make tici threshold the same
if self.sm['deviceState'].memoryUsagePercent > (90 if TICI else 65) and not SIMULATION:
self.events.add(EventName.lowMemory)
cpus = list(self.sm['deviceState'].cpuUsagePercent)[:(-1 if EON else None)]
if max(cpus, default=0) > 95:
self.events.add(EventName.highCpuUsage)
# Alert if fan isn't spinning for 5 seconds
if self.sm['pandaState'].pandaType in [PandaType.uno, PandaType.dos]:

View File

@ -729,7 +729,14 @@ EVENTS: Dict[int, Dict[str, Union[Alert, Callable[[Any, messaging.SubMaster, boo
ET.SOFT_DISABLE: SoftDisableAlert("Low Memory: Reboot Your Device"),
ET.PERMANENT: NormalPermanentAlert("Low Memory", "Reboot your Device"),
ET.NO_ENTRY: NoEntryAlert("Low Memory: Reboot Your Device",
audible_alert=AudibleAlert.chimeDisengage),
audible_alert=AudibleAlert.chimeDisengage),
},
EventName.highCpuUsage: {
#ET.SOFT_DISABLE: SoftDisableAlert("System Malfunction: Reboot Your Device"),
#ET.PERMANENT: NormalPermanentAlert("System Malfunction", "Reboot your Device"),
ET.NO_ENTRY: NoEntryAlert("System Malfunction: Reboot Your Device",
audible_alert=AudibleAlert.chimeDisengage),
},
EventName.accFaulted: {

View File

@ -259,7 +259,7 @@ def thermald_thread():
msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent))
msg.deviceState.cpuUsagePercent = int(round(psutil.cpu_percent()))
msg.deviceState.cpuUsagePercent = [int(round(n)) for n in psutil.cpu_percent(percpu=True)]
msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent()))
msg.deviceState.networkType = network_type
msg.deviceState.networkStrength = network_strength