Refactor CPU usage test (#1802)

* pull CPU usage test out of manager

* remove that

* add to release files

* cleanup

* executable

* this should work

* check if car started procs are running

* debug

* add min cpu usage

* remove debug prints

* adjust min cpu threshold

Co-authored-by: Comma Device <device@comma.ai>
albatross
Adeeb Shihadeh 2020-07-03 15:40:13 -07:00 committed by GitHub
parent 9de015f588
commit 6db7fa8c03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 77 deletions

View File

@ -1,50 +1 @@
def cputime_total(ct):
    """Return the sum of the four CPU time counters carried by ``ct``.

    ``ct`` must expose ``cpuUser``, ``cpuSystem``, ``cpuChildrenUser`` and
    ``cpuChildrenSystem``; the result is their plain sum.
    """
    own_time = ct.cpuUser + ct.cpuSystem
    return own_time + ct.cpuChildrenUser + ct.cpuChildrenSystem
def print_cpu_usage(first_proc, last_proc):
    """Print per-process CPU usage between two samples and flag regressions.

    Both arguments must expose ``logMonoTime`` and ``procLog.procs``; each
    entry in ``procs`` is matched by checking the expected process name
    against its ``cmdline``.

    Returns 1 if any listed process exceeds its allowed usage or cannot be
    found in both samples, otherwise 0.
    """
    # (process name, baseline CPU usage in percent)
    baselines = (
        ("selfdrive.controls.controlsd", 59.46),
        ("./_modeld", 12.74),
        ("./loggerd", 28.49),
        ("selfdrive.controls.plannerd", 19.77),
        ("selfdrive.controls.radard", 9.54),
        ("./_ui", 9.54),
        ("./camerad", 7.07),
        ("selfdrive.locationd.locationd", 34.38),
        ("selfdrive.locationd.paramsd", 11.53),
        ("./_sensord", 6.17),
        ("selfdrive.monitoring.dmonitoringd", 5.48),
        ("./boardd", 3.63),
        ("./_dmonitoringmodeld", 2.67),
        ("selfdrive.logmessaged", 2.71),
        ("selfdrive.thermald.thermald", 2.41),
        ("selfdrive.locationd.calibrationd", 6.81),
        ("./proclogd", 1.54),
        ("./_gpsd", 0.09),
        ("./clocksd", 0.02),
        ("./ubloxd", 0.02),
        ("selfdrive.tombstoned", 0),
        ("./logcatd", 0),
    )
    divider = "------------------------------------------------"

    status = 0
    # logMonoTime difference scaled by 1e9 (presumably nanoseconds -> seconds)
    elapsed = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9

    print(divider)
    for name, expected in baselines:
        try:
            start = [p for p in first_proc.procLog.procs if name in p.cmdline][0]
            end = [p for p in last_proc.procLog.procs if name in p.cmdline][0]
        except IndexError:
            # process missing from at least one of the samples
            print(f"{name.ljust(35)} NO METRICS FOUND")
            status = 1
            continue

        used = cputime_total(end) - cputime_total(start)
        cpu_usage = used / elapsed * 100.
        # tolerate 10% above baseline, but never less than 5 points of headroom
        limit = max(expected * 1.1, expected + 5.0)
        if cpu_usage > limit:
            print(f"Warning {name} using more CPU than normal")
            status = 1
        print(f"{name.ljust(35)} {cpu_usage:.2f}%")
    print(divider)

    return status

View File

@ -80,7 +80,7 @@ echo -n "1" > /data/params/d/HasCompletedSetup
echo -n "1" > /data/params/d/CommunityFeaturesToggle
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" nosetests -s selfdrive/test/test_openpilot.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" GET_CPU_USAGE=1 selfdrive/manager.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/test/test_cpu_usage.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/car/tests/test_car_interfaces.py
echo "[-] testing panda build T=$SECONDS"

View File

@ -320,6 +320,7 @@ selfdrive/test/__init__.py
selfdrive/test/test_openpilot.py
selfdrive/test/test_fingerprints.py
selfdrive/test/test_car_models.py
selfdrive/test/test_cpu_usage.py
selfdrive/ui/SConscript
selfdrive/ui/*.cc

View File

@ -161,7 +161,6 @@ from selfdrive.loggerd.config import ROOT
from selfdrive.launcher import launcher
from common import android
from common.apk import update_apks, pm_apply_packages, start_offroad
from common.manager_helpers import print_cpu_usage
ThermalStatus = cereal.log.ThermalData.ThermalStatus
@ -428,9 +427,6 @@ def manager_thread():
# now loop
thermal_sock = messaging.sub_sock('thermal')
if os.getenv("GET_CPU_USAGE"):
proc_sock = messaging.sub_sock('procLog', conflate=True)
cloudlog.info("manager start")
cloudlog.info({"environ": os.environ})
@ -461,9 +457,6 @@ def manager_thread():
logger_dead = False
start_t = time.time()
first_proc = None
while 1:
msg = messaging.recv_sock(thermal_sock, wait=True)
@ -504,26 +497,6 @@ def manager_thread():
if params.get("DoUninstall", encoding='utf8') == "1":
break
if os.getenv("GET_CPU_USAGE"):
dt = time.time() - start_t
# Get first sample
if dt > 30 and first_proc is None:
first_proc = messaging.recv_sock(proc_sock)
# Get last sample and exit
if dt > 90:
last_proc = messaging.recv_sock(proc_sock, wait=True)
all_running = all(running[p].is_alive() for p in car_started_processes)
cleanup_all_processes(None, None)
return_code = print_cpu_usage(first_proc, last_proc)
if not all_running:
return_code = 1
sys.exit(return_code)
def manager_prepare(spinner=None):
# build all processes
os.chdir(os.path.dirname(os.path.abspath(__file__)))

View File

@ -0,0 +1,96 @@
#!/usr/bin/env python3
import time
import threading
import _thread
import signal
import sys
import cereal.messaging as messaging
import selfdrive.manager as manager
def cputime_total(ct):
    """Add up the user, system, children-user and children-system CPU times of ``ct``."""
    total = ct.cpuUser
    for field in ("cpuSystem", "cpuChildrenUser", "cpuChildrenSystem"):
        total = total + getattr(ct, field)
    return total
def print_cpu_usage(first_proc, last_proc):
    """Report per-process CPU usage between two samples and check it against baselines.

    For every process in the baseline table, the CPU time accumulated between
    the two samples (via ``cputime_total``) is expressed as a percentage of
    the elapsed time and compared with an allowed band around the baseline.
    The whole report is printed to stdout in a single ``print`` call.

    Args:
        first_proc: earlier sample; must expose ``logMonoTime`` and
            ``procLog.procs``, where each proc has a ``cmdline`` and the
            counters read by ``cputime_total``.
        last_proc: later sample with the same shape.

    Returns:
        0 if every listed process was found in both samples and stayed inside
        its band; 1 otherwise, including when the samples do not span a
        positive amount of time.
    """
    # (process name, baseline CPU usage in percent)
    procs = [
        ("selfdrive.controls.controlsd", 59.46),
        ("selfdrive.locationd.locationd", 34.38),
        ("./loggerd", 28.49),
        ("selfdrive.controls.plannerd", 19.77),
        ("./_modeld", 12.74),
        ("selfdrive.locationd.paramsd", 11.53),
        ("selfdrive.controls.radard", 9.54),
        ("./_ui", 9.54),
        ("./camerad", 7.07),
        ("selfdrive.locationd.calibrationd", 6.81),
        ("./_sensord", 6.17),
        ("selfdrive.monitoring.dmonitoringd", 5.48),
        ("./boardd", 3.63),
        ("./_dmonitoringmodeld", 2.67),
        ("selfdrive.logmessaged", 2.71),
        ("selfdrive.thermald.thermald", 2.41),
        ("./proclogd", 1.54),
        ("./_gpsd", 0.09),
        ("./clocksd", 0.02),
        ("./ubloxd", 0.02),
        ("selfdrive.tombstoned", 0),
        ("./logcatd", 0),
    ]
    separator = "------------------------------------------------\n"

    # logMonoTime difference scaled by 1e9 (presumably nanoseconds -> seconds)
    dt = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9
    if dt <= 0:
        # Identical or out-of-order samples: dividing by dt would either raise
        # ZeroDivisionError or yield meaningless negative usage, so fail the
        # check explicitly instead.
        print(separator + "NO TIME ELAPSED BETWEEN SAMPLES\n" + separator)
        return 1

    r = 0
    lines = [separator]
    for proc_name, normal_cpu_usage in procs:
        first = next((p for p in first_proc.procLog.procs if proc_name in p.cmdline), None)
        last = next((p for p in last_proc.procLog.procs if proc_name in p.cmdline), None)
        if first is None or last is None:
            # process missing from at least one of the samples
            lines.append(f"{proc_name.ljust(35)} NO METRICS FOUND\n")
            r = 1
            continue

        cpu_time = cputime_total(last) - cputime_total(first)
        cpu_usage = cpu_time / dt * 100.

        # Upper bound: 10% over baseline, with at least 5 points of headroom.
        upper = max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0)
        # Lower bound: 30% of baseline, or baseline minus 1 point (floored at
        # zero) when that is smaller, so tiny baselines never trip it.
        lower = min(normal_cpu_usage * 0.3, max(normal_cpu_usage - 1.0, 0.0))
        if cpu_usage > upper:
            lines.append(f"Warning {proc_name} using more CPU than normal\n")
            r = 1
        elif cpu_usage < lower:
            lines.append(f"Warning {proc_name} using less CPU than normal\n")
            r = 1
        lines.append(f"{proc_name.ljust(35)} {cpu_usage:.2f}%\n")
    lines.append(separator)

    print("".join(lines))
    return r
# Exit status of the test; stays at "failed" until test_thread proves otherwise.
return_code = 1


def test_thread():
    """Sample procLog twice, evaluate CPU usage, then interrupt the main thread.

    Takes a first ``procLog`` sample after 30 seconds and a second one 60
    seconds later, checks that every process in
    ``manager.car_started_processes`` is still alive, and stores the outcome
    in the module-level ``return_code`` (0 on success, 1 on failure).

    The main thread is always interrupted at the end, even if sampling or
    evaluation raises: that interrupt is the only exit path visible in this
    script, so skipping it would presumably leave the manager running forever
    instead of failing the test. On an exception ``return_code`` keeps its
    initial failing value.
    """
    global return_code
    try:
        proc_sock = messaging.sub_sock('procLog', conflate=True)

        # wait until everything's started and get first sample
        time.sleep(30)
        first_proc = messaging.recv_sock(proc_sock, wait=True)

        # run for a minute and get last sample
        time.sleep(60)
        last_proc = messaging.recv_sock(proc_sock, wait=True)

        running = manager.get_running()
        all_running = all(p in running and running[p].is_alive() for p in manager.car_started_processes)

        usage_result = print_cpu_usage(first_proc, last_proc)
        # Assign once so return_code is never momentarily 0 while a process is dead.
        return_code = usage_result if all_running else 1
    finally:
        # Raises KeyboardInterrupt/SIGINT handling in the main thread, which
        # exits the process with return_code.
        _thread.interrupt_main()
if __name__ == "__main__":
    def handle_exit(signum, _frame):
        """SIGINT handler: terminate the process with the test's status."""
        sys.exit(return_code)

    # Install the handler first so the test thread's interrupt exits with the
    # recorded status.
    signal.signal(signal.SIGINT, handle_exit)

    # Run the measurement in a daemon thread while the manager owns the main thread.
    worker = threading.Thread(target=test_thread, daemon=True)
    worker.start()

    manager.main()