diff --git a/selfdrive/__init__.py b/selfdrive/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/selfdrive/config.py b/selfdrive/config.py new file mode 100644 index 000000000..6c2296273 --- /dev/null +++ b/selfdrive/config.py @@ -0,0 +1,30 @@ +import numpy as np + +class Conversions: + #Speed + MPH_TO_KPH = 1.609344 + KPH_TO_MPH = 1. / MPH_TO_KPH + MS_TO_KPH = 3.6 + KPH_TO_MS = 1. / MS_TO_KPH + MS_TO_MPH = MS_TO_KPH * KPH_TO_MPH + MPH_TO_MS = MPH_TO_KPH * KPH_TO_MS + MS_TO_KNOTS = 1.9438 + KNOTS_TO_MS = 1. / MS_TO_KNOTS + #Angle + DEG_TO_RAD = np.pi/180. + RAD_TO_DEG = 1. / DEG_TO_RAD + #Mass + LB_TO_KG = 0.453592 + + +RADAR_TO_CENTER = 2.7 # (deprecated) RADAR is ~ 2.7m ahead from center of car +RADAR_TO_CAMERA = 1.52 # RADAR is ~ 1.5m ahead from center of mesh frame + +class UIParams: + lidar_x, lidar_y, lidar_zoom = 384, 960, 6 + lidar_car_x, lidar_car_y = lidar_x/2., lidar_y/1.1 + car_hwidth = 1.7272/2 * lidar_zoom + car_front = 2.6924 * lidar_zoom + car_back = 1.8796 * lidar_zoom + car_color = 110 + diff --git a/selfdrive/crash.py b/selfdrive/crash.py new file mode 100644 index 000000000..6ceb56b62 --- /dev/null +++ b/selfdrive/crash.py @@ -0,0 +1,66 @@ +"""Install exception handler for process crash.""" +import os +import sys +import threading +import capnp +from selfdrive.version import version, dirty + +from selfdrive.swaglog import cloudlog + +if os.getenv("NOLOG") or os.getenv("NOCRASH"): + def capture_exception(*exc_info): + pass + def bind_user(**kwargs): + pass + def bind_extra(**kwargs): + pass + def install(): + pass +else: + from raven import Client + from raven.transport.http import HTTPTransport + client = Client('https://1994756b5e6f41cf939a4c65de45f4f2:cefebaf3a8aa40d182609785f7189bd7@app.getsentry.com/77924', + install_sys_hook=False, transport=HTTPTransport, release=version, tags={'dirty': dirty}) + + def capture_exception(*args, **kwargs): + exc_info = sys.exc_info() + if not exc_info[0] is capnp.lib.capnp.KjException: 
+ client.captureException(*args, **kwargs) + cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1)) + + def bind_user(**kwargs): + client.user_context(kwargs) + + def bind_extra(**kwargs): + client.extra_context(kwargs) + + def install(): + # installs a sys.excepthook + __excepthook__ = sys.excepthook + def handle_exception(*exc_info): + if exc_info[0] not in (KeyboardInterrupt, SystemExit): + capture_exception(exc_info=exc_info) + __excepthook__(*exc_info) + sys.excepthook = handle_exception + + """ + Workaround for `sys.excepthook` thread bug from: + http://bugs.python.org/issue1230540 + Call once from the main thread before creating any threads. + Source: https://stackoverflow.com/a/31622038 + """ + init_original = threading.Thread.__init__ + + def init(self, *args, **kwargs): + init_original(self, *args, **kwargs) + run_original = self.run + + def run_with_except_hook(*args2, **kwargs2): + try: + run_original(*args2, **kwargs2) + except Exception: + sys.excepthook(*sys.exc_info()) + + self.run = run_with_except_hook + + threading.Thread.__init__ = init diff --git a/selfdrive/launcher.py b/selfdrive/launcher.py new file mode 100644 index 000000000..8253347d6 --- /dev/null +++ b/selfdrive/launcher.py @@ -0,0 +1,27 @@ +import importlib +from setproctitle import setproctitle #pylint: disable=no-name-in-module + +import cereal.messaging as messaging +import selfdrive.crash as crash +from selfdrive.swaglog import cloudlog + +def launcher(proc): + try: + # import the process + mod = importlib.import_module(proc) + + # rename the process + setproctitle(proc) + + # create new context since we forked + messaging.context = messaging.Context() + + # exec the process + mod.main() + except KeyboardInterrupt: + cloudlog.warning("child %s got SIGINT" % proc) + except Exception: + # can't install the crash handler becuase sys.excepthook doesn't play nice + # with threads, so catch it here. 
+ crash.capture_exception() + raise diff --git a/selfdrive/logmessaged.py b/selfdrive/logmessaged.py new file mode 100755 index 000000000..3362b5baa --- /dev/null +++ b/selfdrive/logmessaged.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import zmq +from logentries import LogentriesHandler +import cereal.messaging as messaging + +def main(gctx=None): + # setup logentries. we forward log messages to it + le_token = "e8549616-0798-4d7e-a2ca-2513ae81fa17" + le_handler = LogentriesHandler(le_token, use_tls=False, verbose=False) + + le_level = 20 #logging.INFO + + ctx = zmq.Context().instance() + sock = ctx.socket(zmq.PULL) + sock.bind("ipc:///tmp/logmessage") + + # and we publish them + pub_sock = messaging.pub_sock('logMessage') + + while True: + dat = b''.join(sock.recv_multipart()) + dat = dat.decode('utf8') + + # print "RECV", repr(dat) + + levelnum = ord(dat[0]) + dat = dat[1:] + + if levelnum >= le_level: + # push to logentries + # TODO: push to athena instead + le_handler.emit_raw(dat) + + # then we publish them + msg = messaging.new_message() + msg.logMessage = dat + pub_sock.send(msg.to_bytes()) + +if __name__ == "__main__": + main() diff --git a/selfdrive/manager.py b/selfdrive/manager.py new file mode 100755 index 000000000..358eb3893 --- /dev/null +++ b/selfdrive/manager.py @@ -0,0 +1,530 @@ +#!/usr/bin/env python3.7 +import os +import time +import sys +import fcntl +import errno +import signal +import shutil +import subprocess +import datetime + +from common.basedir import BASEDIR +from common.android import ANDROID +sys.path.append(os.path.join(BASEDIR, "pyextra")) +os.environ['BASEDIR'] = BASEDIR + +TOTAL_SCONS_NODES = 1170 +prebuilt = os.path.exists(os.path.join(BASEDIR, 'prebuilt')) + +# Create folders needed for msgq +try: + os.mkdir("/dev/shm") +except FileExistsError: + pass + +if ANDROID: + os.chmod("/dev/shm", 0o777) + +def unblock_stdout(): + # get a non-blocking stdout + child_pid, child_pty = os.forkpty() + if child_pid != 0: # parent + + # child 
is in its own process group, manually pass kill signals + signal.signal(signal.SIGINT, lambda signum, frame: os.kill(child_pid, signal.SIGINT)) + signal.signal(signal.SIGTERM, lambda signum, frame: os.kill(child_pid, signal.SIGTERM)) + + fcntl.fcntl(sys.stdout, fcntl.F_SETFL, + fcntl.fcntl(sys.stdout, fcntl.F_GETFL) | os.O_NONBLOCK) + + while True: + try: + dat = os.read(child_pty, 4096) + except OSError as e: + if e.errno == errno.EIO: + break + continue + + if not dat: + break + + try: + sys.stdout.write(dat.decode('utf8')) + except (OSError, IOError, UnicodeDecodeError): + pass + + os._exit(os.wait()[1]) + + +if __name__ == "__main__": + unblock_stdout() + from common.spinner import Spinner +else: + from common.spinner import FakeSpinner as Spinner + +import importlib +import traceback +from multiprocessing import Process + +# Run scons +spinner = Spinner() +spinner.update("0") + +if not prebuilt: + for retry in [True, False]: + # run scons + env = os.environ.copy() + env['SCONS_PROGRESS'] = "1" + env['SCONS_CACHE'] = "1" + + nproc = os.cpu_count() + j_flag = "" if nproc is None else "-j%d" % (nproc - 1) + scons = subprocess.Popen(["scons", j_flag], cwd=BASEDIR, env=env, stderr=subprocess.PIPE) + + # Read progress from stderr and update spinner + while scons.poll() is None: + try: + line = scons.stderr.readline() + if line is None: + continue + + line = line.rstrip() + prefix = b'progress: ' + if line.startswith(prefix): + i = int(line[len(prefix):]) + if spinner is not None: + spinner.update("%d" % (50.0 * (i / TOTAL_SCONS_NODES))) + elif len(line): + print(line.decode('utf8')) + except Exception: + pass + + if scons.returncode != 0: + if retry: + print("scons build failed, cleaning in") + for i in range(3,-1,-1): + print("....%d" % i) + time.sleep(1) + subprocess.check_call(["scons", "-c"], cwd=BASEDIR, env=env) + shutil.rmtree("/tmp/scons_cache") + else: + raise RuntimeError("scons build failed") + else: + break + +import cereal +import cereal.messaging as 
messaging + +from common.params import Params +import selfdrive.crash as crash +from selfdrive.swaglog import cloudlog +from selfdrive.registration import register +from selfdrive.version import version, dirty +from selfdrive.loggerd.config import ROOT +from selfdrive.launcher import launcher +from common import android +from common.apk import update_apks, pm_apply_packages, start_frame + +ThermalStatus = cereal.log.ThermalData.ThermalStatus + +# comment out anything you don't want to run +managed_processes = { + "thermald": "selfdrive.thermald", + "uploader": "selfdrive.loggerd.uploader", + "deleter": "selfdrive.loggerd.deleter", + "controlsd": "selfdrive.controls.controlsd", + "plannerd": "selfdrive.controls.plannerd", + "radard": "selfdrive.controls.radard", + "ubloxd": ("selfdrive/locationd", ["./ubloxd"]), + "loggerd": ("selfdrive/loggerd", ["./loggerd"]), + "logmessaged": "selfdrive.logmessaged", + "tombstoned": "selfdrive.tombstoned", + "logcatd": ("selfdrive/logcatd", ["./logcatd"]), + "proclogd": ("selfdrive/proclogd", ["./proclogd"]), + "boardd": ("selfdrive/boardd", ["./boardd"]), # not used directly + "pandad": "selfdrive.pandad", + "ui": ("selfdrive/ui", ["./ui"]), + "calibrationd": "selfdrive.locationd.calibrationd", + "paramsd": ("selfdrive/locationd", ["./paramsd"]), + "camerad": ("selfdrive/camerad", ["./camerad"]), + "sensord": ("selfdrive/sensord", ["./sensord"]), + "clocksd": ("selfdrive/clocksd", ["./clocksd"]), + "gpsd": ("selfdrive/sensord", ["./gpsd"]), + "updated": "selfdrive.updated", + "monitoringd": ("selfdrive/modeld", ["./monitoringd"]), + "modeld": ("selfdrive/modeld", ["./modeld"]), +} + +daemon_processes = { + "manage_athenad": ("selfdrive.athena.manage_athenad", "AthenadPid"), +} + +running = {} +def get_running(): + return running + +# due to qualcomm kernel bugs SIGKILLing camerad sometimes causes page table corruption +unkillable_processes = ['camerad'] + +# processes to end with SIGINT instead of SIGTERM +interrupt_processes = 
[] + +# processes to end with SIGKILL instead of SIGTERM +kill_processes = ['sensord', 'paramsd'] + +# processes to end if thermal conditions exceed Green parameters +green_temp_processes = ['uploader'] + +persistent_processes = [ + 'thermald', + 'logmessaged', + 'ui', + 'uploader', +] +if ANDROID: + persistent_processes += [ + 'logcatd', + 'tombstoned', + 'updated', + ] + +car_started_processes = [ + 'controlsd', + 'plannerd', + 'loggerd', + 'radard', + 'calibrationd', + 'paramsd', + 'camerad', + 'modeld', + 'proclogd', + 'ubloxd', +] +if ANDROID: + car_started_processes += [ + 'sensord', + 'clocksd', + 'gpsd', + 'monitoringd', + 'deleter', + ] + +def register_managed_process(name, desc, car_started=False): + global managed_processes, car_started_processes, persistent_processes + print("registering %s" % name) + managed_processes[name] = desc + if car_started: + car_started_processes.append(name) + else: + persistent_processes.append(name) + +# ****************** process management functions ****************** +def nativelauncher(pargs, cwd): + # exec the process + os.chdir(cwd) + + # because when extracted from pex zips permissions get lost -_- + os.chmod(pargs[0], 0o700) + + os.execvp(pargs[0], pargs) + +def start_managed_process(name): + if name in running or name not in managed_processes: + return + proc = managed_processes[name] + if isinstance(proc, str): + cloudlog.info("starting python %s" % proc) + running[name] = Process(name=name, target=launcher, args=(proc,)) + else: + pdir, pargs = proc + cwd = os.path.join(BASEDIR, pdir) + cloudlog.info("starting process %s" % name) + running[name] = Process(name=name, target=nativelauncher, args=(pargs, cwd)) + running[name].start() + +def start_daemon_process(name): + params = Params() + proc, pid_param = daemon_processes[name] + pid = params.get(pid_param) + + if pid is not None: + try: + os.kill(int(pid), 0) + # process is running (kill is a poorly-named system call) + return + except OSError: + # process is 
dead + pass + + cloudlog.info("starting daemon %s" % name) + proc = subprocess.Popen(['python', '-m', proc], + stdin=open('/dev/null', 'r'), + stdout=open('/dev/null', 'w'), + stderr=open('/dev/null', 'w'), + preexec_fn=os.setpgrp) + + params.put(pid_param, str(proc.pid)) + +def prepare_managed_process(p): + proc = managed_processes[p] + if isinstance(proc, str): + # import this python + cloudlog.info("preimporting %s" % proc) + importlib.import_module(proc) + elif os.path.isfile(os.path.join(BASEDIR, proc[0], "Makefile")): + # build this process + cloudlog.info("building %s" % (proc,)) + try: + subprocess.check_call(["make", "-j4"], cwd=os.path.join(BASEDIR, proc[0])) + except subprocess.CalledProcessError: + # make clean if the build failed + cloudlog.warning("building %s failed, make clean" % (proc, )) + subprocess.check_call(["make", "clean"], cwd=os.path.join(BASEDIR, proc[0])) + subprocess.check_call(["make", "-j4"], cwd=os.path.join(BASEDIR, proc[0])) + +def kill_managed_process(name): + if name not in running or name not in managed_processes: + return + cloudlog.info("killing %s" % name) + + if running[name].exitcode is None: + if name in interrupt_processes: + os.kill(running[name].pid, signal.SIGINT) + elif name in kill_processes: + os.kill(running[name].pid, signal.SIGKILL) + else: + running[name].terminate() + + # Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382 + # We have to poll the exitcode instead + # running[name].join(5.0) + + t = time.time() + while time.time() - t < 5 and running[name].exitcode is None: + time.sleep(0.001) + + if running[name].exitcode is None: + if name in unkillable_processes: + cloudlog.critical("unkillable process %s failed to exit! 
rebooting in 15 if it doesn't die" % name) + running[name].join(15.0) + if running[name].exitcode is None: + cloudlog.critical("FORCE REBOOTING PHONE!") + os.system("date >> /sdcard/unkillable_reboot") + os.system("reboot") + raise RuntimeError + else: + cloudlog.info("killing %s with SIGKILL" % name) + os.kill(running[name].pid, signal.SIGKILL) + running[name].join() + + cloudlog.info("%s is dead with %d" % (name, running[name].exitcode)) + del running[name] + + +def cleanup_all_processes(signal, frame): + cloudlog.info("caught ctrl-c %s %s" % (signal, frame)) + + if ANDROID: + pm_apply_packages('disable') + + for name in list(running.keys()): + kill_managed_process(name) + cloudlog.info("everything is dead") + +# ****************** run loop ****************** + +def manager_init(should_register=True): + if should_register: + reg_res = register() + if reg_res: + dongle_id, dongle_secret = reg_res + else: + raise Exception("server registration failed") + else: + dongle_id = "c"*16 + + # set dongle id + cloudlog.info("dongle id is " + dongle_id) + os.environ['DONGLE_ID'] = dongle_id + + cloudlog.info("dirty is %d" % dirty) + if not dirty: + os.environ['CLEAN'] = '1' + + cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty, is_eon=True) + crash.bind_user(id=dongle_id) + crash.bind_extra(version=version, dirty=dirty, is_eon=True) + + os.umask(0) + try: + os.mkdir(ROOT, 0o777) + except OSError: + pass + + # ensure shared libraries are readable by apks + os.chmod(BASEDIR, 0o755) + os.chmod(os.path.join(BASEDIR, "cereal"), 0o755) + os.chmod(os.path.join(BASEDIR, "cereal", "libmessaging_shared.so"), 0o755) + +def manager_thread(): + # now loop + thermal_sock = messaging.sub_sock('thermal') + + cloudlog.info("manager start") + cloudlog.info({"environ": os.environ}) + + # save boot log + subprocess.call(["./loggerd", "--bootlog"], cwd=os.path.join(BASEDIR, "selfdrive/loggerd")) + + params = Params() + + # start daemon processes + for p in daemon_processes: 
+ start_daemon_process(p) + + # start persistent processes + for p in persistent_processes: + start_managed_process(p) + + # start frame + if ANDROID: + pm_apply_packages('enable') + start_frame() + + if os.getenv("NOBOARD") is None: + start_managed_process("pandad") + + logger_dead = False + + while 1: + msg = messaging.recv_sock(thermal_sock, wait=True) + + # heavyweight batch processes are gated on favorable thermal conditions + if msg.thermal.thermalStatus >= ThermalStatus.yellow: + for p in green_temp_processes: + if p in persistent_processes: + kill_managed_process(p) + else: + for p in green_temp_processes: + if p in persistent_processes: + start_managed_process(p) + + if msg.thermal.freeSpace < 0.05: + logger_dead = True + + if msg.thermal.started: + for p in car_started_processes: + if p == "loggerd" and logger_dead: + kill_managed_process(p) + else: + start_managed_process(p) + else: + logger_dead = False + for p in reversed(car_started_processes): + kill_managed_process(p) + + # check the status of all processes, did any of them die? + running_list = ["%s%s\u001b[0m" % ("\u001b[32m" if running[p].is_alive() else "\u001b[31m", p) for p in running] + cloudlog.debug(' '.join(running_list)) + + # Exit main loop when uninstall is needed + if params.get("DoUninstall", encoding='utf8') == "1": + break + +def manager_prepare(spinner=None): + # build all processes + os.chdir(os.path.dirname(os.path.abspath(__file__))) + + # Spinner has to start from 70 here + total = 100.0 if prebuilt else 50.0 + + for i, p in enumerate(managed_processes): + if spinner is not None: + spinner.update("%d" % ((100.0 - total) + total * (i + 1) / len(managed_processes),)) + prepare_managed_process(p) + +def uninstall(): + cloudlog.warning("uninstalling") + with open('/cache/recovery/command', 'w') as f: + f.write('--wipe_data\n') + # IPowerManager.reboot(confirm=false, reason="recovery", wait=true) + android.reboot(reason="recovery") + +def main(): + # the flippening! 
+ os.system('LD_LIBRARY_PATH="" content insert --uri content://settings/system --bind name:s:user_rotation --bind value:i:1') + + # disable bluetooth + os.system('service call bluetooth_manager 8') + + params = Params() + params.manager_start() + + # set unset params + if params.get("CommunityFeaturesToggle") is None: + params.put("CommunityFeaturesToggle", "0") + if params.get("CompletedTrainingVersion") is None: + params.put("CompletedTrainingVersion", "0") + if params.get("IsMetric") is None: + params.put("IsMetric", "0") + if params.get("RecordFront") is None: + params.put("RecordFront", "0") + if params.get("HasAcceptedTerms") is None: + params.put("HasAcceptedTerms", "0") + if params.get("HasCompletedSetup") is None: + params.put("HasCompletedSetup", "0") + if params.get("IsUploadRawEnabled") is None: + params.put("IsUploadRawEnabled", "1") + if params.get("IsLdwEnabled") is None: + params.put("IsLdwEnabled", "1") + if params.get("IsGeofenceEnabled") is None: + params.put("IsGeofenceEnabled", "-1") + if params.get("SpeedLimitOffset") is None: + params.put("SpeedLimitOffset", "0") + if params.get("LongitudinalControl") is None: + params.put("LongitudinalControl", "0") + if params.get("LimitSetSpeed") is None: + params.put("LimitSetSpeed", "0") + if params.get("LimitSetSpeedNeural") is None: + params.put("LimitSetSpeedNeural", "0") + if params.get("LastUpdateTime") is None: + t = datetime.datetime.now().isoformat() + params.put("LastUpdateTime", t.encode('utf8')) + if params.get("OpenpilotEnabledToggle") is None: + params.put("OpenpilotEnabledToggle", "1") + + # is this chffrplus? 
+ if os.getenv("PASSIVE") is not None: + params.put("Passive", str(int(os.getenv("PASSIVE")))) + + if params.get("Passive") is None: + raise Exception("Passive must be set to continue") + + if ANDROID: + update_apks() + manager_init() + manager_prepare(spinner) + spinner.close() + + if os.getenv("PREPAREONLY") is not None: + return + + # SystemExit on sigterm + signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1)) + + try: + manager_thread() + except Exception: + traceback.print_exc() + crash.capture_exception() + finally: + cleanup_all_processes(None, None) + + if params.get("DoUninstall", encoding='utf8') == "1": + uninstall() + +if __name__ == "__main__": + main() + # manual exit because we are forked + sys.exit(0) diff --git a/selfdrive/pandad.py b/selfdrive/pandad.py new file mode 100755 index 000000000..7cbc704b9 --- /dev/null +++ b/selfdrive/pandad.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# simple boardd wrapper that updates the panda first +import os +import time + +from selfdrive.swaglog import cloudlog +from panda import Panda, PandaDFU, BASEDIR, build_st + + +def get_firmware_fn(): + signed_fn = os.path.join(BASEDIR, "board", "obj", "panda.bin.signed") + if os.path.exists(signed_fn): + cloudlog.info("Using prebuilt signed firmware") + return signed_fn + else: + cloudlog.info("Building panda firmware") + fn = "obj/panda.bin" + build_st(fn, clean=False) + return os.path.join(BASEDIR, "board", fn) + + +def get_expected_signature(fw_fn=None): + if fw_fn is None: + fw_fn = get_firmware_fn() + + return Panda.get_signature_from_firmware(fw_fn) + + +def update_panda(): + panda = None + panda_dfu = None + + cloudlog.info("Connecting to panda") + + while True: + # break on normal mode Panda + panda_list = Panda.list() + if len(panda_list) > 0: + cloudlog.info("Panda found, connecting") + panda = Panda(panda_list[0]) + break + + # flash on DFU mode Panda + panda_dfu = PandaDFU.list() + if len(panda_dfu) > 0: + cloudlog.info("Panda in DFU mode found, 
flashing recovery") + panda_dfu = PandaDFU(panda_dfu[0]) + panda_dfu.recover() + + time.sleep(1) + + fw_fn = get_firmware_fn() + fw_signature = get_expected_signature(fw_fn) + + try: + serial = panda.get_serial()[0].decode("utf-8") + except Exception: + serial = None + + panda_version = "bootstub" if panda.bootstub else panda.get_version() + panda_signature = "bootstub" if panda.bootstub else panda.get_signature() + cloudlog.warning("Panda %s connected, version: %s, signature %s, expected %s" % ( + serial, + panda_version, + panda_signature.hex(), + fw_signature.hex(), + )) + + if panda.bootstub or panda_signature != fw_signature: + cloudlog.info("Panda firmware out of date, update required") + panda.flash(fw_fn) + cloudlog.info("Done flashing") + + if panda.bootstub: + cloudlog.info("Flashed firmware not booting, flashing development bootloader") + panda.recover() + cloudlog.info("Done flashing bootloader") + + if panda.bootstub: + cloudlog.info("Panda still not booting, exiting") + raise AssertionError + + panda_signature = panda.get_signature() + if panda_signature != fw_signature: + cloudlog.info("Version mismatch after flashing, exiting") + raise AssertionError + + +def main(gctx=None): + update_panda() + + os.chdir("boardd") + os.execvp("./boardd", ["./boardd"]) + +if __name__ == "__main__": + main() diff --git a/selfdrive/registration.py b/selfdrive/registration.py new file mode 100644 index 000000000..afa2771bc --- /dev/null +++ b/selfdrive/registration.py @@ -0,0 +1,66 @@ +import os +import json + +from datetime import datetime, timedelta +from selfdrive.swaglog import cloudlog +from selfdrive.version import version, terms_version, training_version, get_git_commit, get_git_branch, get_git_remote +from common.android import get_imei, get_serial, get_subscriber_info +from common.api import api_get +from common.params import Params +from common.file_helpers import mkdirs_exists_ok + +def register(): + params = Params() + params.put("Version", version) + 
params.put("TermsVersion", terms_version) + params.put("TrainingVersion", training_version) + params.put("GitCommit", get_git_commit()) + params.put("GitBranch", get_git_branch()) + params.put("GitRemote", get_git_remote()) + params.put("SubscriberInfo", get_subscriber_info()) + + # create a key for auth + # your private key is kept on your device persist partition and never sent to our servers + # do not erase your persist partition + if not os.path.isfile("/persist/comma/id_rsa.pub"): + cloudlog.warning("generating your personal RSA key") + mkdirs_exists_ok("/persist/comma") + assert os.system("openssl genrsa -out /persist/comma/id_rsa.tmp 2048") == 0 + assert os.system("openssl rsa -in /persist/comma/id_rsa.tmp -pubout -out /persist/comma/id_rsa.tmp.pub") == 0 + os.rename("/persist/comma/id_rsa.tmp", "/persist/comma/id_rsa") + os.rename("/persist/comma/id_rsa.tmp.pub", "/persist/comma/id_rsa.pub") + + # make key readable by app users (ai.comma.plus.offroad) + os.chmod('/persist/comma/', 0o755) + os.chmod('/persist/comma/id_rsa', 0o744) + + dongle_id, access_token = params.get("DongleId", encoding='utf8'), params.get("AccessToken", encoding='utf8') + public_key = open("/persist/comma/id_rsa.pub").read() + + # create registration token + # in the future, this key will make JWTs directly + private_key = open("/persist/comma/id_rsa").read() + + # late import + import jwt + register_token = jwt.encode({'register':True, 'exp': datetime.utcnow() + timedelta(hours=1)}, private_key, algorithm='RS256') + + try: + cloudlog.info("getting pilotauth") + resp = api_get("v2/pilotauth/", method='POST', timeout=15, + imei=get_imei(0), imei2=get_imei(1), serial=get_serial(), public_key=public_key, register_token=register_token) + dongleauth = json.loads(resp.text) + dongle_id, access_token = dongleauth["dongle_id"], dongleauth["access_token"] + + params.put("DongleId", dongle_id) + params.put("AccessToken", access_token) + return dongle_id, access_token + except Exception: + 
cloudlog.exception("failed to authenticate") + if dongle_id is not None and access_token is not None: + return dongle_id, access_token + else: + return None + +if __name__ == "__main__": + print(register()) diff --git a/selfdrive/swaglog.py b/selfdrive/swaglog.py new file mode 100644 index 000000000..90962cb5d --- /dev/null +++ b/selfdrive/swaglog.py @@ -0,0 +1,40 @@ +import os +import logging + +import zmq + +from common.logging_extra import SwagLogger, SwagFormatter + +class LogMessageHandler(logging.Handler): + def __init__(self, formatter): + logging.Handler.__init__(self) + self.setFormatter(formatter) + self.pid = None + + def connect(self): + self.zctx = zmq.Context() + self.sock = self.zctx.socket(zmq.PUSH) + self.sock.setsockopt(zmq.LINGER, 10) + self.sock.connect("ipc:///tmp/logmessage") + self.pid = os.getpid() + + def emit(self, record): + if os.getpid() != self.pid: + self.connect() + + msg = self.format(record).rstrip('\n') + # print("SEND".format(repr(msg))) + try: + s = chr(record.levelno)+msg + self.sock.send(s.encode('utf8'), zmq.NOBLOCK) + except zmq.error.Again: + # drop :/ + pass + +cloudlog = log = SwagLogger() +log.setLevel(logging.DEBUG) + +outhandler = logging.StreamHandler() +log.addHandler(outhandler) + +log.addHandler(LogMessageHandler(SwagFormatter(log))) diff --git a/selfdrive/thermald.py b/selfdrive/thermald.py new file mode 100755 index 000000000..aa4c47a6e --- /dev/null +++ b/selfdrive/thermald.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3.7 +import os +import json +import copy +import datetime +import psutil +from smbus2 import SMBus +from cereal import log +from common.android import ANDROID +from common.basedir import BASEDIR +from common.params import Params +from common.realtime import sec_since_boot, DT_TRML +from common.numpy_fast import clip, interp +from common.filter_simple import FirstOrderFilter +from selfdrive.version import terms_version, training_version +from selfdrive.swaglog import cloudlog +import cereal.messaging 
as messaging +from selfdrive.loggerd.config import get_available_percent +from selfdrive.pandad import get_expected_signature + +FW_SIGNATURE = get_expected_signature() + +ThermalStatus = log.ThermalData.ThermalStatus +CURRENT_TAU = 15. # 15s time constant +DAYS_NO_CONNECTIVITY_MAX = 7 # do not allow to engage after a week without internet +DAYS_NO_CONNECTIVITY_PROMPT = 4 # send an offroad prompt after 4 days with no internet + + +with open(BASEDIR + "/selfdrive/controls/lib/alerts_offroad.json") as json_file: + OFFROAD_ALERTS = json.load(json_file) + +def read_tz(x, clip=True): + try: + with open("/sys/devices/virtual/thermal/thermal_zone%d/temp" % x) as f: + ret = int(f.read()) + if clip: + ret = max(0, ret) + except FileNotFoundError: + return 0 + + return ret + +def read_thermal(): + dat = messaging.new_message() + dat.init('thermal') + dat.thermal.cpu0 = read_tz(5) + dat.thermal.cpu1 = read_tz(7) + dat.thermal.cpu2 = read_tz(10) + dat.thermal.cpu3 = read_tz(12) + dat.thermal.mem = read_tz(2) + dat.thermal.gpu = read_tz(16) + dat.thermal.bat = read_tz(29) + dat.thermal.pa0 = read_tz(25) + return dat + +LEON = False +def setup_eon_fan(): + global LEON + + os.system("echo 2 > /sys/module/dwc3_msm/parameters/otg_switch") + + bus = SMBus(7, force=True) + try: + bus.write_byte_data(0x21, 0x10, 0xf) # mask all interrupts + bus.write_byte_data(0x21, 0x03, 0x1) # set drive current and global interrupt disable + bus.write_byte_data(0x21, 0x02, 0x2) # needed? 
+ bus.write_byte_data(0x21, 0x04, 0x4) # manual override source + except IOError: + print("LEON detected") + #os.system("echo 1 > /sys/devices/soc/6a00000.ssusb/power_supply/usb/usb_otg") + LEON = True + bus.close() + +last_eon_fan_val = None +def set_eon_fan(val): + global LEON, last_eon_fan_val + + if last_eon_fan_val is None or last_eon_fan_val != val: + bus = SMBus(7, force=True) + if LEON: + try: + i = [0x1, 0x3 | 0, 0x3 | 0x08, 0x3 | 0x10][val] + bus.write_i2c_block_data(0x3d, 0, [i]) + except IOError: + # tusb320 + if val == 0: + bus.write_i2c_block_data(0x67, 0xa, [0]) + #bus.write_i2c_block_data(0x67, 0x45, [1<<2]) + else: + #bus.write_i2c_block_data(0x67, 0x45, [0]) + bus.write_i2c_block_data(0x67, 0xa, [0x20]) + bus.write_i2c_block_data(0x67, 0x8, [(val-1)<<6]) + else: + bus.write_byte_data(0x21, 0x04, 0x2) + bus.write_byte_data(0x21, 0x03, (val*2)+1) + bus.write_byte_data(0x21, 0x04, 0x4) + bus.close() + last_eon_fan_val = val + +# temp thresholds to control fan speed - high hysteresis +_TEMP_THRS_H = [50., 65., 80., 10000] +# temp thresholds to control fan speed - low hysteresis +_TEMP_THRS_L = [42.5, 57.5, 72.5, 10000] +# fan speed options +_FAN_SPEEDS = [0, 16384, 32768, 65535] +# max fan speed only allowed if battery is hot +_BAT_TEMP_THERSHOLD = 45. 
+ + +def handle_fan_eon(max_cpu_temp, bat_temp, fan_speed): + new_speed_h = next(speed for speed, temp_h in zip(_FAN_SPEEDS, _TEMP_THRS_H) if temp_h > max_cpu_temp) + new_speed_l = next(speed for speed, temp_l in zip(_FAN_SPEEDS, _TEMP_THRS_L) if temp_l > max_cpu_temp) + + if new_speed_h > fan_speed: + # update speed if using the high thresholds results in fan speed increment + fan_speed = new_speed_h + elif new_speed_l < fan_speed: + # update speed if using the low thresholds results in fan speed decrement + fan_speed = new_speed_l + + if bat_temp < _BAT_TEMP_THERSHOLD: + # no max fan speed unless battery is hot + fan_speed = min(fan_speed, _FAN_SPEEDS[-2]) + + set_eon_fan(fan_speed//16384) + + return fan_speed + +def handle_fan_uno(max_cpu_temp, bat_temp, fan_speed): + # TODO: implement better fan control + return int(interp(max_cpu_temp, [40.0, 80.0], [0, 100])) + +def thermald_thread(): + # prevent LEECO from undervoltage + BATT_PERC_OFF = 10 if LEON else 3 + + health_timeout = int(1000 * 2.5 * DT_TRML) # 2.5x the expected health frequency + + # now loop + thermal_sock = messaging.pub_sock('thermal') + health_sock = messaging.sub_sock('health', timeout=health_timeout) + location_sock = messaging.sub_sock('gpsLocation') + + fan_speed = 0 + count = 0 + + off_ts = None + started_ts = None + started_seen = False + thermal_status = ThermalStatus.green + thermal_status_prev = ThermalStatus.green + usb_power = True + usb_power_prev = True + + current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML) + health_prev = None + fw_version_match_prev = True + current_connectivity_alert = None + time_valid_prev = True + should_start_prev = False + + is_uno = (read_tz(29, clip=False) < -1000) + if is_uno or not ANDROID: + handle_fan = handle_fan_uno + else: + setup_eon_fan() + handle_fan = handle_fan_eon + + params = Params() + + while 1: + health = messaging.recv_sock(health_sock, wait=True) + location = messaging.recv_sock(location_sock) + location = location.gpsLocation 
if location else None
+    msg = read_thermal()
+
+    # clear car params when panda gets disconnected
+    if health is None and health_prev is not None:
+      params.panda_disconnect()
+    health_prev = health
+
+    if health is not None:
+      usb_power = health.health.usbPowerMode != log.HealthData.UsbPowerMode.client
+
+    msg.thermal.freeSpace = get_available_percent(default=100.0) / 100.0
+    msg.thermal.memUsedPercent = int(round(psutil.virtual_memory().percent))
+    msg.thermal.cpuPerc = int(round(psutil.cpu_percent()))
+
+    try:
+      with open("/sys/class/power_supply/battery/capacity") as f:
+        msg.thermal.batteryPercent = int(f.read())
+      with open("/sys/class/power_supply/battery/status") as f:
+        msg.thermal.batteryStatus = f.read().strip()
+      with open("/sys/class/power_supply/battery/current_now") as f:
+        msg.thermal.batteryCurrent = int(f.read())
+      with open("/sys/class/power_supply/battery/voltage_now") as f:
+        msg.thermal.batteryVoltage = int(f.read())
+      with open("/sys/class/power_supply/usb/present") as f:
+        msg.thermal.usbOnline = bool(int(f.read()))
+    except FileNotFoundError:
+      pass
+
+    # Fake battery levels on uno for frame
+    if is_uno:
+      msg.thermal.batteryPercent = 100
+      msg.thermal.batteryStatus = "Charging"
+
+    current_filter.update(msg.thermal.batteryCurrent / 1e6)
+
+    # TODO: add car battery voltage check
+    max_cpu_temp = max(msg.thermal.cpu0, msg.thermal.cpu1,
+                       msg.thermal.cpu2, msg.thermal.cpu3) / 10.0
+    max_comp_temp = max(max_cpu_temp, msg.thermal.mem / 10., msg.thermal.gpu / 10.)
+    bat_temp = msg.thermal.bat/1000.
+
+    fan_speed = handle_fan(max_cpu_temp, bat_temp, fan_speed)
+    msg.thermal.fanSpeed = fan_speed
+
+    # thermal logic with hysteresis
+    if max_cpu_temp > 107. or bat_temp >= 63.:
+      # onroad not allowed
+      thermal_status = ThermalStatus.danger
+    elif max_comp_temp > 92.5 or bat_temp > 60.: # CPU throttling starts around ~90C
+      # hysteresis between onroad not allowed and engage not allowed
+      thermal_status = clip(thermal_status, ThermalStatus.red, ThermalStatus.danger)
+    elif max_cpu_temp > 87.5:
+      # hysteresis between engage not allowed and uploader not allowed
+      thermal_status = clip(thermal_status, ThermalStatus.yellow, ThermalStatus.red)
+    elif max_cpu_temp > 80.0:
+      # uploader not allowed
+      thermal_status = ThermalStatus.yellow
+    elif max_cpu_temp > 75.0:
+      # hysteresis between uploader not allowed and all good
+      thermal_status = clip(thermal_status, ThermalStatus.green, ThermalStatus.yellow)
+    else:
+      # all good
+      thermal_status = ThermalStatus.green
+
+    # **** starting logic ****
+
+    # Check for last update time and display alerts if needed
+    now = datetime.datetime.now()
+
+    # show invalid date/time alert
+    time_valid = now.year >= 2019
+    if time_valid and not time_valid_prev:
+      params.delete("Offroad_InvalidTime")
+    if not time_valid and time_valid_prev:
+      params.put("Offroad_InvalidTime", json.dumps(OFFROAD_ALERTS["Offroad_InvalidTime"]))
+    time_valid_prev = time_valid
+
+    # Show update prompt
+    try:
+      last_update = datetime.datetime.fromisoformat(params.get("LastUpdateTime", encoding='utf8'))
+    except (TypeError, ValueError):
+      last_update = now
+    dt = now - last_update
+
+    if dt.days > DAYS_NO_CONNECTIVITY_MAX:
+      if current_connectivity_alert != "expired":
+        current_connectivity_alert = "expired"
+        params.delete("Offroad_ConnectivityNeededPrompt")
+        params.put("Offroad_ConnectivityNeeded", json.dumps(OFFROAD_ALERTS["Offroad_ConnectivityNeeded"]))
+    elif dt.days > DAYS_NO_CONNECTIVITY_PROMPT:
+      remaining_time = str(DAYS_NO_CONNECTIVITY_MAX - dt.days)
+      if current_connectivity_alert != "prompt" + remaining_time:
+        current_connectivity_alert = "prompt" + remaining_time
+        alert_connectivity_prompt = 
copy.copy(OFFROAD_ALERTS["Offroad_ConnectivityNeededPrompt"])
+        alert_connectivity_prompt["text"] += remaining_time + " days."
+        params.delete("Offroad_ConnectivityNeeded")
+        params.put("Offroad_ConnectivityNeededPrompt", json.dumps(alert_connectivity_prompt))
+    elif current_connectivity_alert is not None:
+      current_connectivity_alert = None
+      params.delete("Offroad_ConnectivityNeeded")
+      params.delete("Offroad_ConnectivityNeededPrompt")
+
+    # start constellation of processes when the car starts
+    ignition = health is not None and (health.health.ignitionLine or health.health.ignitionCan)
+
+    do_uninstall = params.get("DoUninstall") == b"1"
+    accepted_terms = params.get("HasAcceptedTerms") == terms_version
+    completed_training = params.get("CompletedTrainingVersion") == training_version
+
+    panda_signature = params.get("PandaFirmware")
+    fw_version_match = (panda_signature is None) or (panda_signature == FW_SIGNATURE) # don't show alert if no panda is connected (None)
+
+    should_start = ignition
+
+    # with 2% left, we killall, otherwise the phone will take a long time to boot
+    should_start = should_start and msg.thermal.freeSpace > 0.02
+
+    # confirm we have completed training and aren't uninstalling
+    should_start = should_start and accepted_terms and completed_training and (not do_uninstall)
+
+    # check for firmware mismatch
+    should_start = should_start and fw_version_match
+
+    # check if system time is valid
+    should_start = should_start and time_valid
+
+    # don't start while taking snapshot
+    if not should_start_prev:
+      is_taking_snapshot = params.get("IsTakingSnapshot") == b"1"
+      should_start = should_start and (not is_taking_snapshot)
+
+    if fw_version_match and not fw_version_match_prev:
+      params.delete("Offroad_PandaFirmwareMismatch")
+    if not fw_version_match and fw_version_match_prev:
+      params.put("Offroad_PandaFirmwareMismatch", json.dumps(OFFROAD_ALERTS["Offroad_PandaFirmwareMismatch"]))
+
+    # if any CPU gets above 107 or the battery gets above 63, 
kill all processes + # controls will warn with CPU above 95 or battery above 60 + if thermal_status >= ThermalStatus.danger: + should_start = False + if thermal_status_prev < ThermalStatus.danger: + params.put("Offroad_TemperatureTooHigh", json.dumps(OFFROAD_ALERTS["Offroad_TemperatureTooHigh"])) + else: + if thermal_status_prev >= ThermalStatus.danger: + params.delete("Offroad_TemperatureTooHigh") + + if should_start: + if not should_start_prev: + params.delete("IsOffroad") + + off_ts = None + if started_ts is None: + started_ts = sec_since_boot() + started_seen = True + os.system('echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor') + else: + if should_start_prev or (count == 0): + params.put("IsOffroad", "1") + + started_ts = None + if off_ts is None: + off_ts = sec_since_boot() + os.system('echo powersave > /sys/class/devfreq/soc:qcom,cpubw/governor') + + # shutdown if the battery gets lower than 3%, it's discharging, we aren't running for + # more than a minute but we were running + if msg.thermal.batteryPercent < BATT_PERC_OFF and msg.thermal.batteryStatus == "Discharging" and \ + started_seen and (sec_since_boot() - off_ts) > 60: + os.system('LD_LIBRARY_PATH="" svc power shutdown') + + msg.thermal.chargingError = current_filter.x > 0. and msg.thermal.batteryPercent < 90 # if current is positive, then battery is being discharged + msg.thermal.started = started_ts is not None + msg.thermal.startedTs = int(1e9*(started_ts or 0)) + + msg.thermal.thermalStatus = thermal_status + thermal_sock.send(msg.to_bytes()) + + if usb_power_prev and not usb_power: + params.put("Offroad_ChargeDisabled", json.dumps(OFFROAD_ALERTS["Offroad_ChargeDisabled"])) + elif usb_power and not usb_power_prev: + params.delete("Offroad_ChargeDisabled") + + thermal_status_prev = thermal_status + usb_power_prev = usb_power + fw_version_match_prev = fw_version_match + should_start_prev = should_start + + #print(msg) + + # report to server once per minute + if (count % int(60. 
/ DT_TRML)) == 0: + cloudlog.event("STATUS_PACKET", + count=count, + health=(health.to_dict() if health else None), + location=(location.to_dict() if location else None), + thermal=msg.to_dict()) + + count += 1 + + +def main(gctx=None): + thermald_thread() + +if __name__ == "__main__": + main() diff --git a/selfdrive/tombstoned.py b/selfdrive/tombstoned.py new file mode 100644 index 000000000..292ec0408 --- /dev/null +++ b/selfdrive/tombstoned.py @@ -0,0 +1,122 @@ +import os +import re +import time +import datetime + +from raven import Client +from raven.transport.http import HTTPTransport + +from selfdrive.version import version, dirty +from selfdrive.swaglog import cloudlog + +def get_tombstones(): + DIR_DATA = "/data/tombstones/" + return [(DIR_DATA + fn, int(os.stat(DIR_DATA + fn).st_ctime) ) + for fn in os.listdir(DIR_DATA) if fn.startswith("tombstone")] + +def report_tombstone(fn, client): + mtime = os.path.getmtime(fn) + with open(fn, encoding='ISO-8859-1') as f: + dat = f.read() + + # see system/core/debuggerd/tombstone.cpp + parsed = re.match(r"[* ]*\n" + r"(?P
CM Version:[\s\S]*?ABI:.*\n)" + r"(?Ppid:.*\n)" + r"(?Psignal.*\n)?" + r"(?PAbort.*\n)?" + r"(?P\s+x0[\s\S]*?\n)\n" + r"(?:backtrace:\n" + r"(?P[\s\S]*?\n)\n" + r"stack:\n" + r"(?P[\s\S]*?\n)\n" + r")?", dat) + + logtail = re.search(r"--------- tail end of.*\n([\s\S]*?\n)---", dat) + logtail = logtail and logtail.group(1) + + if parsed: + parsedict = parsed.groupdict() + else: + parsedict = {} + + thread_line = parsedict.get('thread', '') + thread_parsed = re.match(r'pid: (?P\d+), tid: (?P\d+), name: (?P.*) >>> (?P.*) <<<', thread_line) + if thread_parsed: + thread_parseddict = thread_parsed.groupdict() + else: + thread_parseddict = {} + pid = thread_parseddict.get('pid', '') + tid = thread_parseddict.get('tid', '') + name = thread_parseddict.get('name', 'unknown') + cmd = thread_parseddict.get('cmd', 'unknown') + + signal_line = parsedict.get('signal', '') + signal_parsed = re.match(r'signal (?P.*?), code (?P.*?), fault addr (?P.*)\n', signal_line) + if signal_parsed: + signal_parseddict = signal_parsed.groupdict() + else: + signal_parseddict = {} + signal = signal_parseddict.get('signal', 'unknown') + code = signal_parseddict.get('code', 'unknown') + fault_addr = signal_parseddict.get('fault_addr', '') + + abort_line = parsedict.get('abort', '') + + if parsed: + message = 'Process {} ({}) got signal {} code {}'.format(name, cmd, signal, code) + if abort_line: + message += '\n'+abort_line + else: + message = fn+'\n'+dat[:1024] + + + client.captureMessage( + message=message, + date=datetime.datetime.utcfromtimestamp(mtime), + data={ + 'logger':'tombstoned', + 'platform':'other', + }, + sdk={'name': 'tombstoned', 'version': '0'}, + extra={ + 'fault_addr': fault_addr, + 'abort_msg': abort_line, + 'pid': pid, + 'tid': tid, + 'name':'{} ({})'.format(name, cmd), + 'tombstone_fn': fn, + 'header': parsedict.get('header'), + 'registers': parsedict.get('registers'), + 'backtrace': parsedict.get('backtrace'), + 'logtail': logtail, + }, + tags={ + 'name':'{} 
({})'.format(name, cmd), + 'signal':signal, + 'code':code, + 'fault_addr':fault_addr, + }, + ) + cloudlog.error({'tombstone': message}) + + +def main(gctx=None): + initial_tombstones = set(get_tombstones()) + + client = Client('https://d3b175702f62402c91ade04d1c547e68:b20d68c813c74f63a7cdf9c4039d8f56@sentry.io/157615', + install_sys_hook=False, transport=HTTPTransport, release=version, tags={'dirty': dirty}, string_max_length=10000) + + client.user_context({'id': os.environ.get('DONGLE_ID')}) + while True: + now_tombstones = set(get_tombstones()) + + for fn, ctime in (now_tombstones - initial_tombstones): + cloudlog.info("reporting new tombstone %s", fn) + report_tombstone(fn, client) + + initial_tombstones = now_tombstones + time.sleep(5) + +if __name__ == "__main__": + main() diff --git a/selfdrive/updated.py b/selfdrive/updated.py new file mode 100755 index 000000000..efde9adcc --- /dev/null +++ b/selfdrive/updated.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 + +# Safe Update: A simple service that waits for network access and tries to +# update every 10 minutes. It's intended to make the OP update process more +# robust against Git repository corruption. This service DOES NOT try to fix +# an already-corrupt BASEDIR Git repo, only prevent it from happening. +# +# During normal operation, both onroad and offroad, the update process makes +# no changes to the BASEDIR install of OP. All update attempts are performed +# in a disposable staging area provided by OverlayFS. It assumes the deleter +# process provides enough disk space to carry out the process. +# +# If an update succeeds, a flag is set, and the update is swapped in at the +# next reboot. If an update is interrupted or otherwise fails, the OverlayFS +# upper layer and metadata can be discarded before trying again. +# +# The swap on boot is triggered by launch_chffrplus.sh +# gated on the existence of $FINALIZED/.overlay_consistent and also the +# existence and mtime of $BASEDIR/.overlay_init. 
+# +# Other than build byproducts, BASEDIR should not be modified while this +# service is running. Developers modifying code directly in BASEDIR should +# disable this service. + +import os +import datetime +import subprocess +import psutil +from stat import S_ISREG, S_ISDIR, S_ISLNK, S_IMODE, ST_MODE, ST_INO, ST_UID, ST_GID, ST_ATIME, ST_MTIME +import shutil +import signal +from pathlib import Path +import fcntl +import threading +from cffi import FFI + +from common.basedir import BASEDIR +from common.params import Params +from selfdrive.swaglog import cloudlog + +STAGING_ROOT = "/data/safe_staging" + +OVERLAY_UPPER = os.path.join(STAGING_ROOT, "upper") +OVERLAY_METADATA = os.path.join(STAGING_ROOT, "metadata") +OVERLAY_MERGED = os.path.join(STAGING_ROOT, "merged") +FINALIZED = os.path.join(STAGING_ROOT, "finalized") + +NICE_LOW_PRIORITY = ["nice", "-n", "19"] +SHORT = os.getenv("SHORT") is not None + +# Workaround for the EON/termux build of Python having os.link removed. +ffi = FFI() +ffi.cdef("int link(const char *oldpath, const char *newpath);") +libc = ffi.dlopen(None) + + +class WaitTimeHelper: + ready_event = threading.Event() + shutdown = False + + def __init__(self): + signal.signal(signal.SIGTERM, self.graceful_shutdown) + signal.signal(signal.SIGINT, self.graceful_shutdown) + signal.signal(signal.SIGHUP, self.update_now) + + def graceful_shutdown(self, signum, frame): + # umount -f doesn't appear effective in avoiding "device busy" on EON, + # so don't actually die until the next convenient opportunity in main(). 
+ cloudlog.info("caught SIGINT/SIGTERM, dismounting overlay at next opportunity") + self.shutdown = True + self.ready_event.set() + + def update_now(self, signum, frame): + cloudlog.info("caught SIGHUP, running update check immediately") + self.ready_event.set() + + +def wait_between_updates(ready_event): + ready_event.clear() + if SHORT: + ready_event.wait(timeout=10) + else: + ready_event.wait(timeout=60 * 10) + + +def link(src, dest): + # Workaround for the EON/termux build of Python having os.link removed. + return libc.link(src.encode(), dest.encode()) + + +def run(cmd, cwd=None): + return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8') + + +def remove_consistent_flag(): + os.system("sync") + consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) + try: + consistent_file.unlink() + except FileNotFoundError: + pass + os.system("sync") + + +def set_consistent_flag(): + consistent_file = Path(os.path.join(FINALIZED, ".overlay_consistent")) + os.system("sync") + consistent_file.touch() + os.system("sync") + + +def set_update_available_params(new_version=False): + params = Params() + + t = datetime.datetime.now().isoformat() + params.put("LastUpdateTime", t.encode('utf8')) + + if new_version: + try: + with open(os.path.join(FINALIZED, "RELEASES.md"), "rb") as f: + r = f.read() + r = r[:r.find(b'\n\n')] # Slice latest release notes + params.put("ReleaseNotes", r + b"\n") + except Exception: + params.put("ReleaseNotes", "") + params.put("UpdateAvailable", "1") + + +def dismount_ovfs(): + if os.path.ismount(OVERLAY_MERGED): + cloudlog.error("unmounting existing overlay") + run(["umount", "-l", OVERLAY_MERGED]) + + +def init_ovfs(): + cloudlog.info("preparing new safe staging area") + Params().put("UpdateAvailable", "0") + + remove_consistent_flag() + + dismount_ovfs() + if os.path.isdir(STAGING_ROOT): + shutil.rmtree(STAGING_ROOT) + + for dirname in [STAGING_ROOT, OVERLAY_UPPER, OVERLAY_METADATA, OVERLAY_MERGED, 
FINALIZED]:
+    os.mkdir(dirname, 0o755)
+  if not os.lstat(BASEDIR).st_dev == os.lstat(OVERLAY_MERGED).st_dev:
+    raise RuntimeError("base and overlay merge directories are on different filesystems; not valid for overlay FS!")
+
+  # Remove consistent flag from current BASEDIR so it's not copied over
+  if os.path.isfile(os.path.join(BASEDIR, ".overlay_consistent")):
+    os.remove(os.path.join(BASEDIR, ".overlay_consistent"))
+
+  # We sync FS object atimes (which EON doesn't use) and mtimes, but ctimes
+  # are outside user control. Make sure Git is set up to ignore system ctimes,
+  # because they change when we make hard links during finalize. Otherwise,
+  # there is a lot of unnecessary churn. This appears to be a common need on
+  # OSX as well: https://www.git-tower.com/blog/make-git-rebase-safe-on-osx/
+  run(["git", "config", "core.trustctime", "false"], BASEDIR)
+
+  # We are temporarily using copytree to copy the directory, which also changes
+  # inode numbers. Ignore those changes too.
+  run(["git", "config", "core.checkStat", "minimal"], BASEDIR)
+
+  # Leave a timestamped canary in BASEDIR to check at startup. The EON clock
+  # should be correct by the time we get here. If the init file disappears, or
+  # critical mtimes in BASEDIR are newer than .overlay_init, continue.sh can
+  # assume that BASEDIR has been used for local development or otherwise modified,
+  # and skips the update activation attempt.
+ Path(os.path.join(BASEDIR, ".overlay_init")).touch() + + overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}" + run(["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED]) + + +def inodes_in_tree(search_dir): + """Given a search root, produce a dictionary mapping of inodes to relative + pathnames of regular files (no directories, symlinks, or special files).""" + inode_map = {} + for root, dirs, files in os.walk(search_dir, topdown=True): + for file_name in files: + full_path_name = os.path.join(root, file_name) + st = os.lstat(full_path_name) + if S_ISREG(st[ST_MODE]): + inode_map[st[ST_INO]] = full_path_name + return inode_map + + +def dup_ovfs_object(inode_map, source_obj, target_dir): + """Given a relative pathname to copy, and a new target root, duplicate the + source object in the target root, using hardlinks for regular files.""" + + source_full_path = os.path.join(OVERLAY_MERGED, source_obj) + st = os.lstat(source_full_path) + target_full_path = os.path.join(target_dir, source_obj) + + if S_ISREG(st[ST_MODE]): + # Hardlink all regular files; ownership and permissions are shared. + link(inode_map[st[ST_INO]], target_full_path) + else: + # Recreate all directories and symlinks; copy ownership and permissions. + if S_ISDIR(st[ST_MODE]): + os.mkdir(os.path.join(FINALIZED, source_obj), S_IMODE(st[ST_MODE])) + elif S_ISLNK(st[ST_MODE]): + os.symlink(os.readlink(source_full_path), target_full_path) + os.chmod(target_full_path, S_IMODE(st[ST_MODE]), follow_symlinks=False) + else: + # Ran into a FIFO, socket, etc. Should not happen in OP install dir. + # Ignore without copying for the time being; revisit later if needed. + cloudlog.error("can't copy this file type: %s" % source_full_path) + os.chown(target_full_path, st[ST_UID], st[ST_GID], follow_symlinks=False) + + # Sync target mtimes to the cached lstat() value from each source object. + # Restores shared inode mtimes after linking, fixes symlinks and dirs. 
+ os.utime(target_full_path, (st[ST_ATIME], st[ST_MTIME]), follow_symlinks=False) + + +def finalize_from_ovfs_hardlink(): + """Take the current OverlayFS merged view and finalize a copy outside of + OverlayFS, ready to be swapped-in at BASEDIR. Copy using hardlinks""" + + cloudlog.info("creating finalized version of the overlay") + + # The "copy" is done with hardlinks, but since the OverlayFS merge looks + # like a different filesystem, and hardlinks can't cross filesystems, we + # have to borrow a source pathname from the upper or lower layer. + inode_map = inodes_in_tree(BASEDIR) + inode_map.update(inodes_in_tree(OVERLAY_UPPER)) + + shutil.rmtree(FINALIZED) + os.umask(0o077) + os.mkdir(FINALIZED) + for root, dirs, files in os.walk(OVERLAY_MERGED, topdown=True): + for obj_name in dirs: + relative_path_name = os.path.relpath(os.path.join(root, obj_name), OVERLAY_MERGED) + dup_ovfs_object(inode_map, relative_path_name, FINALIZED) + for obj_name in files: + relative_path_name = os.path.relpath(os.path.join(root, obj_name), OVERLAY_MERGED) + dup_ovfs_object(inode_map, relative_path_name, FINALIZED) + cloudlog.info("done finalizing overlay") + + +def finalize_from_ovfs_copy(): + """Take the current OverlayFS merged view and finalize a copy outside of + OverlayFS, ready to be swapped-in at BASEDIR. 
Copy using shutil.copytree""" + + cloudlog.info("creating finalized version of the overlay") + shutil.rmtree(FINALIZED) + shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True) + cloudlog.info("done finalizing overlay") + + +def attempt_update(): + cloudlog.info("attempting git update inside staging overlay") + + git_fetch_output = run(NICE_LOW_PRIORITY + ["git", "fetch"], OVERLAY_MERGED) + cloudlog.info("git fetch success: %s", git_fetch_output) + + cur_hash = run(["git", "rev-parse", "HEAD"], OVERLAY_MERGED).rstrip() + upstream_hash = run(["git", "rev-parse", "@{u}"], OVERLAY_MERGED).rstrip() + new_version = cur_hash != upstream_hash + + git_fetch_result = len(git_fetch_output) > 0 and (git_fetch_output != "Failed to add the host to the list of known hosts (/data/data/com.termux/files/home/.ssh/known_hosts).\n") + + cloudlog.info("comparing %s to %s" % (cur_hash, upstream_hash)) + if new_version or git_fetch_result: + cloudlog.info("Running update") + if new_version: + cloudlog.info("git reset in progress") + r = [ + run(NICE_LOW_PRIORITY + ["git", "reset", "--hard", "@{u}"], OVERLAY_MERGED), + run(NICE_LOW_PRIORITY + ["git", "clean", "-xdf"], OVERLAY_MERGED), + run(NICE_LOW_PRIORITY + ["git", "submodule", "init"], OVERLAY_MERGED), + run(NICE_LOW_PRIORITY + ["git", "submodule", "update"], OVERLAY_MERGED), + ] + cloudlog.info("git reset success: %s", '\n'.join(r)) + + # Un-set the validity flag to prevent the finalized tree from being + # activated later if the finalize step is interrupted + remove_consistent_flag() + + finalize_from_ovfs_copy() + + # Make sure the validity flag lands on disk LAST, only when the local git + # repo and OP install are in a consistent state. 
+    set_consistent_flag()
+
+    cloudlog.info("update successful!")
+  else:
+    cloudlog.info("nothing new from git at this time")
+
+  set_update_available_params(new_version=new_version)
+
+
+def main(gctx=None):
+  overlay_init_done = False
+  wait_helper = WaitTimeHelper()
+  params = Params()
+
+  if not os.geteuid() == 0:
+    raise RuntimeError("updated must be launched as root!")
+
+  # Set low io priority
+  p = psutil.Process()
+  if psutil.LINUX:
+    p.ionice(psutil.IOPRIO_CLASS_BE, value=7)
+
+  ov_lock_fd = open('/tmp/safe_staging_overlay.lock', 'w')
+  try:
+    fcntl.flock(ov_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+  except IOError:
+    raise RuntimeError("couldn't get overlay lock; is another updated running?")
+
+  while True:
+    time_wrong = datetime.datetime.now().year < 2019
+    ping_failed = subprocess.call(["ping", "-W", "4", "-c", "1", "8.8.8.8"])
+
+    # Wait until we have a valid datetime to initialize the overlay
+    if not (ping_failed or time_wrong):
+      try:
+        # If the git directory has modifications after we created the overlay
+        # we need to recreate the overlay
+        if overlay_init_done:
+          overlay_init_fn = os.path.join(BASEDIR, ".overlay_init")
+          git_dir_path = os.path.join(BASEDIR, ".git")
+          new_files = run(["find", git_dir_path, "-newer", overlay_init_fn])
+
+          if len(new_files.splitlines()):
+            cloudlog.info(".git directory changed, recreating overlay")
+            overlay_init_done = False
+
+        if not overlay_init_done:
+          init_ovfs()
+          overlay_init_done = True
+
+        if params.get("IsOffroad") == b"1":
+          attempt_update()
+        else:
+          cloudlog.info("not running updater, openpilot running")
+
+      except subprocess.CalledProcessError as e:
+        cloudlog.event(
+          "update process failed",
+          cmd=e.cmd,
+          output=e.output,
+          returncode=e.returncode
+        )
+        overlay_init_done = False
+      except Exception:
+        cloudlog.exception("uncaught updated exception, shouldn't happen")
+        overlay_init_done = False
+
+    wait_between_updates(wait_helper.ready_event)
+    if wait_helper.shutdown:
+      break
+
+  # We've been 
signaled to shut down + dismount_ovfs() + +if __name__ == "__main__": + main() diff --git a/selfdrive/version.py b/selfdrive/version.py new file mode 100644 index 000000000..d2f665196 --- /dev/null +++ b/selfdrive/version.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +import os +import subprocess +from selfdrive.swaglog import cloudlog + + +def get_git_commit(): + return subprocess.check_output(["git", "rev-parse", "HEAD"], encoding='utf8').strip() + + +def get_git_branch(): + return subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], encoding='utf8').strip() + + +def get_git_full_branchname(): + return subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"], encoding='utf8').strip() + + +def get_git_remote(): + try: + local_branch = subprocess.check_output(["git", "name-rev", "--name-only", "HEAD"], encoding='utf8').strip() + tracking_remote = subprocess.check_output(["git", "config", "branch." + local_branch + ".remote"], encoding='utf8').strip() + return subprocess.check_output(["git", "config", "remote." 
+ tracking_remote + ".url"], encoding='utf8').strip() + except subprocess.CalledProcessError: + # Not on a branch, fallback + return subprocess.check_output(["git", "config", "--get", "remote.origin.url"], encoding='utf8').strip() + + +with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "common", "version.h")) as _versionf: + version = _versionf.read().split('"')[1] + +try: + origin = get_git_remote() + if origin.startswith('git@github.com:commaai') or origin.startswith('https://github.com/commaai'): + if origin.endswith('/one.git'): + dirty = True + else: + branch = get_git_full_branchname() + + # This is needed otherwise touched files might show up as modified + try: + subprocess.check_call(["git", "update-index", "--refresh"]) + except subprocess.CalledProcessError: + pass + + dirty = subprocess.call(["git", "diff-index", "--quiet", branch, "--"]) != 0 + if dirty: + dirty_files = subprocess.check_output(["git", "diff-index", branch, "--"], encoding='utf8') + commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"], encoding='utf8').rstrip() + origin_commit = subprocess.check_output(["git", "rev-parse", "--verify", branch], encoding='utf8').rstrip() + cloudlog.event("dirty comma branch", version=version, dirty=dirty, origin=origin, branch=branch, dirty_files=dirty_files, commit=commit, origin_commit=origin_commit) + + else: + dirty = True +except subprocess.CalledProcessError: + try: + cloudlog.exception("git subprocess failed while finding version") + except: + pass + dirty = True + +training_version = b"0.2.0" +terms_version = b"2" + +if __name__ == "__main__": + print("Dirty: %s" % dirty) + print("Version: %s" % version) + print("Remote: %s" % origin) + + try: + print("Branch %s" % branch) + except NameError: + pass