diff --git a/Jenkinsfile b/Jenkinsfile index 83c9007f..bae3469b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -120,12 +120,13 @@ pipeline { } } - stage('HW Tests') { + stage('HW + Unit Tests') { steps { phone_steps("eon", [ ["build cereal", "SCONS_CACHE=1 scons -j4 cereal/"], ["test sounds", "nosetests -s selfdrive/test/test_sounds.py"], ["test boardd loopback", "nosetests -s selfdrive/boardd/tests/test_boardd_loopback.py"], + //["test updater", "python installer/updater/test_updater.py"], ]) } } diff --git a/installer/updater/test_updater.py b/installer/updater/test_updater.py new file mode 100755 index 00000000..6e811921 --- /dev/null +++ b/installer/updater/test_updater.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import os +import shutil +import subprocess +import tempfile +import time +import unittest + +from common.basedir import BASEDIR + +UPDATER_PATH = os.path.join(BASEDIR, "installer/updater") +UPDATER = os.path.join(UPDATER_PATH, "updater") +UPDATE_MANIFEST = os.path.join(UPDATER_PATH, "update.json") + + +class TestUpdater(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # test that the updater builds: actually run make and fail the suite on a non-zero exit status + assert os.system(f"cd {UPDATER_PATH} && make clean && make") == 0, "updater failed to build" + + # restore the checked-in version, since that's what actually runs on devices (git rejects a hard reset with a path) + os.system(f"cd {UPDATER_PATH} && git checkout -- .") + + def setUp(self): + self._clear_dir() + + def tearDown(self): + self._clear_dir() + + def _clear_dir(self): + if os.path.isdir("/data/neoupdate"): + shutil.rmtree("/data/neoupdate") + + def _assert_ok(self, cmd, msg=None): + self.assertTrue(os.system(cmd) == 0, msg) + + def _assert_fails(self, cmd): + self.assertFalse(os.system(cmd) == 0) + + def test_background_download(self): + self._assert_ok(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + + def test_background_download_bad_manifest(self): + # update with bad manifest should fail + with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as f: + f.write("{}") 
self._assert_fails(f"{UPDATER} bgcache 'file://{f.name}'") + + def test_cache_resume(self): + self._assert_ok(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + # a full download takes >1m, but resuming from fully cached should only be a few seconds + start_time = time.monotonic() + self._assert_ok(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + self.assertLess(time.monotonic() - start_time, 10) + + # make sure we can recover from corrupt downloads + def test_recover_from_corrupt(self): + # download the whole update + self._assert_ok(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + + # append junk bytes to every downloaded file so its hash no longer matches + for fn in os.listdir("/data/neoupdate"): + with open(os.path.join("/data/neoupdate", fn), "ab") as f: + f.write(b"\xab"*20) + + # this attempt should fail, then it unlinks + self._assert_fails(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + + # now it should pass + self._assert_ok(f"{UPDATER} bgcache 'file://{UPDATE_MANIFEST}'") + + # simple test that the updater doesn't crash in UI mode + def test_ui_init(self): + with subprocess.Popen(UPDATER) as proc: + time.sleep(5) + self.assertTrue(proc.poll() is None) + proc.terminate() + +if __name__ == "__main__": + unittest.main() diff --git a/installer/updater/updater b/installer/updater/updater index 15858eab..66047420 100755 Binary files a/installer/updater/updater and b/installer/updater/updater differ diff --git a/installer/updater/updater.cc b/installer/updater/updater.cc index a76be8b8..ca0b9270 100644 --- a/installer/updater/updater.cc +++ b/installer/updater/updater.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -33,10 +34,10 @@ #define USER_AGENT "NEOSUpdater-0.2" -#define MANIFEST_URL_EON_STAGING "https://github.com/commaai/eon-neos/raw/master/update.staging.json" -#define MANIFEST_URL_EON_LOCAL "http://192.168.5.1:8000/neosupdate/update.local.json" -#define MANIFEST_URL_EON "https://github.com/commaai/eon-neos/raw/master/update.json" -const char *manifest_url = 
MANIFEST_URL_EON; +#define MANIFEST_URL_NEOS_STAGING "https://github.com/commaai/eon-neos/raw/master/update.staging.json" +#define MANIFEST_URL_NEOS_LOCAL "http://192.168.5.1:8000/neosupdate/update.local.json" +#define MANIFEST_URL_NEOS "https://github.com/commaai/eon-neos/raw/master/update.json" +const char *manifest_url = MANIFEST_URL_NEOS; #define RECOVERY_DEV "/dev/block/bootdevice/by-name/recovery" #define RECOVERY_COMMAND "/cache/recovery/command" @@ -96,7 +97,7 @@ std::string download_string(CURL *curl, std::string url) { curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 0); curl_easy_setopt(curl, CURLOPT_USERAGENT, USER_AGENT); curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1); curl_easy_setopt(curl, CURLOPT_RESUME_FROM, 0); @@ -149,6 +150,32 @@ static void start_settings_activity(const char* name) { system(launch_cmd); } +bool is_settings_active() { + FILE *fp; + char sys_output[4096]; + + fp = popen("/bin/dumpsys window windows", "r"); + if (fp == NULL) { + return false; + } + + bool active = false; + while (fgets(sys_output, sizeof(sys_output), fp) != NULL) { + if (strstr(sys_output, "mCurrentFocus=null") != NULL) { + break; + } + + if (strstr(sys_output, "mCurrentFocus=Window") != NULL) { + active = true; + break; + } + } + + pclose(fp); + + return active; +} + struct Updater { bool do_exit = false; @@ -166,7 +193,6 @@ struct Updater { std::mutex lock; - // i hate state machines give me coroutines already enum UpdateState { CONFIRMATION, LOW_BATTERY, @@ -190,9 +216,15 @@ struct Updater { int b_x, b_w, b_y, b_h; int balt_x; + // download stage writes these for the installation stage + int recovery_len; + std::string recovery_hash; + std::string recovery_fn; + std::string ota_fn; + CURL *curl = NULL; - Updater() { + void ui_init() { touch_init(&touch); fb = framebuffer_init("updater", 0x00001000, false, @@ -218,7 +250,6 
@@ struct Updater { b_h = 220; state = CONFIRMATION; - } int download_file_xferinfo(curl_off_t dltotal, curl_off_t dlno, @@ -251,7 +282,7 @@ struct Updater { curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 0); curl_easy_setopt(curl, CURLOPT_USERAGENT, USER_AGENT); curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1); curl_easy_setopt(curl, CURLOPT_RESUME_FROM, resume_from); @@ -319,32 +350,113 @@ struct Updater { state = RUNNING; } - std::string stage_download(std::string url, std::string hash, std::string name) { + std::string download(std::string url, std::string hash, std::string name) { std::string out_fn = UPDATE_DIR "/" + util::base_name(url); - set_progress("Downloading " + name + "..."); - bool r = download_file(url, out_fn); - if (!r) { - set_error("failed to download " + name); - return ""; + // start or resume downloading if hash doesn't match + std::string fn_hash = sha256_file(out_fn); + if (hash.compare(fn_hash) != 0) { + set_progress("Downloading " + name + "..."); + bool r = download_file(url, out_fn); + if (!r) { + set_error("failed to download " + name); + unlink(out_fn.c_str()); + return ""; + } + fn_hash = sha256_file(out_fn); } set_progress("Verifying " + name + "..."); - std::string fn_hash = sha256_file(out_fn); printf("got %s hash: %s\n", name.c_str(), hash.c_str()); if (fn_hash != hash) { set_error(name + " was corrupt"); unlink(out_fn.c_str()); return ""; } - return out_fn; } - void run_stages() { + bool download_stage() { curl = curl_easy_init(); assert(curl); + // ** quick checks before download ** + + if (!check_space()) { + set_error("2GB of free space required to update"); + return false; + } + + mkdir(UPDATE_DIR, 0777); + + set_progress("Finding latest version..."); + std::string manifest_s = download_string(curl, manifest_url); + printf("manifest: %s\n", manifest_s.c_str()); + + std::string err; + 
auto manifest = json11::Json::parse(manifest_s, err); + if (manifest.is_null() || !err.empty()) { + set_error("failed to load update manifest"); + return false; + } + + std::string ota_url = manifest["ota_url"].string_value(); + std::string ota_hash = manifest["ota_hash"].string_value(); + + std::string recovery_url = manifest["recovery_url"].string_value(); + recovery_hash = manifest["recovery_hash"].string_value(); + recovery_len = manifest["recovery_len"].int_value(); + + // std::string installer_url = manifest["installer_url"].string_value(); + // std::string installer_hash = manifest["installer_hash"].string_value(); + + if (ota_url.empty() || ota_hash.empty()) { + set_error("invalid update manifest"); + return false; + } + + // std::string installer_fn = download(installer_url, installer_hash, "installer"); + // if (installer_fn.empty()) { + // //error'd + // return; + // } + + // ** handle recovery download ** + if (recovery_url.empty() || recovery_hash.empty() || recovery_len == 0) { + set_progress("Skipping recovery flash..."); + } else { + // only download the recovery if it differs from what's flashed + set_progress("Checking recovery..."); + std::string existing_recovery_hash = sha256_file(RECOVERY_DEV, recovery_len); + printf("existing recovery hash: %s\n", existing_recovery_hash.c_str()); + + if (existing_recovery_hash != recovery_hash) { + recovery_fn = download(recovery_url, recovery_hash, "recovery"); + if (recovery_fn.empty()) { + // error'd + return false; + } + } + } + + // ** handle ota download ** + ota_fn = download(ota_url, ota_hash, "update"); + if (ota_fn.empty()) { + //error'd + return false; + } + + // download sucessful + return true; + } + + // thread that handles downloading and installing the update + void run_stages() { + printf("run_stages start\n"); + + + // ** download update ** + if (!check_battery()) { set_battery_low(); int battery_cap = battery_capacity(); @@ -356,77 +468,12 @@ struct Updater { set_running(); } - if 
(!check_space()) { - set_error("2GB of free space required to update"); + bool sucess = download_stage(); + if (!sucess) { return; } - mkdir(UPDATE_DIR, 0777); - - const int EON = (access("/EON", F_OK) != -1); - - set_progress("Finding latest version..."); - std::string manifest_s; - if (EON) { - manifest_s = download_string(curl, manifest_url); - } else { - // don't update NEO - exit(0); - } - - printf("manifest: %s\n", manifest_s.c_str()); - - std::string err; - auto manifest = json11::Json::parse(manifest_s, err); - if (manifest.is_null() || !err.empty()) { - set_error("failed to load update manifest"); - return; - } - - std::string ota_url = manifest["ota_url"].string_value(); - std::string ota_hash = manifest["ota_hash"].string_value(); - - std::string recovery_url = manifest["recovery_url"].string_value(); - std::string recovery_hash = manifest["recovery_hash"].string_value(); - int recovery_len = manifest["recovery_len"].int_value(); - - // std::string installer_url = manifest["installer_url"].string_value(); - // std::string installer_hash = manifest["installer_hash"].string_value(); - - if (ota_url.empty() || ota_hash.empty()) { - set_error("invalid update manifest"); - return; - } - - // std::string installer_fn = stage_download(installer_url, installer_hash, "installer"); - // if (installer_fn.empty()) { - // //error'd - // return; - // } - - std::string recovery_fn; - if (recovery_url.empty() || recovery_hash.empty() || recovery_len == 0) { - set_progress("Skipping recovery flash..."); - } else { - // only download the recovery if it differs from what's flashed - set_progress("Checking recovery..."); - std::string existing_recovery_hash = sha256_file(RECOVERY_DEV, recovery_len); - printf("existing recovery hash: %s\n", existing_recovery_hash.c_str()); - - if (existing_recovery_hash != recovery_hash) { - recovery_fn = stage_download(recovery_url, recovery_hash, "recovery"); - if (recovery_fn.empty()) { - // error'd - return; - } - } - } - - std::string 
ota_fn = stage_download(ota_url, ota_hash, "update"); - if (ota_fn.empty()) { - //error'd - return; - } + // ** install update ** if (!check_battery()) { set_battery_low(); @@ -601,7 +648,7 @@ struct Updater { int powerprompt_y = 312; nvgFontFace(vg, "opensans_regular"); nvgFontSize(vg, 64.0f); - nvgText(vg, fb_w/2, 740, "Ensure EON is connected to power.", NULL); + nvgText(vg, fb_w/2, 740, "Ensure your device remains connected to a power source.", NULL); NVGpaint paint = nvgBoxGradient( vg, progress_x + 1, progress_y + 1, @@ -657,9 +704,7 @@ struct Updater { void ui_update() { std::lock_guard guard(lock); - switch (state) { - case ERROR: - case CONFIRMATION: { + if (state == ERROR || state == CONFIRMATION) { int touch_x = -1, touch_y = -1; int res = touch_poll(&touch, &touch_x, &touch_y, 0); if (res == 1 && !is_settings_active()) { @@ -678,13 +723,11 @@ struct Updater { } } } - default: - break; - } } - void go() { + ui_init(); + while (!do_exit) { ui_update(); @@ -718,51 +761,37 @@ struct Updater { update_thread_handle.join(); } + // reboot system("service call power 16 i32 0 i32 0 i32 1"); } - bool is_settings_active() { - FILE *fp; - char sys_output[4096]; - - fp = popen("/bin/dumpsys window windows", "r"); - if (fp == NULL) { - return false; - } - - bool active = false; - while (fgets(sys_output, sizeof(sys_output), fp) != NULL) { - if (strstr(sys_output, "mCurrentFocus=null") != NULL) { - break; - } - - if (strstr(sys_output, "mCurrentFocus=Window") != NULL) { - active = true; - break; - } - } - - pclose(fp); - - return active; - } - }; } + int main(int argc, char *argv[]) { + bool background_cache = false; if (argc > 1) { if (strcmp(argv[1], "local") == 0) { - manifest_url = MANIFEST_URL_EON_LOCAL; + manifest_url = MANIFEST_URL_NEOS_LOCAL; } else if (strcmp(argv[1], "staging") == 0) { - manifest_url = MANIFEST_URL_EON_STAGING; + manifest_url = MANIFEST_URL_NEOS_STAGING; + } else if (strcmp(argv[1], "bgcache") == 0) { + if (argc > 2) manifest_url = argv[2]; // argv[2] is NULL when no URL follows "bgcache"; keep the default manifest then + 
background_cache = true; } else { manifest_url = argv[1]; } } + printf("updating from %s\n", manifest_url); Updater updater; - updater.go(); - return 0; + int err = 0; + if (background_cache) { + err = !updater.download_stage(); + } else { + updater.go(); + } + return err; } diff --git a/launch_chffrplus.sh b/launch_chffrplus.sh index 1a1c4170..af548356 100755 --- a/launch_chffrplus.sh +++ b/launch_chffrplus.sh @@ -1,23 +1,13 @@ #!/usr/bin/bash -export OMP_NUM_THREADS=1 -export MKL_NUM_THREADS=1 -export NUMEXPR_NUM_THREADS=1 -export OPENBLAS_NUM_THREADS=1 -export VECLIB_MAXIMUM_THREADS=1 - if [ -z "$BASEDIR" ]; then BASEDIR="/data/openpilot" fi -if [ -z "$PASSIVE" ]; then - export PASSIVE="1" -fi +source "$BASEDIR/launch_env.sh" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -STAGING_ROOT="/data/safe_staging" - function launch { # Wifi scan wpa_cli IFNAME=wlan0 SCAN @@ -54,6 +44,7 @@ function launch { git submodule foreach --recursive git reset --hard echo "Restarting launch script ${LAUNCHER_LOCATION}" + unset REQUIRED_NEOS_VERSION exec "${LAUNCHER_LOCATION}" else echo "openpilot backup found, not updating" @@ -81,24 +72,20 @@ function launch { [ -d "/proc/irq/736" ] && echo 3 > /proc/irq/736/smp_affinity_list # USB for OP3T - # Remove old NEOS update file - # TODO: move this code to the updater - if [ -d /data/neoupdate ]; then - rm -rf /data/neoupdate - fi - # Check for NEOS update - if [ $(< /VERSION) != "14" ]; then + if [ $(< /VERSION) != "$REQUIRED_NEOS_VERSION" ]; then if [ -f "$DIR/scripts/continue.sh" ]; then cp "$DIR/scripts/continue.sh" "/data/data/com.termux/files/continue.sh" fi if [ ! 
-f "$BASEDIR/prebuilt" ]; then - echo "Clearing build products and resetting scons state prior to NEOS update" - cd $BASEDIR && scons --clean - rm -rf /tmp/scons_cache - rm -r $BASEDIR/.sconsign.dblite + # Clean old build products, but preserve the scons cache + cd $DIR + scons --clean + git clean -xdf + git submodule foreach --recursive git clean -xdf fi + "$DIR/installer/updater/updater" "file://$DIR/installer/updater/update.json" else if [[ $(uname -v) == "#1 SMP PREEMPT Wed Jun 10 12:40:53 PDT 2020" ]]; then diff --git a/launch_env.sh b/launch_env.sh new file mode 100755 index 00000000..9a86d315 --- /dev/null +++ b/launch_env.sh @@ -0,0 +1,17 @@ +#!/usr/bin/bash + +export OMP_NUM_THREADS=1 +export MKL_NUM_THREADS=1 +export NUMEXPR_NUM_THREADS=1 +export OPENBLAS_NUM_THREADS=1 +export VECLIB_MAXIMUM_THREADS=1 + +if [ -z "$REQUIRED_NEOS_VERSION" ]; then + export REQUIRED_NEOS_VERSION="14" +fi + +if [ -z "$PASSIVE" ]; then + export PASSIVE="1" +fi + +export STAGING_ROOT="/data/safe_staging" diff --git a/selfdrive/controls/lib/alerts_offroad.json b/selfdrive/controls/lib/alerts_offroad.json index 7cf5d822..f343f4cd 100644 --- a/selfdrive/controls/lib/alerts_offroad.json +++ b/selfdrive/controls/lib/alerts_offroad.json @@ -32,5 +32,9 @@ "Offroad_IsTakingSnapshot": { "text": "Taking camera snapshots. System won't start until finished.", "severity": 0 + }, + "Offroad_NeosUpdate": { + "text": "An update to your device's operating system is downloading in the background. 
You will be prompted to update when it's ready to install.", + "severity": 0 } } diff --git a/selfdrive/test/test_updated.py b/selfdrive/test/test_updated.py index 08eca5bd..8ecd2745 100755 --- a/selfdrive/test/test_updated.py +++ b/selfdrive/test/test_updated.py @@ -13,7 +13,7 @@ from common.basedir import BASEDIR from common.params import Params -class TestUpdater(unittest.TestCase): +class TestUpdated(unittest.TestCase): def setUp(self): self.updated_proc = None @@ -27,6 +27,13 @@ class TestUpdater(unittest.TestCase): for d in [org_dir, self.basedir, self.git_remote_dir, self.staging_dir]: os.mkdir(d) + self.neos_version = os.path.join(org_dir, "neos_version") + self.neosupdate_dir = os.path.join(org_dir, "neosupdate") + with open(self.neos_version, "w") as f: + v = subprocess.check_output(r"bash -c 'source launch_env.sh && echo $REQUIRED_NEOS_VERSION'", + cwd=BASEDIR, shell=True, encoding='utf8').strip() + f.write(v) + self.upper_dir = os.path.join(self.staging_dir, "upper") self.merged_dir = os.path.join(self.staging_dir, "merged") self.finalized_dir = os.path.join(self.staging_dir, "finalized") @@ -43,7 +50,7 @@ class TestUpdater(unittest.TestCase): f"git clone {BASEDIR} {self.git_remote_dir}", f"git clone {self.git_remote_dir} {self.basedir}", f"cd {self.basedir} && git submodule init && git submodule update", - f"cd {self.basedir} && scons -j{os.cpu_count()} cereal" + f"cd {self.basedir} && scons -j{os.cpu_count()} cereal/ common/" ]) self.params = Params(db=os.path.join(self.basedir, "persist/params")) @@ -79,6 +86,8 @@ class TestUpdater(unittest.TestCase): os.environ["UPDATER_TEST_IP"] = "localhost" os.environ["UPDATER_LOCK_FILE"] = os.path.join(self.tmp_dir.name, "updater.lock") os.environ["UPDATER_STAGING_ROOT"] = self.staging_dir + os.environ["UPDATER_NEOS_VERSION"] = self.neos_version + os.environ["UPDATER_NEOSUPDATE_DIR"] = self.neosupdate_dir updated_path = os.path.join(self.basedir, "selfdrive/updated.py") return subprocess.Popen(updated_path, 
env=os.environ) @@ -252,5 +261,40 @@ class TestUpdater(unittest.TestCase): self.assertTrue(ret_code is not None) + # *** test cases with NEOS updates *** + + + # Run updated with no update, make sure it clears the old NEOS update + def test_clear_neos_cache(self): + # make the dir and some junk files + os.mkdir(self.neosupdate_dir) + for _ in range(15): + with tempfile.NamedTemporaryFile(dir=self.neosupdate_dir, delete=False) as f: + f.write(os.urandom(random.randrange(1, 1000000))) + + self._start_updater() + self._wait_for_update(clear_param=True) + self._check_update_state(False) + self.assertFalse(os.path.isdir(self.neosupdate_dir)) + + # Let the updater run with no update for a cycle, then write an update + @unittest.skip("TODO: only runs on device") + def test_update_with_neos_update(self): + # bump the NEOS version and commit it + self._run([ + "echo 'export REQUIRED_NEOS_VERSION=3' >> launch_env.sh", + "git -c user.name='testy' -c user.email='testy@tester.test' \ + commit -am 'a neos update'", + ], cwd=self.git_remote_dir) + + # run for a cycle to get the update + self._start_updater() + self._wait_for_update(timeout=60, clear_param=True) + self._check_update_state(True) + + # TODO: more comprehensive check + self.assertTrue(os.path.isdir(self.neosupdate_dir)) + + if __name__ == "__main__": unittest.main() diff --git a/selfdrive/updated.py b/selfdrive/updated.py index f63da544..20590680 100755 --- a/selfdrive/updated.py +++ b/selfdrive/updated.py @@ -29,6 +29,7 @@ import psutil import shutil import signal import fcntl +import time import threading from cffi import FFI from pathlib import Path @@ -36,17 +37,20 @@ from pathlib import Path from common.basedir import BASEDIR from common.params import Params from selfdrive.swaglog import cloudlog +from selfdrive.controls.lib.alertmanager import set_offroad_alert TEST_IP = os.getenv("UPDATER_TEST_IP", "8.8.8.8") LOCK_FILE = os.getenv("UPDATER_LOCK_FILE", "/tmp/safe_staging_overlay.lock") STAGING_ROOT = 
os.getenv("UPDATER_STAGING_ROOT", "/data/safe_staging") +NEOS_VERSION = os.getenv("UPDATER_NEOS_VERSION", "/VERSION") +NEOSUPDATE_DIR = os.getenv("UPDATER_NEOSUPDATE_DIR", "/data/neoupdate") + OVERLAY_UPPER = os.path.join(STAGING_ROOT, "upper") OVERLAY_METADATA = os.path.join(STAGING_ROOT, "metadata") OVERLAY_MERGED = os.path.join(STAGING_ROOT, "merged") FINALIZED = os.path.join(STAGING_ROOT, "finalized") -NICE_LOW_PRIORITY = ["nice", "-n", "19"] # Workaround for lack of os.link in the NEOS/termux python ffi = FFI() @@ -57,7 +61,8 @@ def link(src, dest): class WaitTimeHelper: - def __init__(self): + def __init__(self, proc): + self.proc = proc self.ready_event = threading.Event() self.shutdown = False signal.signal(signal.SIGTERM, self.graceful_shutdown) @@ -68,6 +73,12 @@ class WaitTimeHelper: # umount -f doesn't appear effective in avoiding "device busy" on NEOS, # so don't actually die until the next convenient opportunity in main(). cloudlog.info("caught SIGINT/SIGTERM, dismounting overlay at next opportunity") + + # forward the signal to all our child processes + child_procs = self.proc.children(recursive=True) + for p in child_procs: + p.send_signal(signum) + self.shutdown = True self.ready_event.set() @@ -79,7 +90,9 @@ class WaitTimeHelper: self.ready_event.wait(timeout=t) -def run(cmd, cwd=None): +def run(cmd, cwd=None, low_priority=False): + if low_priority: + cmd = ["nice", "-n", "19"] + cmd return subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT, encoding='utf8') @@ -93,7 +106,7 @@ def set_consistent_flag(consistent): os.system("sync") -def set_update_available_params(new_version=False): +def set_update_available_params(new_version): params = Params() t = datetime.datetime.utcnow().isoformat() @@ -132,7 +145,7 @@ def setup_git_options(cwd): for option, value in git_cfg: try: ret = run(["git", "config", "--get", option], cwd) - config_ok = (ret.strip() == value) + config_ok = ret.strip() == value except subprocess.CalledProcessError: 
config_ok = False @@ -168,6 +181,7 @@ def init_ovfs(): # and skips the update activation attempt. Path(os.path.join(BASEDIR, ".overlay_init")).touch() + os.system("sync") overlay_opts = f"lowerdir={BASEDIR},upperdir={OVERLAY_UPPER},workdir={OVERLAY_METADATA}" run(["mount", "-t", "overlay", "-o", overlay_opts, "none", OVERLAY_MERGED]) @@ -176,18 +190,23 @@ def finalize_from_ovfs(): """Take the current OverlayFS merged view and finalize a copy outside of OverlayFS, ready to be swapped-in at BASEDIR. Copy using shutil.copytree""" + # Remove the update ready flag and any old updates cloudlog.info("creating finalized version of the overlay") + set_consistent_flag(False) shutil.rmtree(FINALIZED) + + # Copy the merged overlay view and set the update ready flag shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True) + set_consistent_flag(True) cloudlog.info("done finalizing overlay") -def attempt_update(): +def attempt_update(wait_helper): cloudlog.info("attempting git update inside staging overlay") setup_git_options(OVERLAY_MERGED) - git_fetch_output = run(NICE_LOW_PRIORITY + ["git", "fetch"], OVERLAY_MERGED) + git_fetch_output = run(["git", "fetch"], OVERLAY_MERGED, low_priority=True) cloudlog.info("git fetch success: %s", git_fetch_output) cur_hash = run(["git", "rev-parse", "HEAD"], OVERLAY_MERGED).rstrip() @@ -200,46 +219,75 @@ def attempt_update(): cloudlog.info("comparing %s to %s" % (cur_hash, upstream_hash)) if new_version or git_fetch_result: cloudlog.info("Running update") + if new_version: cloudlog.info("git reset in progress") r = [ - run(NICE_LOW_PRIORITY + ["git", "reset", "--hard", "@{u}"], OVERLAY_MERGED), - run(NICE_LOW_PRIORITY + ["git", "clean", "-xdf"], OVERLAY_MERGED), - run(NICE_LOW_PRIORITY + ["git", "submodule", "init"], OVERLAY_MERGED), - run(NICE_LOW_PRIORITY + ["git", "submodule", "update"], OVERLAY_MERGED), + run(["git", "reset", "--hard", "@{u}"], OVERLAY_MERGED, low_priority=True), + run(["git", "clean", "-xdf"], OVERLAY_MERGED, 
low_priority=True ), + run(["git", "submodule", "init"], OVERLAY_MERGED, low_priority=True), + run(["git", "submodule", "update"], OVERLAY_MERGED, low_priority=True), ] cloudlog.info("git reset success: %s", '\n'.join(r)) - # Un-set the validity flag to prevent the finalized tree from being - # activated later if the finalize step is interrupted - set_consistent_flag(False) + # Download the accompanying NEOS version if it doesn't match the current version + with open(NEOS_VERSION, "r") as f: + cur_neos = f.read().strip() + updated_neos = run(["bash", "-c", r"unset REQUIRED_NEOS_VERSION && source launch_env.sh && \ + echo -n $REQUIRED_NEOS_VERSION"], OVERLAY_MERGED).strip() + + cloudlog.info(f"NEOS version check: {cur_neos} vs {updated_neos}") + if cur_neos != updated_neos: + cloudlog.info(f"Beginning background download for NEOS {updated_neos}") + + set_offroad_alert("Offroad_NeosUpdate", True) + updater_path = os.path.join(OVERLAY_MERGED, "installer/updater/updater") + update_manifest = f"file://{OVERLAY_MERGED}/installer/updater/update.json" + + neos_downloaded = False + start_time = time.monotonic() + # Try to download for one day + while (time.monotonic() - start_time < 60*60*24) and not wait_helper.shutdown: + wait_helper.ready_event.clear() + try: + run([updater_path, "bgcache", update_manifest], OVERLAY_MERGED, low_priority=True) + neos_downloaded = True + break + except subprocess.CalledProcessError: + cloudlog.info("NEOS background download failed, retrying") + wait_helper.sleep(120) + + # If the download failed, we'll show the alert again when we retry + set_offroad_alert("Offroad_NeosUpdate", False) + if not neos_downloaded: + raise Exception("Failed to download NEOS update") + + cloudlog.info(f"NEOS background download successful, took {time.monotonic() - start_time} seconds") + + # Create the finalized, ready-to-swap update finalize_from_ovfs() - - # Make sure the validity flag lands on disk LAST, only when the local git - # repo and OP install are in 
a consistent state. - set_consistent_flag(True) - - cloudlog.info("update successful!") + cloudlog.info("openpilot update successful!") else: cloudlog.info("nothing new from git at this time") - set_update_available_params(new_version=new_version) + set_update_available_params(new_version) + return new_version def main(): params = Params() if params.get("DisableUpdates") == b"1": - raise RuntimeError("updates are disabled by param") + raise RuntimeError("updates are disabled by the DisableUpdates param") if os.geteuid() != 0: raise RuntimeError("updated must be launched as root!") # Set low io priority - p = psutil.Process() + proc = psutil.Process() if psutil.LINUX: - p.ionice(psutil.IOPRIO_CLASS_BE, value=7) + proc.ionice(psutil.IOPRIO_CLASS_BE, value=7) ov_lock_fd = open(LOCK_FILE, 'w') try: @@ -248,10 +296,11 @@ def main(): raise RuntimeError("couldn't get overlay lock; is another updated running?") # Wait for IsOffroad to be set before our first update attempt - wait_helper = WaitTimeHelper() + wait_helper = WaitTimeHelper(proc) wait_helper.sleep(30) update_failed_count = 0 + update_available = False overlay_initialized = False while not wait_helper.shutdown: wait_helper.ready_event.clear() @@ -282,8 +331,10 @@ def main(): overlay_initialized = True if params.get("IsOffroad") == b"1": - attempt_update() + update_available = attempt_update(wait_helper) or update_available update_failed_count = 0 + if not update_available and os.path.isdir(NEOSUPDATE_DIR): + shutil.rmtree(NEOSUPDATE_DIR) else: cloudlog.info("not running updater, openpilot running") @@ -308,7 +359,6 @@ def main(): # Wait 10 minutes between update attempts wait_helper.sleep(60*10) - # We've been signaled to shut down dismount_ovfs() if __name__ == "__main__":