From ea0e61187298c9c02238ad673f5932f08aa6f0c7 Mon Sep 17 00:00:00 2001 From: Willem Melching Date: Thu, 22 Apr 2021 15:24:35 +0200 Subject: [PATCH] Switch to new sentry_sdk (#20690) * remove unused crash.install * use sentry_sdk in crash.py * athena crashes dont rely on excepthook * make default crash handler work * make it work in athena * fixup tombstoned --- selfdrive/athena/athenad.py | 10 ++++- selfdrive/athena/manage_athenad.py | 4 -- selfdrive/crash.py | 59 ++++++------------------------ selfdrive/manager/manager.py | 5 ++- selfdrive/tombstoned.py | 54 ++++++++++++++------------- 5 files changed, 51 insertions(+), 81 deletions(-) diff --git a/selfdrive/athena/athenad.py b/selfdrive/athena/athenad.py index 5191e322..c7f501bc 100755 --- a/selfdrive/athena/athenad.py +++ b/selfdrive/athena/athenad.py @@ -29,6 +29,8 @@ from selfdrive.hardware import HARDWARE, PC from selfdrive.loggerd.config import ROOT from selfdrive.loggerd.xattr_cache import getxattr, setxattr from selfdrive.swaglog import cloudlog, SWAGLOG_DIR +import selfdrive.crash as crash +from selfdrive.version import dirty, origin, branch, commit ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai') HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4")) @@ -409,7 +411,11 @@ def backoff(retries): def main(): params = Params() - dongle_id = params.get("DongleId").decode('utf-8') + dongle_id = params.get("DongleId", encoding='utf-8') + crash.bind_user(id=dongle_id) + crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit, + device=HARDWARE.get_device_type()) + ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id api = Api(dongle_id) @@ -427,7 +433,9 @@ def main(): except (KeyboardInterrupt, SystemExit): break except Exception: + crash.capture_exception() cloudlog.exception("athenad.main.exception") + conn_retries += 1 params.delete("LastAthenaPingTime") diff --git a/selfdrive/athena/manage_athenad.py b/selfdrive/athena/manage_athenad.py index c16bef07..7bb717e0 100755 --- a/selfdrive/athena/manage_athenad.py +++ b/selfdrive/athena/manage_athenad.py @@ -3,7 +3,6 @@ import time from multiprocessing import Process -import selfdrive.crash as crash from common.params import Params from selfdrive.manager.process import launcher from selfdrive.swaglog import cloudlog @@ -16,9 +15,6 @@ def main(): params = Params() dongle_id = params.get("DongleId").decode('utf-8') cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty) - crash.bind_user(id=dongle_id) - crash.bind_extra(version=version, dirty=dirty) - crash.install() try: while 1: diff --git a/selfdrive/crash.py b/selfdrive/crash.py index 28a6ae3b..c8d1ecda 100644 --- a/selfdrive/crash.py +++ b/selfdrive/crash.py @@ -1,12 +1,11 @@ """Install exception handler for process crash.""" import os import sys -import threading import capnp -from selfdrive.version import version, dirty, origin, branch from selfdrive.hardware import PC from selfdrive.swaglog import cloudlog +from selfdrive.version import version if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC: def capture_exception(*args, **kwargs): @@ -18,60 +17,24 @@ if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC: def bind_extra(**kwargs): pass - def install(): - pass else: - from raven import Client - from raven.transport.http import HTTPTransport - - tags = { - 'dirty': dirty, - 'origin': origin, - 'branch': branch - } - client = Client('https://a8dc76b5bfb34908a601d67e2aa8bcf9:4336ee4648984e438370a3fa3f5adda2@o33823.ingest.sentry.io/77924', - install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags) + import sentry_sdk + from sentry_sdk.integrations.threading import ThreadingIntegration def capture_exception(*args, **kwargs): exc_info = sys.exc_info() if not exc_info[0] is capnp.lib.capnp.KjException: - client.captureException(*args, **kwargs) + sentry_sdk.capture_exception(*args, **kwargs) + sentry_sdk.flush() # https://github.com/getsentry/sentry-python/issues/291 cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1)) def bind_user(**kwargs): - client.user_context(kwargs) + sentry_sdk.set_user(kwargs) def bind_extra(**kwargs): - client.extra_context(kwargs) + for k, v in kwargs.items(): + sentry_sdk.set_tag(k, v) - def install(): - """ - Workaround for `sys.excepthook` thread bug from: - http://bugs.python.org/issue1230540 - Call once from the main thread before creating any threads. - Source: https://stackoverflow.com/a/31622038 - """ - # installs a sys.excepthook - __excepthook__ = sys.excepthook - - def handle_exception(*exc_info): - if exc_info[0] not in (KeyboardInterrupt, SystemExit): - capture_exception() - __excepthook__(*exc_info) - sys.excepthook = handle_exception - - init_original = threading.Thread.__init__ - - def init(self, *args, **kwargs): - init_original(self, *args, **kwargs) - run_original = self.run - - def run_with_except_hook(*args2, **kwargs2): - try: - run_original(*args2, **kwargs2) - except Exception: - sys.excepthook(*sys.exc_info()) - - self.run = run_with_except_hook - - threading.Thread.__init__ = init + sentry_sdk.init("https://a8dc76b5bfb34908a601d67e2aa8bcf9@o33823.ingest.sentry.io/77924", + default_integrations=False, integrations=[ThreadingIntegration(propagate_hub=True)], + release=version) diff --git a/selfdrive/manager/manager.py b/selfdrive/manager/manager.py index b5d833ff..9c4ff207 100755 --- a/selfdrive/manager/manager.py +++ b/selfdrive/manager/manager.py @@ -18,7 +18,7 @@ from selfdrive.manager.process import ensure_running from selfdrive.manager.process_config import managed_processes from selfdrive.registration import register from selfdrive.swaglog import cloudlog, add_file_handler -from selfdrive.version import dirty, version +from selfdrive.version import dirty, version, origin, branch, commit def manager_init(): @@ -78,7 +78,8 @@ def manager_init(): cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty, device=HARDWARE.get_device_type()) crash.bind_user(id=dongle_id) - crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type()) + crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit, + device=HARDWARE.get_device_type()) def manager_prepare(): diff --git a/selfdrive/tombstoned.py b/selfdrive/tombstoned.py index 185bd9ad..fb7a1a2c 100755 --- a/selfdrive/tombstoned.py +++ b/selfdrive/tombstoned.py @@ -8,11 +8,11 @@ import subprocess import time import glob -from raven import Client -from raven.transport.http import HTTPTransport +import sentry_sdk +from common.params import Params from common.file_helpers import mkdirs_exists_ok -from selfdrive.hardware import TICI +from selfdrive.hardware import TICI, HARDWARE from selfdrive.loggerd.config import ROOT from selfdrive.swaglog import cloudlog from selfdrive.version import branch, commit, dirty, origin, version @@ -31,16 +31,15 @@ def safe_fn(s): return "".join(c for c in s if c.isalnum() or c in extra).rstrip() -def sentry_report(client, fn, message, contents): +def sentry_report(fn, message, contents): cloudlog.error({'tombstone': message}) - client.captureMessage( - message=message, - sdk={'name': 'tombstoned', 'version': '0'}, - extra={ - 'tombstone_fn': fn, - 'tombstone': contents - }, - ) + + with sentry_sdk.configure_scope() as scope: + scope.set_extra("tombstone_fn", fn) + scope.set_extra("tombstone", contents) + sentry_sdk.capture_message(message=message) + sentry_sdk.flush() + def clear_apport_folder(): for f in glob.glob(APPORT_DIR + '*'): @@ -77,7 +76,7 @@ def get_tombstones(): return files -def report_tombstone_android(fn, client): +def report_tombstone_android(fn): f_size = os.path.getsize(fn) if f_size > MAX_SIZE: cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...") @@ -104,7 +103,7 @@ def report_tombstone_android(fn, client): if fault_idx >= 0: message = message[:fault_idx] - sentry_report(client, fn, message, contents) + sentry_report(fn, message, contents) # Copy crashlog to upload folder clean_path = executable.replace('./', '').replace('/', '_') @@ -118,7 +117,7 @@ def report_tombstone_android(fn, client): shutil.copy(fn, os.path.join(crashlog_dir, new_fn)) -def report_tombstone_apport(fn, client): +def report_tombstone_apport(fn): f_size = os.path.getsize(fn) if f_size > MAX_SIZE: cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...") @@ -178,7 +177,7 @@ def report_tombstone_apport(fn, client): contents = stacktrace + "\n\n" + contents message = message + " - " + crash_function - sentry_report(client, fn, message, contents) + sentry_report(fn, message, contents) # Copy crashlog to upload folder clean_path = path.replace('/', '_') @@ -202,15 +201,18 @@ def main(): clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register initial_tombstones = set(get_tombstones()) - tags = { - 'dirty': dirty, - 'origin': origin, - 'branch': branch - } - client = Client('https://a40f22e13cbc4261873333c125fc9d38:dd57b2dc8113415093f8d9c017df092b@o33823.ingest.sentry.io/157615', - install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags, string_max_length=10000) + sentry_sdk.utils.MAX_STRING_LENGTH = 8192 + sentry_sdk.init("https://a40f22e13cbc4261873333c125fc9d38@o33823.ingest.sentry.io/157615", + default_integrations=False, release=version) + + dongle_id = Params().get("DongleId", encoding='utf-8') + sentry_sdk.set_user({"id": dongle_id}) + sentry_sdk.set_tag("dirty", dirty) + sentry_sdk.set_tag("origin", origin) + sentry_sdk.set_tag("branch", branch) + sentry_sdk.set_tag("commit", commit) + sentry_sdk.set_tag("device", HARDWARE.get_device_type()) - client.user_context({'id': os.environ.get('DONGLE_ID')}) while True: now_tombstones = set(get_tombstones()) @@ -218,9 +220,9 @@ def main(): try: cloudlog.info(f"reporting new tombstone {fn}") if fn.endswith(".crash"): - report_tombstone_apport(fn, client) + report_tombstone_apport(fn) else: - report_tombstone_android(fn, client) + report_tombstone_android(fn) except Exception: cloudlog.exception(f"Error reporting tombstone {fn}")