Switch to new sentry_sdk (#20690)

* remove unused crash.install

* use sentry_sdk in crash.py

* athena crashes don't rely on excepthook

* make default crash handler work

* make it work in athena

* fixup tombstoned
albatross
Willem Melching 2021-04-22 15:24:35 +02:00 committed by GitHub
parent c4189e158e
commit ea0e611872
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 81 deletions

View File

@ -29,6 +29,8 @@ from selfdrive.hardware import HARDWARE, PC
from selfdrive.loggerd.config import ROOT from selfdrive.loggerd.config import ROOT
from selfdrive.loggerd.xattr_cache import getxattr, setxattr from selfdrive.loggerd.xattr_cache import getxattr, setxattr
from selfdrive.swaglog import cloudlog, SWAGLOG_DIR from selfdrive.swaglog import cloudlog, SWAGLOG_DIR
import selfdrive.crash as crash
from selfdrive.version import dirty, origin, branch, commit
ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai') ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai')
HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4")) HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4"))
@ -409,7 +411,11 @@ def backoff(retries):
def main(): def main():
params = Params() params = Params()
dongle_id = params.get("DongleId").decode('utf-8') dongle_id = params.get("DongleId", encoding='utf-8')
crash.bind_user(id=dongle_id)
crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
device=HARDWARE.get_device_type())
ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
api = Api(dongle_id) api = Api(dongle_id)
@ -427,7 +433,9 @@ def main():
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
break break
except Exception: except Exception:
crash.capture_exception()
cloudlog.exception("athenad.main.exception") cloudlog.exception("athenad.main.exception")
conn_retries += 1 conn_retries += 1
params.delete("LastAthenaPingTime") params.delete("LastAthenaPingTime")

View File

@ -3,7 +3,6 @@
import time import time
from multiprocessing import Process from multiprocessing import Process
import selfdrive.crash as crash
from common.params import Params from common.params import Params
from selfdrive.manager.process import launcher from selfdrive.manager.process import launcher
from selfdrive.swaglog import cloudlog from selfdrive.swaglog import cloudlog
@ -16,9 +15,6 @@ def main():
params = Params() params = Params()
dongle_id = params.get("DongleId").decode('utf-8') dongle_id = params.get("DongleId").decode('utf-8')
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty) cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty)
crash.bind_user(id=dongle_id)
crash.bind_extra(version=version, dirty=dirty)
crash.install()
try: try:
while 1: while 1:

View File

@ -1,12 +1,11 @@
"""Install exception handler for process crash.""" """Install exception handler for process crash."""
import os import os
import sys import sys
import threading
import capnp import capnp
from selfdrive.version import version, dirty, origin, branch
from selfdrive.hardware import PC from selfdrive.hardware import PC
from selfdrive.swaglog import cloudlog from selfdrive.swaglog import cloudlog
from selfdrive.version import version
if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC: if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
def capture_exception(*args, **kwargs): def capture_exception(*args, **kwargs):
@ -18,60 +17,24 @@ if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
def bind_extra(**kwargs): def bind_extra(**kwargs):
pass pass
def install():
pass
else: else:
from raven import Client import sentry_sdk
from raven.transport.http import HTTPTransport from sentry_sdk.integrations.threading import ThreadingIntegration
tags = {
'dirty': dirty,
'origin': origin,
'branch': branch
}
client = Client('https://a8dc76b5bfb34908a601d67e2aa8bcf9:4336ee4648984e438370a3fa3f5adda2@o33823.ingest.sentry.io/77924',
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags)
def capture_exception(*args, **kwargs): def capture_exception(*args, **kwargs):
exc_info = sys.exc_info() exc_info = sys.exc_info()
if not exc_info[0] is capnp.lib.capnp.KjException: if not exc_info[0] is capnp.lib.capnp.KjException:
client.captureException(*args, **kwargs) sentry_sdk.capture_exception(*args, **kwargs)
sentry_sdk.flush() # https://github.com/getsentry/sentry-python/issues/291
cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1)) cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1))
def bind_user(**kwargs): def bind_user(**kwargs):
client.user_context(kwargs) sentry_sdk.set_user(kwargs)
def bind_extra(**kwargs): def bind_extra(**kwargs):
client.extra_context(kwargs) for k, v in kwargs.items():
sentry_sdk.set_tag(k, v)
def install(): sentry_sdk.init("https://a8dc76b5bfb34908a601d67e2aa8bcf9@o33823.ingest.sentry.io/77924",
""" default_integrations=False, integrations=[ThreadingIntegration(propagate_hub=True)],
Workaround for `sys.excepthook` thread bug from: release=version)
http://bugs.python.org/issue1230540
Call once from the main thread before creating any threads.
Source: https://stackoverflow.com/a/31622038
"""
# installs a sys.excepthook
__excepthook__ = sys.excepthook
def handle_exception(*exc_info):
if exc_info[0] not in (KeyboardInterrupt, SystemExit):
capture_exception()
__excepthook__(*exc_info)
sys.excepthook = handle_exception
init_original = threading.Thread.__init__
def init(self, *args, **kwargs):
init_original(self, *args, **kwargs)
run_original = self.run
def run_with_except_hook(*args2, **kwargs2):
try:
run_original(*args2, **kwargs2)
except Exception:
sys.excepthook(*sys.exc_info())
self.run = run_with_except_hook
threading.Thread.__init__ = init

View File

@ -18,7 +18,7 @@ from selfdrive.manager.process import ensure_running
from selfdrive.manager.process_config import managed_processes from selfdrive.manager.process_config import managed_processes
from selfdrive.registration import register from selfdrive.registration import register
from selfdrive.swaglog import cloudlog, add_file_handler from selfdrive.swaglog import cloudlog, add_file_handler
from selfdrive.version import dirty, version from selfdrive.version import dirty, version, origin, branch, commit
def manager_init(): def manager_init():
@ -78,7 +78,8 @@ def manager_init():
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty, cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty,
device=HARDWARE.get_device_type()) device=HARDWARE.get_device_type())
crash.bind_user(id=dongle_id) crash.bind_user(id=dongle_id)
crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type()) crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
device=HARDWARE.get_device_type())
def manager_prepare(): def manager_prepare():

View File

@ -8,11 +8,11 @@ import subprocess
import time import time
import glob import glob
from raven import Client import sentry_sdk
from raven.transport.http import HTTPTransport
from common.params import Params
from common.file_helpers import mkdirs_exists_ok from common.file_helpers import mkdirs_exists_ok
from selfdrive.hardware import TICI from selfdrive.hardware import TICI, HARDWARE
from selfdrive.loggerd.config import ROOT from selfdrive.loggerd.config import ROOT
from selfdrive.swaglog import cloudlog from selfdrive.swaglog import cloudlog
from selfdrive.version import branch, commit, dirty, origin, version from selfdrive.version import branch, commit, dirty, origin, version
@ -31,16 +31,15 @@ def safe_fn(s):
return "".join(c for c in s if c.isalnum() or c in extra).rstrip() return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
def sentry_report(client, fn, message, contents): def sentry_report(fn, message, contents):
cloudlog.error({'tombstone': message}) cloudlog.error({'tombstone': message})
client.captureMessage(
message=message, with sentry_sdk.configure_scope() as scope:
sdk={'name': 'tombstoned', 'version': '0'}, scope.set_extra("tombstone_fn", fn)
extra={ scope.set_extra("tombstone", contents)
'tombstone_fn': fn, sentry_sdk.capture_message(message=message)
'tombstone': contents sentry_sdk.flush()
},
)
def clear_apport_folder(): def clear_apport_folder():
for f in glob.glob(APPORT_DIR + '*'): for f in glob.glob(APPORT_DIR + '*'):
@ -77,7 +76,7 @@ def get_tombstones():
return files return files
def report_tombstone_android(fn, client): def report_tombstone_android(fn):
f_size = os.path.getsize(fn) f_size = os.path.getsize(fn)
if f_size > MAX_SIZE: if f_size > MAX_SIZE:
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...") cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
@ -104,7 +103,7 @@ def report_tombstone_android(fn, client):
if fault_idx >= 0: if fault_idx >= 0:
message = message[:fault_idx] message = message[:fault_idx]
sentry_report(client, fn, message, contents) sentry_report(fn, message, contents)
# Copy crashlog to upload folder # Copy crashlog to upload folder
clean_path = executable.replace('./', '').replace('/', '_') clean_path = executable.replace('./', '').replace('/', '_')
@ -118,7 +117,7 @@ def report_tombstone_android(fn, client):
shutil.copy(fn, os.path.join(crashlog_dir, new_fn)) shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
def report_tombstone_apport(fn, client): def report_tombstone_apport(fn):
f_size = os.path.getsize(fn) f_size = os.path.getsize(fn)
if f_size > MAX_SIZE: if f_size > MAX_SIZE:
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...") cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
@ -178,7 +177,7 @@ def report_tombstone_apport(fn, client):
contents = stacktrace + "\n\n" + contents contents = stacktrace + "\n\n" + contents
message = message + " - " + crash_function message = message + " - " + crash_function
sentry_report(client, fn, message, contents) sentry_report(fn, message, contents)
# Copy crashlog to upload folder # Copy crashlog to upload folder
clean_path = path.replace('/', '_') clean_path = path.replace('/', '_')
@ -202,15 +201,18 @@ def main():
clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register
initial_tombstones = set(get_tombstones()) initial_tombstones = set(get_tombstones())
tags = { sentry_sdk.utils.MAX_STRING_LENGTH = 8192
'dirty': dirty, sentry_sdk.init("https://a40f22e13cbc4261873333c125fc9d38@o33823.ingest.sentry.io/157615",
'origin': origin, default_integrations=False, release=version)
'branch': branch
} dongle_id = Params().get("DongleId", encoding='utf-8')
client = Client('https://a40f22e13cbc4261873333c125fc9d38:dd57b2dc8113415093f8d9c017df092b@o33823.ingest.sentry.io/157615', sentry_sdk.set_user({"id": dongle_id})
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags, string_max_length=10000) sentry_sdk.set_tag("dirty", dirty)
sentry_sdk.set_tag("origin", origin)
sentry_sdk.set_tag("branch", branch)
sentry_sdk.set_tag("commit", commit)
sentry_sdk.set_tag("device", HARDWARE.get_device_type())
client.user_context({'id': os.environ.get('DONGLE_ID')})
while True: while True:
now_tombstones = set(get_tombstones()) now_tombstones = set(get_tombstones())
@ -218,9 +220,9 @@ def main():
try: try:
cloudlog.info(f"reporting new tombstone {fn}") cloudlog.info(f"reporting new tombstone {fn}")
if fn.endswith(".crash"): if fn.endswith(".crash"):
report_tombstone_apport(fn, client) report_tombstone_apport(fn)
else: else:
report_tombstone_android(fn, client) report_tombstone_android(fn)
except Exception: except Exception:
cloudlog.exception(f"Error reporting tombstone {fn}") cloudlog.exception(f"Error reporting tombstone {fn}")