Switch to new sentry_sdk (#20690)

* remove unused crash.install

* use sentry_sdk in crash.py

* athena crashes don't rely on excepthook

* make default crash handler work

* make it work in athena

* fixup tombstoned
albatross
Willem Melching 2021-04-22 15:24:35 +02:00 committed by GitHub
parent c4189e158e
commit ea0e611872
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 81 deletions

View File

@ -29,6 +29,8 @@ from selfdrive.hardware import HARDWARE, PC
from selfdrive.loggerd.config import ROOT
from selfdrive.loggerd.xattr_cache import getxattr, setxattr
from selfdrive.swaglog import cloudlog, SWAGLOG_DIR
import selfdrive.crash as crash
from selfdrive.version import dirty, origin, branch, commit
ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai')
HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4"))
@ -409,7 +411,11 @@ def backoff(retries):
def main():
params = Params()
dongle_id = params.get("DongleId").decode('utf-8')
dongle_id = params.get("DongleId", encoding='utf-8')
crash.bind_user(id=dongle_id)
crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
device=HARDWARE.get_device_type())
ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
api = Api(dongle_id)
@ -427,7 +433,9 @@ def main():
except (KeyboardInterrupt, SystemExit):
break
except Exception:
crash.capture_exception()
cloudlog.exception("athenad.main.exception")
conn_retries += 1
params.delete("LastAthenaPingTime")

View File

@ -3,7 +3,6 @@
import time
from multiprocessing import Process
import selfdrive.crash as crash
from common.params import Params
from selfdrive.manager.process import launcher
from selfdrive.swaglog import cloudlog
@ -16,9 +15,6 @@ def main():
params = Params()
dongle_id = params.get("DongleId").decode('utf-8')
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty)
crash.bind_user(id=dongle_id)
crash.bind_extra(version=version, dirty=dirty)
crash.install()
try:
while 1:

View File

@ -1,12 +1,11 @@
"""Install exception handler for process crash."""
import os
import sys
import threading
import capnp
from selfdrive.version import version, dirty, origin, branch
from selfdrive.hardware import PC
from selfdrive.swaglog import cloudlog
from selfdrive.version import version
if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
def capture_exception(*args, **kwargs):
@ -18,60 +17,24 @@ if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
def bind_extra(**kwargs):
pass
def install():
pass
else:
from raven import Client
from raven.transport.http import HTTPTransport
tags = {
'dirty': dirty,
'origin': origin,
'branch': branch
}
client = Client('https://a8dc76b5bfb34908a601d67e2aa8bcf9:4336ee4648984e438370a3fa3f5adda2@o33823.ingest.sentry.io/77924',
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags)
import sentry_sdk
from sentry_sdk.integrations.threading import ThreadingIntegration
def capture_exception(*args, **kwargs):
exc_info = sys.exc_info()
if not exc_info[0] is capnp.lib.capnp.KjException:
client.captureException(*args, **kwargs)
sentry_sdk.capture_exception(*args, **kwargs)
sentry_sdk.flush() # https://github.com/getsentry/sentry-python/issues/291
cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1))
def bind_user(**kwargs):
client.user_context(kwargs)
sentry_sdk.set_user(kwargs)
def bind_extra(**kwargs):
client.extra_context(kwargs)
for k, v in kwargs.items():
sentry_sdk.set_tag(k, v)
def install():
"""
Workaround for `sys.excepthook` thread bug from:
http://bugs.python.org/issue1230540
Call once from the main thread before creating any threads.
Source: https://stackoverflow.com/a/31622038
"""
# installs a sys.excepthook
__excepthook__ = sys.excepthook
def handle_exception(*exc_info):
if exc_info[0] not in (KeyboardInterrupt, SystemExit):
capture_exception()
__excepthook__(*exc_info)
sys.excepthook = handle_exception
init_original = threading.Thread.__init__
def init(self, *args, **kwargs):
init_original(self, *args, **kwargs)
run_original = self.run
def run_with_except_hook(*args2, **kwargs2):
try:
run_original(*args2, **kwargs2)
except Exception:
sys.excepthook(*sys.exc_info())
self.run = run_with_except_hook
threading.Thread.__init__ = init
sentry_sdk.init("https://a8dc76b5bfb34908a601d67e2aa8bcf9@o33823.ingest.sentry.io/77924",
default_integrations=False, integrations=[ThreadingIntegration(propagate_hub=True)],
release=version)

View File

@ -18,7 +18,7 @@ from selfdrive.manager.process import ensure_running
from selfdrive.manager.process_config import managed_processes
from selfdrive.registration import register
from selfdrive.swaglog import cloudlog, add_file_handler
from selfdrive.version import dirty, version
from selfdrive.version import dirty, version, origin, branch, commit
def manager_init():
@ -78,7 +78,8 @@ def manager_init():
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty,
device=HARDWARE.get_device_type())
crash.bind_user(id=dongle_id)
crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type())
crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
device=HARDWARE.get_device_type())
def manager_prepare():

View File

@ -8,11 +8,11 @@ import subprocess
import time
import glob
from raven import Client
from raven.transport.http import HTTPTransport
import sentry_sdk
from common.params import Params
from common.file_helpers import mkdirs_exists_ok
from selfdrive.hardware import TICI
from selfdrive.hardware import TICI, HARDWARE
from selfdrive.loggerd.config import ROOT
from selfdrive.swaglog import cloudlog
from selfdrive.version import branch, commit, dirty, origin, version
@ -31,16 +31,15 @@ def safe_fn(s):
return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
def sentry_report(client, fn, message, contents):
def sentry_report(fn, message, contents):
cloudlog.error({'tombstone': message})
client.captureMessage(
message=message,
sdk={'name': 'tombstoned', 'version': '0'},
extra={
'tombstone_fn': fn,
'tombstone': contents
},
)
with sentry_sdk.configure_scope() as scope:
scope.set_extra("tombstone_fn", fn)
scope.set_extra("tombstone", contents)
sentry_sdk.capture_message(message=message)
sentry_sdk.flush()
def clear_apport_folder():
for f in glob.glob(APPORT_DIR + '*'):
@ -77,7 +76,7 @@ def get_tombstones():
return files
def report_tombstone_android(fn, client):
def report_tombstone_android(fn):
f_size = os.path.getsize(fn)
if f_size > MAX_SIZE:
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
@ -104,7 +103,7 @@ def report_tombstone_android(fn, client):
if fault_idx >= 0:
message = message[:fault_idx]
sentry_report(client, fn, message, contents)
sentry_report(fn, message, contents)
# Copy crashlog to upload folder
clean_path = executable.replace('./', '').replace('/', '_')
@ -118,7 +117,7 @@ def report_tombstone_android(fn, client):
shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
def report_tombstone_apport(fn, client):
def report_tombstone_apport(fn):
f_size = os.path.getsize(fn)
if f_size > MAX_SIZE:
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
@ -178,7 +177,7 @@ def report_tombstone_apport(fn, client):
contents = stacktrace + "\n\n" + contents
message = message + " - " + crash_function
sentry_report(client, fn, message, contents)
sentry_report(fn, message, contents)
# Copy crashlog to upload folder
clean_path = path.replace('/', '_')
@ -202,15 +201,18 @@ def main():
clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register
initial_tombstones = set(get_tombstones())
tags = {
'dirty': dirty,
'origin': origin,
'branch': branch
}
client = Client('https://a40f22e13cbc4261873333c125fc9d38:dd57b2dc8113415093f8d9c017df092b@o33823.ingest.sentry.io/157615',
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags, string_max_length=10000)
sentry_sdk.utils.MAX_STRING_LENGTH = 8192
sentry_sdk.init("https://a40f22e13cbc4261873333c125fc9d38@o33823.ingest.sentry.io/157615",
default_integrations=False, release=version)
dongle_id = Params().get("DongleId", encoding='utf-8')
sentry_sdk.set_user({"id": dongle_id})
sentry_sdk.set_tag("dirty", dirty)
sentry_sdk.set_tag("origin", origin)
sentry_sdk.set_tag("branch", branch)
sentry_sdk.set_tag("commit", commit)
sentry_sdk.set_tag("device", HARDWARE.get_device_type())
client.user_context({'id': os.environ.get('DONGLE_ID')})
while True:
now_tombstones = set(get_tombstones())
@ -218,9 +220,9 @@ def main():
try:
cloudlog.info(f"reporting new tombstone {fn}")
if fn.endswith(".crash"):
report_tombstone_apport(fn, client)
report_tombstone_apport(fn)
else:
report_tombstone_android(fn, client)
report_tombstone_android(fn)
except Exception:
cloudlog.exception(f"Error reporting tombstone {fn}")