Switch to new sentry_sdk (#20690)
* remove unused crash.install
* use sentry_sdk in crash.py
* athena crashes don't rely on excepthook
* make default crash handler work
* make it work in athena
* fixup tombstoned
albatross
parent
c4189e158e
commit
ea0e611872
|
@@ -29,6 +29,8 @@ from selfdrive.hardware import HARDWARE, PC
|
||||||
from selfdrive.loggerd.config import ROOT
|
from selfdrive.loggerd.config import ROOT
|
||||||
from selfdrive.loggerd.xattr_cache import getxattr, setxattr
|
from selfdrive.loggerd.xattr_cache import getxattr, setxattr
|
||||||
from selfdrive.swaglog import cloudlog, SWAGLOG_DIR
|
from selfdrive.swaglog import cloudlog, SWAGLOG_DIR
|
||||||
|
import selfdrive.crash as crash
|
||||||
|
from selfdrive.version import dirty, origin, branch, commit
|
||||||
|
|
||||||
ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai')
|
ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai')
|
||||||
HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4"))
|
HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4"))
|
||||||
|
@@ -409,7 +411,11 @@ def backoff(retries):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
params = Params()
|
params = Params()
|
||||||
dongle_id = params.get("DongleId").decode('utf-8')
|
dongle_id = params.get("DongleId", encoding='utf-8')
|
||||||
|
crash.bind_user(id=dongle_id)
|
||||||
|
crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
|
||||||
|
device=HARDWARE.get_device_type())
|
||||||
|
|
||||||
ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
|
ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
|
||||||
|
|
||||||
api = Api(dongle_id)
|
api = Api(dongle_id)
|
||||||
|
@@ -427,7 +433,9 @@ def main():
|
||||||
except (KeyboardInterrupt, SystemExit):
|
except (KeyboardInterrupt, SystemExit):
|
||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
|
crash.capture_exception()
|
||||||
cloudlog.exception("athenad.main.exception")
|
cloudlog.exception("athenad.main.exception")
|
||||||
|
|
||||||
conn_retries += 1
|
conn_retries += 1
|
||||||
params.delete("LastAthenaPingTime")
|
params.delete("LastAthenaPingTime")
|
||||||
|
|
||||||
|
|
|
@@ -3,7 +3,6 @@
|
||||||
import time
|
import time
|
||||||
from multiprocessing import Process
|
from multiprocessing import Process
|
||||||
|
|
||||||
import selfdrive.crash as crash
|
|
||||||
from common.params import Params
|
from common.params import Params
|
||||||
from selfdrive.manager.process import launcher
|
from selfdrive.manager.process import launcher
|
||||||
from selfdrive.swaglog import cloudlog
|
from selfdrive.swaglog import cloudlog
|
||||||
|
@@ -16,9 +15,6 @@ def main():
|
||||||
params = Params()
|
params = Params()
|
||||||
dongle_id = params.get("DongleId").decode('utf-8')
|
dongle_id = params.get("DongleId").decode('utf-8')
|
||||||
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty)
|
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty)
|
||||||
crash.bind_user(id=dongle_id)
|
|
||||||
crash.bind_extra(version=version, dirty=dirty)
|
|
||||||
crash.install()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while 1:
|
while 1:
|
||||||
|
|
|
@@ -1,12 +1,11 @@
|
||||||
"""Install exception handler for process crash."""
|
"""Install exception handler for process crash."""
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import threading
|
|
||||||
import capnp
|
import capnp
|
||||||
from selfdrive.version import version, dirty, origin, branch
|
|
||||||
|
|
||||||
from selfdrive.hardware import PC
|
from selfdrive.hardware import PC
|
||||||
from selfdrive.swaglog import cloudlog
|
from selfdrive.swaglog import cloudlog
|
||||||
|
from selfdrive.version import version
|
||||||
|
|
||||||
if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
|
if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
|
||||||
def capture_exception(*args, **kwargs):
|
def capture_exception(*args, **kwargs):
|
||||||
|
@@ -18,60 +17,24 @@ if os.getenv("NOLOG") or os.getenv("NOCRASH") or PC:
|
||||||
def bind_extra(**kwargs):
|
def bind_extra(**kwargs):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def install():
|
|
||||||
pass
|
|
||||||
else:
|
else:
|
||||||
from raven import Client
|
import sentry_sdk
|
||||||
from raven.transport.http import HTTPTransport
|
from sentry_sdk.integrations.threading import ThreadingIntegration
|
||||||
|
|
||||||
tags = {
|
|
||||||
'dirty': dirty,
|
|
||||||
'origin': origin,
|
|
||||||
'branch': branch
|
|
||||||
}
|
|
||||||
client = Client('https://a8dc76b5bfb34908a601d67e2aa8bcf9:4336ee4648984e438370a3fa3f5adda2@o33823.ingest.sentry.io/77924',
|
|
||||||
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags)
|
|
||||||
|
|
||||||
def capture_exception(*args, **kwargs):
|
def capture_exception(*args, **kwargs):
|
||||||
exc_info = sys.exc_info()
|
exc_info = sys.exc_info()
|
||||||
if not exc_info[0] is capnp.lib.capnp.KjException:
|
if not exc_info[0] is capnp.lib.capnp.KjException:
|
||||||
client.captureException(*args, **kwargs)
|
sentry_sdk.capture_exception(*args, **kwargs)
|
||||||
|
sentry_sdk.flush() # https://github.com/getsentry/sentry-python/issues/291
|
||||||
cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1))
|
cloudlog.error("crash", exc_info=kwargs.get('exc_info', 1))
|
||||||
|
|
||||||
def bind_user(**kwargs):
|
def bind_user(**kwargs):
|
||||||
client.user_context(kwargs)
|
sentry_sdk.set_user(kwargs)
|
||||||
|
|
||||||
def bind_extra(**kwargs):
|
def bind_extra(**kwargs):
|
||||||
client.extra_context(kwargs)
|
for k, v in kwargs.items():
|
||||||
|
sentry_sdk.set_tag(k, v)
|
||||||
|
|
||||||
def install():
|
sentry_sdk.init("https://a8dc76b5bfb34908a601d67e2aa8bcf9@o33823.ingest.sentry.io/77924",
|
||||||
"""
|
default_integrations=False, integrations=[ThreadingIntegration(propagate_hub=True)],
|
||||||
Workaround for `sys.excepthook` thread bug from:
|
release=version)
|
||||||
http://bugs.python.org/issue1230540
|
|
||||||
Call once from the main thread before creating any threads.
|
|
||||||
Source: https://stackoverflow.com/a/31622038
|
|
||||||
"""
|
|
||||||
# installs a sys.excepthook
|
|
||||||
__excepthook__ = sys.excepthook
|
|
||||||
|
|
||||||
def handle_exception(*exc_info):
|
|
||||||
if exc_info[0] not in (KeyboardInterrupt, SystemExit):
|
|
||||||
capture_exception()
|
|
||||||
__excepthook__(*exc_info)
|
|
||||||
sys.excepthook = handle_exception
|
|
||||||
|
|
||||||
init_original = threading.Thread.__init__
|
|
||||||
|
|
||||||
def init(self, *args, **kwargs):
|
|
||||||
init_original(self, *args, **kwargs)
|
|
||||||
run_original = self.run
|
|
||||||
|
|
||||||
def run_with_except_hook(*args2, **kwargs2):
|
|
||||||
try:
|
|
||||||
run_original(*args2, **kwargs2)
|
|
||||||
except Exception:
|
|
||||||
sys.excepthook(*sys.exc_info())
|
|
||||||
|
|
||||||
self.run = run_with_except_hook
|
|
||||||
|
|
||||||
threading.Thread.__init__ = init
|
|
||||||
|
|
|
@@ -18,7 +18,7 @@ from selfdrive.manager.process import ensure_running
|
||||||
from selfdrive.manager.process_config import managed_processes
|
from selfdrive.manager.process_config import managed_processes
|
||||||
from selfdrive.registration import register
|
from selfdrive.registration import register
|
||||||
from selfdrive.swaglog import cloudlog, add_file_handler
|
from selfdrive.swaglog import cloudlog, add_file_handler
|
||||||
from selfdrive.version import dirty, version
|
from selfdrive.version import dirty, version, origin, branch, commit
|
||||||
|
|
||||||
|
|
||||||
def manager_init():
|
def manager_init():
|
||||||
|
@@ -78,7 +78,8 @@ def manager_init():
|
||||||
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty,
|
cloudlog.bind_global(dongle_id=dongle_id, version=version, dirty=dirty,
|
||||||
device=HARDWARE.get_device_type())
|
device=HARDWARE.get_device_type())
|
||||||
crash.bind_user(id=dongle_id)
|
crash.bind_user(id=dongle_id)
|
||||||
crash.bind_extra(version=version, dirty=dirty, device=HARDWARE.get_device_type())
|
crash.bind_extra(dirty=dirty, origin=origin, branch=branch, commit=commit,
|
||||||
|
device=HARDWARE.get_device_type())
|
||||||
|
|
||||||
|
|
||||||
def manager_prepare():
|
def manager_prepare():
|
||||||
|
|
|
@@ -8,11 +8,11 @@ import subprocess
|
||||||
import time
|
import time
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
from raven import Client
|
import sentry_sdk
|
||||||
from raven.transport.http import HTTPTransport
|
|
||||||
|
|
||||||
|
from common.params import Params
|
||||||
from common.file_helpers import mkdirs_exists_ok
|
from common.file_helpers import mkdirs_exists_ok
|
||||||
from selfdrive.hardware import TICI
|
from selfdrive.hardware import TICI, HARDWARE
|
||||||
from selfdrive.loggerd.config import ROOT
|
from selfdrive.loggerd.config import ROOT
|
||||||
from selfdrive.swaglog import cloudlog
|
from selfdrive.swaglog import cloudlog
|
||||||
from selfdrive.version import branch, commit, dirty, origin, version
|
from selfdrive.version import branch, commit, dirty, origin, version
|
||||||
|
@@ -31,16 +31,15 @@ def safe_fn(s):
|
||||||
return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
|
return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
|
||||||
|
|
||||||
|
|
||||||
def sentry_report(client, fn, message, contents):
|
def sentry_report(fn, message, contents):
|
||||||
cloudlog.error({'tombstone': message})
|
cloudlog.error({'tombstone': message})
|
||||||
client.captureMessage(
|
|
||||||
message=message,
|
with sentry_sdk.configure_scope() as scope:
|
||||||
sdk={'name': 'tombstoned', 'version': '0'},
|
scope.set_extra("tombstone_fn", fn)
|
||||||
extra={
|
scope.set_extra("tombstone", contents)
|
||||||
'tombstone_fn': fn,
|
sentry_sdk.capture_message(message=message)
|
||||||
'tombstone': contents
|
sentry_sdk.flush()
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
def clear_apport_folder():
|
def clear_apport_folder():
|
||||||
for f in glob.glob(APPORT_DIR + '*'):
|
for f in glob.glob(APPORT_DIR + '*'):
|
||||||
|
@@ -77,7 +76,7 @@ def get_tombstones():
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
|
||||||
def report_tombstone_android(fn, client):
|
def report_tombstone_android(fn):
|
||||||
f_size = os.path.getsize(fn)
|
f_size = os.path.getsize(fn)
|
||||||
if f_size > MAX_SIZE:
|
if f_size > MAX_SIZE:
|
||||||
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
||||||
|
@@ -104,7 +103,7 @@ def report_tombstone_android(fn, client):
|
||||||
if fault_idx >= 0:
|
if fault_idx >= 0:
|
||||||
message = message[:fault_idx]
|
message = message[:fault_idx]
|
||||||
|
|
||||||
sentry_report(client, fn, message, contents)
|
sentry_report(fn, message, contents)
|
||||||
|
|
||||||
# Copy crashlog to upload folder
|
# Copy crashlog to upload folder
|
||||||
clean_path = executable.replace('./', '').replace('/', '_')
|
clean_path = executable.replace('./', '').replace('/', '_')
|
||||||
|
@@ -118,7 +117,7 @@ def report_tombstone_android(fn, client):
|
||||||
shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
|
shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
|
||||||
|
|
||||||
|
|
||||||
def report_tombstone_apport(fn, client):
|
def report_tombstone_apport(fn):
|
||||||
f_size = os.path.getsize(fn)
|
f_size = os.path.getsize(fn)
|
||||||
if f_size > MAX_SIZE:
|
if f_size > MAX_SIZE:
|
||||||
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
||||||
|
@@ -178,7 +177,7 @@ def report_tombstone_apport(fn, client):
|
||||||
|
|
||||||
contents = stacktrace + "\n\n" + contents
|
contents = stacktrace + "\n\n" + contents
|
||||||
message = message + " - " + crash_function
|
message = message + " - " + crash_function
|
||||||
sentry_report(client, fn, message, contents)
|
sentry_report(fn, message, contents)
|
||||||
|
|
||||||
# Copy crashlog to upload folder
|
# Copy crashlog to upload folder
|
||||||
clean_path = path.replace('/', '_')
|
clean_path = path.replace('/', '_')
|
||||||
|
@@ -202,15 +201,18 @@ def main():
|
||||||
clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register
|
clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register
|
||||||
initial_tombstones = set(get_tombstones())
|
initial_tombstones = set(get_tombstones())
|
||||||
|
|
||||||
tags = {
|
sentry_sdk.utils.MAX_STRING_LENGTH = 8192
|
||||||
'dirty': dirty,
|
sentry_sdk.init("https://a40f22e13cbc4261873333c125fc9d38@o33823.ingest.sentry.io/157615",
|
||||||
'origin': origin,
|
default_integrations=False, release=version)
|
||||||
'branch': branch
|
|
||||||
}
|
dongle_id = Params().get("DongleId", encoding='utf-8')
|
||||||
client = Client('https://a40f22e13cbc4261873333c125fc9d38:dd57b2dc8113415093f8d9c017df092b@o33823.ingest.sentry.io/157615',
|
sentry_sdk.set_user({"id": dongle_id})
|
||||||
install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags, string_max_length=10000)
|
sentry_sdk.set_tag("dirty", dirty)
|
||||||
|
sentry_sdk.set_tag("origin", origin)
|
||||||
|
sentry_sdk.set_tag("branch", branch)
|
||||||
|
sentry_sdk.set_tag("commit", commit)
|
||||||
|
sentry_sdk.set_tag("device", HARDWARE.get_device_type())
|
||||||
|
|
||||||
client.user_context({'id': os.environ.get('DONGLE_ID')})
|
|
||||||
while True:
|
while True:
|
||||||
now_tombstones = set(get_tombstones())
|
now_tombstones = set(get_tombstones())
|
||||||
|
|
||||||
|
@@ -218,9 +220,9 @@ def main():
|
||||||
try:
|
try:
|
||||||
cloudlog.info(f"reporting new tombstone {fn}")
|
cloudlog.info(f"reporting new tombstone {fn}")
|
||||||
if fn.endswith(".crash"):
|
if fn.endswith(".crash"):
|
||||||
report_tombstone_apport(fn, client)
|
report_tombstone_apport(fn)
|
||||||
else:
|
else:
|
||||||
report_tombstone_android(fn, client)
|
report_tombstone_android(fn)
|
||||||
except Exception:
|
except Exception:
|
||||||
cloudlog.exception(f"Error reporting tombstone {fn}")
|
cloudlog.exception(f"Error reporting tombstone {fn}")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue