Linux tombstones: add stacktrace and upload (#19737)
* Add stacktrace to tombstones * make sentry output prettier * Refactor * Generate upload filename * Actually move file * Fix spaces * copy and upload * dont delete just yet Co-authored-by: Comma Device <device@comma.ai>albatross
parent
10bd761cd6
commit
f3db94a0c4
|
@ -58,6 +58,7 @@ class Uploader():
|
|||
self.last_resp = None
|
||||
self.last_exc = None
|
||||
|
||||
self.immediate_folders = ["crash/"]
|
||||
self.immediate_priority = {"qlog.bz2": 0, "qcamera.ts": 1}
|
||||
self.high_priority = {"rlog.bz2": 0, "fcamera.hevc": 1, "dcamera.hevc": 2, "ecamera.hevc": 3}
|
||||
|
||||
|
@ -98,7 +99,7 @@ class Uploader():
|
|||
|
||||
# try to upload qlog files first
|
||||
for name, key, fn in upload_files:
|
||||
if name in self.immediate_priority:
|
||||
if name in self.immediate_priority or any(f in fn for f in self.immediate_folders):
|
||||
return (key, fn)
|
||||
|
||||
if with_raw:
|
||||
|
|
|
@ -1,23 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
import datetime
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import time
|
||||
import glob
|
||||
|
||||
from raven import Client
|
||||
from raven.transport.http import HTTPTransport
|
||||
|
||||
from common.file_helpers import mkdirs_exists_ok
|
||||
from selfdrive.hardware import TICI
|
||||
from selfdrive.loggerd.config import ROOT
|
||||
from selfdrive.swaglog import cloudlog
|
||||
from selfdrive.version import version, origin, branch, dirty
|
||||
from selfdrive.version import branch, commit, dirty, origin, version
|
||||
|
||||
MAX_SIZE = 100000 * 10 # Normal size is 40-100k, allow up to 1M
|
||||
if TICI:
|
||||
MAX_SIZE = MAX_SIZE * 10 # Allow larger size for tici
|
||||
MAX_SIZE = MAX_SIZE * 100 # Allow larger size for tici since files contain coredump
|
||||
MAX_TOMBSTONE_FN_LEN = 85
|
||||
|
||||
TOMBSTONE_DIR = "/data/tombstones/"
|
||||
APPORT_DIR = "/var/crash/"
|
||||
|
||||
|
||||
def safe_fn(s):
|
||||
extra = ['_']
|
||||
return "".join(c for c in s if c.isalnum() or c in extra).rstrip()
|
||||
|
||||
|
||||
def sentry_report(client, fn, message, contents):
|
||||
cloudlog.error({'tombstone': message})
|
||||
client.captureMessage(
|
||||
message=message,
|
||||
sdk={'name': 'tombstoned', 'version': '0'},
|
||||
extra={
|
||||
'tombstone_fn': fn,
|
||||
'tombstone': contents
|
||||
},
|
||||
)
|
||||
|
||||
def clear_apport_folder():
|
||||
for f in glob.glob(APPORT_DIR + '*'):
|
||||
try:
|
||||
os.remove(f)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def get_apport_stacktrace(fn):
|
||||
try:
|
||||
cmd = f'apport-retrace -s <(cat <(echo "Package: openpilot") "{fn}")'
|
||||
return subprocess.check_output(cmd, shell=True, encoding='utf8', timeout=30, executable='/bin/bash') # pylint: disable=unexpected-keyword-arg
|
||||
except subprocess.CalledProcessError:
|
||||
return "Error getting stacktrace"
|
||||
except subprocess.TimeoutExpired:
|
||||
return "Timeout getting stacktrace"
|
||||
|
||||
|
||||
def get_tombstones():
|
||||
"""Returns list of (filename, ctime) for all tombstones in /data/tombstones
|
||||
and apport crashlogs in /var/crash"""
|
||||
files = []
|
||||
for folder in ["/data/tombstones/", "/var/crash/"]:
|
||||
for folder in [TOMBSTONE_DIR, APPORT_DIR]:
|
||||
if os.path.exists(folder):
|
||||
with os.scandir(folder) as d:
|
||||
|
||||
|
@ -30,7 +77,7 @@ def get_tombstones():
|
|||
return files
|
||||
|
||||
|
||||
def report_tombstone(fn, client):
|
||||
def report_tombstone_android(fn, client):
|
||||
f_size = os.path.getsize(fn)
|
||||
if f_size > MAX_SIZE:
|
||||
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
||||
|
@ -39,41 +86,99 @@ def report_tombstone(fn, client):
|
|||
with open(fn, encoding='ISO-8859-1') as f:
|
||||
contents = f.read()
|
||||
|
||||
# Get summary for sentry title
|
||||
if fn.endswith(".crash"):
|
||||
lines = contents.split('\n')
|
||||
message = lines[6]
|
||||
message = " ".join(contents.split('\n')[5:7])
|
||||
|
||||
status_idx = contents.find('ProcStatus')
|
||||
if status_idx >= 0:
|
||||
lines = contents[status_idx:].split('\n')
|
||||
message += " " + lines[1]
|
||||
else:
|
||||
message = " ".join(contents.split('\n')[5:7])
|
||||
# Cut off pid/tid, since that varies per run
|
||||
name_idx = message.find('name')
|
||||
if name_idx >= 0:
|
||||
message = message[name_idx:]
|
||||
|
||||
# Cut off pid/tid, since that varies per run
|
||||
name_idx = message.find('name')
|
||||
if name_idx >= 0:
|
||||
message = message[name_idx:]
|
||||
# Cut off fault addr
|
||||
fault_idx = message.find(', fault addr')
|
||||
if fault_idx >= 0:
|
||||
message = message[:fault_idx]
|
||||
|
||||
# Cut off fault addr
|
||||
fault_idx = message.find(', fault addr')
|
||||
if fault_idx >= 0:
|
||||
message = message[:fault_idx]
|
||||
sentry_report(client, fn, message, contents)
|
||||
|
||||
|
||||
cloudlog.error({'tombstone': message})
|
||||
client.captureMessage(
|
||||
message=message,
|
||||
sdk={'name': 'tombstoned', 'version': '0'},
|
||||
extra={
|
||||
'tombstone_fn': fn,
|
||||
'tombstone': contents
|
||||
},
|
||||
)
|
||||
def report_tombstone_apport(fn, client):
|
||||
f_size = os.path.getsize(fn)
|
||||
if f_size > MAX_SIZE:
|
||||
cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
|
||||
return
|
||||
|
||||
message = "" # One line description of the crash
|
||||
contents = "" # Full file contents without coredump
|
||||
path = "" # File path relative to openpilot directory
|
||||
|
||||
proc_maps = False
|
||||
|
||||
with open(fn) as f:
|
||||
for line in f:
|
||||
if "CoreDump" in line:
|
||||
break
|
||||
elif "ProcMaps" in line:
|
||||
proc_maps = True
|
||||
elif "ProcStatus" in line:
|
||||
proc_maps = False
|
||||
|
||||
if not proc_maps:
|
||||
contents += line
|
||||
|
||||
if "ExecutablePath" in line:
|
||||
path = line.strip().split(': ')[-1]
|
||||
path = path.replace('/data/openpilot/', '')
|
||||
message += path
|
||||
elif "Signal" in line:
|
||||
message += " - " + line.strip()
|
||||
|
||||
try:
|
||||
sig_num = int(line.strip().split(': ')[-1])
|
||||
message += " (" + signal.Signals(sig_num).name + ")" # pylint: disable=no-member
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
stacktrace = get_apport_stacktrace(fn)
|
||||
stacktrace_s = stacktrace.split('\n')
|
||||
crash_function = "No stacktrace"
|
||||
|
||||
if len(stacktrace_s) > 2:
|
||||
found = False
|
||||
|
||||
# Try to find first entry in openpilot, fall back to first line
|
||||
for line in stacktrace_s:
|
||||
if "at selfdrive/" in line:
|
||||
crash_function = line
|
||||
found = True
|
||||
break
|
||||
|
||||
if not found:
|
||||
crash_function = stacktrace_s[1]
|
||||
|
||||
# Remove arguments that can contain pointers to make sentry one-liner unique
|
||||
crash_function = re.sub(r'\(.*?\)', '', crash_function)
|
||||
|
||||
contents = stacktrace + "\n\n" + contents
|
||||
message = message + " - " + crash_function
|
||||
sentry_report(client, fn, message, contents)
|
||||
|
||||
# Copy crashlog to upload folder
|
||||
clean_path = path.replace('/', '_')
|
||||
date = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")
|
||||
|
||||
new_fn = f"{date}_{commit[:8]}_{safe_fn(clean_path)}"[:MAX_TOMBSTONE_FN_LEN]
|
||||
|
||||
crashlog_dir = os.path.join(ROOT, "crash")
|
||||
mkdirs_exists_ok(crashlog_dir)
|
||||
|
||||
# Files could be on different filesystems, copy, then delete
|
||||
shutil.copy(fn, os.path.join(crashlog_dir, new_fn))
|
||||
os.remove(fn)
|
||||
|
||||
|
||||
def main():
|
||||
# TODO: turn on when all tombstones are recovered
|
||||
# clear_apport_folder() # Clear apport folder on start, otherwise duplicate crashes won't register
|
||||
initial_tombstones = set(get_tombstones())
|
||||
|
||||
tags = {
|
||||
|
@ -91,7 +196,10 @@ def main():
|
|||
for fn, _ in (now_tombstones - initial_tombstones):
|
||||
try:
|
||||
cloudlog.info(f"reporting new tombstone {fn}")
|
||||
report_tombstone(fn, client)
|
||||
if fn.endswith(".crash"):
|
||||
report_tombstone_apport(fn, client)
|
||||
else:
|
||||
report_tombstone_android(fn, client)
|
||||
except Exception:
|
||||
cloudlog.exception(f"Error reporting tombstone {fn}")
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ comma_remote: bool = False
|
|||
tested_branch: bool = False
|
||||
origin = get_git_remote()
|
||||
branch = get_git_full_branchname()
|
||||
commit = get_git_commit()
|
||||
|
||||
if (origin is not None) and (branch is not None):
|
||||
try:
|
||||
|
@ -74,7 +75,7 @@ if (origin is not None) and (branch is not None):
|
|||
try:
|
||||
dirty_files = run_cmd(["git", "diff-index", branch, "--"])
|
||||
cloudlog.event("dirty comma branch", version=version, dirty=dirty, origin=origin, branch=branch,
|
||||
dirty_files=dirty_files, commit=get_git_commit(), origin_commit=get_git_commit(branch))
|
||||
dirty_files=dirty_files, commit=commit, origin_commit=get_git_commit(branch))
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
|
|
Loading…
Reference in New Issue