
212 lines
6.0 KiB
Raw Normal View History

2020-03-31 15:41:55 -06:00
#!/usr/bin/env python3
import datetime
import glob
import os
import re
import shlex
import shutil
import signal
import subprocess
import time

from raven import Client
from raven.transport.http import HTTPTransport

from common.file_helpers import mkdirs_exists_ok
from selfdrive.hardware import TICI
from selfdrive.loggerd.config import ROOT
from selfdrive.swaglog import cloudlog
from selfdrive.version import branch, commit, dirty, origin, version

# Largest crash file, in bytes, that will still be reported.
# Normal size is 40-100k, allow up to 1M
MAX_SIZE = 100000 * 10
if TICI:
    # Allow larger size for tici since files contain coredump
    MAX_SIZE = MAX_SIZE * 100
MAX_TOMBSTONE_FN_LEN = 62  # 85 - 23 ("<dongle id>/crash/")

TOMBSTONE_DIR = "/data/tombstones/"
APPORT_DIR = "/var/crash/"

def safe_fn(s):
    """Return `s` with every character that is not file-name safe removed.

    Only alphanumeric characters (as decided by `str.isalnum`) and
    underscores are kept; everything else, including whitespace and path
    separators, is dropped.
    """
    extra = ['_']
    return "".join(c for c in s if c.isalnum() or c in extra).rstrip()

def sentry_report(client, fn, message, contents):
    """Log a crash summary and forward the crash to `client`.

    Args:
        client: reporting client; assumed to be the raven `Client` built by
            the caller -- TODO confirm.
        fn: path of the crash file being reported.
        message: one-line description of the crash.
        contents: text attached to the report as extra data.
    """
    cloudlog.error({'tombstone': message})
    # NOTE(review): the call wrapping the `sdk=` and extra-data fragments was
    # missing from this copy of the file; it is reconstructed here as a raven
    # `captureMessage` call -- confirm against the original.
    client.captureMessage(
        message=message,
        sdk={'name': 'tombstoned', 'version': '0'},
        extra={
            'tombstone_fn': fn,
            'tombstone': contents
        },
    )

def clear_apport_folder():
    """Best-effort removal of every entry matched by `APPORT_DIR + '*'`.

    Failures are ignored so that one undeletable entry does not stop the
    remaining entries from being processed.
    """
    for f in glob.glob(APPORT_DIR + '*'):
        # NOTE(review): the bodies of this try/except were missing from this
        # copy of the file; `os.remove` + ignore is reconstructed from the
        # function name and the surviving `except Exception:` -- confirm.
        try:
            os.remove(f)
        except Exception:
            pass

def get_apport_stacktrace(fn):
    """Run `apport-retrace -s` on the crash file `fn` and return its output.

    The crash file is prefixed with a "Package: openpilot" line through bash
    process substitution before being handed to apport-retrace, which is why
    the command is executed with /bin/bash.

    Returns:
        The command's stdout decoded as UTF-8, or a short placeholder string
        when the command exits non-zero, cannot be started, or runs for more
        than 30 seconds.
    """
    # Quote the path so that unusual file names cannot alter the shell command
    cmd = f'apport-retrace -s <(cat <(echo "Package: openpilot") {shlex.quote(fn)})'
    try:
        return subprocess.check_output(cmd, shell=True, encoding='utf8', timeout=30, executable='/bin/bash')  # pylint: disable=unexpected-keyword-arg
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the shell itself being unavailable
        return "Error getting stacktrace"
    except subprocess.TimeoutExpired:
        return "Timeout getting stacktrace"

def get_tombstones():
    """Returns list of (filename, ctime) for all tombstones in /data/tombstones
    and apport crashlogs in /var/crash

    Folders that do not exist are skipped, and at most the first 1000
    directory entries of each folder are inspected. ctime is truncated to
    whole seconds.
    """
    files = []
    for folder in [TOMBSTONE_DIR, APPORT_DIR]:
        if not os.path.exists(folder):
            continue

        with os.scandir(folder) as d:
            # Loop over first 1000 directory entries
            for _, f in zip(range(1000), d):
                # NOTE(review): this first condition was missing from this
                # copy of the file and is reconstructed -- confirm.
                if f.name.startswith("tombstone"):
                    files.append((f.path, int(f.stat().st_ctime)))
                elif f.name.endswith(".crash"):
                    st = f.stat()
                    # 0o100640 is a regular file with rw-r----- permissions;
                    # presumably this marks a fully written report -- TODO confirm
                    if st.st_mode == 0o100640:
                        files.append((f.path, int(st.st_ctime)))
    return files

def report_tombstone_android(fn, client):
    """Read the tombstone file `fn` and report it through `sentry_report`.

    Files larger than MAX_SIZE bytes are logged and skipped. The one-line
    message is built from the 6th and 7th lines of the file, trimmed to start
    at "name" and to end before ", fault addr" when those markers are present.

    Args:
        fn: path of the tombstone file.
        client: reporting client passed through to `sentry_report`.
    """
    f_size = os.path.getsize(fn)
    if f_size > MAX_SIZE:
        cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
        return

    with open(fn, encoding='ISO-8859-1') as f:
        contents = f.read()

    message = " ".join(contents.split('\n')[5:7])

    # Cut off pid/tid, since that varies per run
    name_idx = message.find('name')
    if name_idx >= 0:
        message = message[name_idx:]

    # Cut off fault addr
    fault_idx = message.find(', fault addr')
    if fault_idx >= 0:
        message = message[:fault_idx]

    sentry_report(client, fn, message, contents)

def report_tombstone_apport(fn, client):
    """Report the apport crash file `fn` and copy it to the crash upload folder.

    Files larger than MAX_SIZE bytes are logged and skipped. Otherwise the
    file is scanned line by line to build a one-line message (executable path
    and signal), a stacktrace is requested from `get_apport_stacktrace`, the
    result is sent through `sentry_report`, and the file is copied into
    `<ROOT>/crash` under a name derived from the current time, the commit and
    the executable path.

    Args:
        fn: path of the apport crash file.
        client: reporting client passed through to `sentry_report`.
    """
    f_size = os.path.getsize(fn)
    if f_size > MAX_SIZE:
        cloudlog.error(f"Tombstone {fn} too big, {f_size}. Skipping...")
        return

    message = ""  # One line description of the crash
    content_lines = []  # Full file contents without coredump
    path = ""  # File path relative to openpilot directory

    proc_maps = False

    with open(fn) as f:
        for line in f:
            if "CoreDump" in line:
                # NOTE(review): this branch body was missing from this copy of
                # the file; stopping here matches "contents without coredump"
                # -- confirm.
                break
            elif "ProcMaps" in line:
                proc_maps = True
            elif "ProcStatus" in line:
                proc_maps = False

            # Lines from "ProcMaps" up to "ProcStatus" are left out of the report
            if not proc_maps:
                content_lines.append(line)

            if "ExecutablePath" in line:
                path = line.strip().split(': ')[-1]
                path = path.replace('/data/openpilot/', '')
                message += path
            elif "Signal" in line:
                message += " - " + line.strip()

                # Append the symbolic signal name when the number is valid
                try:
                    sig_num = int(line.strip().split(': ')[-1])
                    message += " (" + signal.Signals(sig_num).name + ")"  # pylint: disable=no-member
                except ValueError:
                    pass

    contents = "".join(content_lines)

    stacktrace = get_apport_stacktrace(fn)
    stacktrace_s = stacktrace.split('\n')
    crash_function = "No stacktrace"

    if len(stacktrace_s) > 2:
        found = False

        # Try to find first entry in openpilot, fall back to first line
        for line in stacktrace_s:
            if "at selfdrive/" in line:
                crash_function = line
                found = True
                break

        if not found:
            crash_function = stacktrace_s[1]

        # Remove arguments that can contain pointers to make sentry one-liner unique
        crash_function = " ".join(crash_function.split(' ')[4:])
        crash_function = re.sub(r'\(.*?\)', '', crash_function)

    contents = stacktrace + "\n\n" + contents
    message = message + " - " + crash_function
    sentry_report(client, fn, message, contents)

    # Copy crashlog to upload folder
    clean_path = path.replace('/', '_')
    # NOTE(review): the left part of this expression was missing from this copy
    # of the file; local time via datetime.now() is reconstructed -- confirm.
    date = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M-%S")

    new_fn = f"{date}_{commit[:8]}_{safe_fn(clean_path)}"[:MAX_TOMBSTONE_FN_LEN]

    crashlog_dir = os.path.join(ROOT, "crash")
    # NOTE(review): reconstructed; presumably creates the folder when it does
    # not exist yet (the helper is imported by this file) -- confirm.
    mkdirs_exists_ok(crashlog_dir)

    # Files could be on different filesystems, copy, then delete
    shutil.copy(fn, os.path.join(crashlog_dir, new_fn))

    # NOTE(review): the delete step announced above was missing from this copy
    # of the file; restored as a best-effort removal -- confirm error handling.
    try:
        os.remove(fn)
    except PermissionError:
        pass

def main():
    """Poll for new crash files forever and report each one once.

    The set of files present at start-up is treated as already handled. On
    every pass, files that were not in the previous snapshot are reported:
    names ending in ".crash" go to `report_tombstone_apport`, everything else
    to `report_tombstone_android`. A failure while reporting one file is
    logged and does not stop the loop.
    """
    clear_apport_folder()  # Clear apport folder on start, otherwise duplicate crashes won't register
    initial_tombstones = set(get_tombstones())

    tags = {
        'dirty': dirty,
        'origin': origin,
        'branch': branch
    }
    # NOTE(review): the DSN is an empty string in this copy of the file;
    # presumably it was elided -- confirm before deploying.
    client = Client('',
                    install_sys_hook=False, transport=HTTPTransport, release=version, tags=tags, string_max_length=10000)

    client.user_context({'id': os.environ.get('DONGLE_ID')})
    while True:
        now_tombstones = set(get_tombstones())

        for fn, _ in (now_tombstones - initial_tombstones):
            try:
                # NOTE(review): the logger call on this line was garbled in this
                # copy of the file; `cloudlog.info` is reconstructed -- confirm.
                cloudlog.info(f"reporting new tombstone {fn}")
                if fn.endswith(".crash"):
                    report_tombstone_apport(fn, client)
                else:
                    report_tombstone_android(fn, client)
            except Exception:
                cloudlog.exception(f"Error reporting tombstone {fn}")

        initial_tombstones = now_tombstones
        # NOTE(review): the pause between polls was missing from this copy of
        # the file; the 5 second interval is a reconstruction -- confirm.
        time.sleep(5)

if __name__ == "__main__":