# openpilot/tools/lib/logreader.py

import os
import sys
import gzip
import zlib
import json
import bz2
import tempfile
import requests
import subprocess
from aenum import Enum
import capnp
import numpy as np
import platform

from tools.lib.exceptions import DataUnreadableError
try:
  from xx.chffr.lib.filereader import FileReader
except ImportError:
  from tools.lib.filereader import FileReader
from tools.lib.log_util import convert_old_pkt_to_new
from cereal import log as capnp_log

OP_PATH = os.path.dirname(os.path.dirname(capnp_log.__file__))
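
# Builds (via make) and runs the native index_log tool, which emits the byte
# offset of each capnp event in fn; the offsets are returned as a uint64 array.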
def index_log(fn):
  index_log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "index_log")
  index_log = os.path.join(index_log_dir, "index_log")
  phonelibs_dir = os.path.join(OP_PATH, 'phonelibs')
  subprocess.check_call(["make", "PHONELIBS=" + phonelibs_dir], cwd=index_log_dir, stdout=subprocess.DEVNULL)

  try:
    dat = subprocess.check_output([index_log, fn, "-"])
  except subprocess.CalledProcessError:
    raise DataUnreadableError("%s capnp is corrupted/truncated" % fn)
  return np.frombuffer(dat, dtype=np.uint64)
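
# Writes the raw log bytes to a temp file so index_log can index them, then
# slices out each event and parses it with capnp. Appending the end offset
# makes the final (offset, next_offset) pair cover the last event.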
def event_read_multiple_bytes(dat):
  with tempfile.NamedTemporaryFile() as dat_f:
    dat_f.write(dat)
    dat_f.flush()
    idx = index_log(dat_f.name)

  end_idx = np.uint64(len(dat))
  idx = np.append(idx, end_idx)

  return [capnp_log.Event.from_bytes(dat[idx[i]:idx[i+1]])
          for i in range(len(idx)-1)]


# This class is an iterator itself, and uses private variables from LogReader
# to stitch a list of (possibly sparse) log segments into one stream.
class MultiLogIterator(object):
  def __init__(self, log_paths, wraparound=True):
    self._log_paths = log_paths
    self._wraparound = wraparound

    self._first_log_idx = next(i for i in range(len(log_paths)) if log_paths[i] is not None)
    self._current_log = self._first_log_idx
    self._idx = 0
    self._log_readers = [None]*len(log_paths)
    self.start_time = self._log_reader(self._first_log_idx)._ts[0]
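
  # Segment readers are constructed lazily: a LogReader is only built the
  # first time its segment is touched.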
  def _log_reader(self, i):
    if self._log_readers[i] is None and self._log_paths[i] is not None:
      log_path = self._log_paths[i]
      print("LogReader:%s" % log_path)
      self._log_readers[i] = LogReader(log_path)

    return self._log_readers[i]

  def __iter__(self):
    return self

  def _inc(self):
    lr = self._log_reader(self._current_log)
    if self._idx < len(lr._ents)-1:
      self._idx += 1
    else:
      self._idx = 0
      # advance past segments with no log, using len(self._log_readers) as a "past the end" sentinel
      self._current_log = next(i for i in range(self._current_log + 1, len(self._log_readers) + 1)
                               if i == len(self._log_readers) or self._log_paths[i] is not None)

      # wraparound
      if self._current_log == len(self._log_readers):
        if self._wraparound:
          self._current_log = self._first_log_idx
        else:
          raise StopIteration

  def __next__(self):
    while True:
      lr = self._log_reader(self._current_log)
      ret = lr._ents[self._idx]
      if lr._do_conversion:
        ret = convert_old_pkt_to_new(ret, lr.data_version)
      self._inc()
      return ret

  def tell(self):
    # returns seconds from start of log
    return (self._log_reader(self._current_log)._ts[self._idx] - self.start_time) * 1e-9

  def seek(self, ts):
    # seek to the nearest minute, since each segment covers one minute
    minute = int(ts/60)
    if minute >= len(self._log_paths) or self._log_paths[minute] is None:
      return False

    self._current_log = minute

    # HACK: O(n) seek afterward
    self._idx = 0
    while self.tell() < ts:
      self._inc()
    return True
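

# LogReader parses a single log file. The extension selects the decompression
# scheme, and legacy JSON-lines logs can be canonicalized to the current capnp
# schema on the fly via convert_old_pkt_to_new.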
class LogReader(object):
  def __init__(self, fn, canonicalize=True, only_union_types=False):
    _, ext = os.path.splitext(fn)
    data_version = None

    with FileReader(fn) as f:
      dat = f.read()

    # decompress file
    if ext == ".gz" and ("log_" in fn or "log2" in fn):
      dat = zlib.decompress(dat, zlib.MAX_WBITS|32)
    elif ext == ".bz2":
      dat = bz2.decompress(dat)
    elif ext == ".7z":
      if platform.system() == "Darwin":
        os.environ["LA_LIBRARY_FILEPATH"] = "/usr/local/opt/libarchive/lib/libarchive.dylib"
      import libarchive.public

      with libarchive.public.memory_reader(dat) as aa:
        mdat = []
        for it in aa:
          for bb in it.get_blocks():
            mdat.append(bb)
        dat = b"".join(mdat)

    # TODO: extension shouldn't be a proxy for DeviceType
    if ext == "":
      if dat[:1] == b"[":
        needs_conversion = True
        ents = [json.loads(x) for x in dat.strip().split(b"\n")[:-1]]
        if "_" in fn:
          data_version = fn.split("_")[1]
      else:
        # old rlogs weren't bz2 compressed
        needs_conversion = False
        ents = event_read_multiple_bytes(dat)
    elif ext == ".gz":
      if "log_" in fn:
        # Zero data file.
        ents = [json.loads(x) for x in dat.strip().split(b"\n")[:-1]]
        needs_conversion = True
      elif "log2" in fn:
        needs_conversion = False
        ents = event_read_multiple_bytes(dat)
      else:
        raise Exception("unknown extension")
    elif ext == ".bz2":
      needs_conversion = False
      ents = event_read_multiple_bytes(dat)
    elif ext == ".7z":
      needs_conversion = True
      ents = [json.loads(x) for x in dat.strip().split(b"\n")]
    else:
      raise Exception("unknown extension")

    if needs_conversion:
      # TODO: should we call convert_old_pkt_to_new to generate this?
      self._ts = [x[0][0]*1e9 for x in ents]
    else:
      self._ts = [x.logMonoTime for x in ents]
    self.data_version = data_version
    self._do_conversion = needs_conversion and canonicalize
    self._only_union_types = only_union_types
    self._ents = ents
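
  # Iterating a LogReader yields raw events, canonicalized events, or only
  # events with a recognized union type, depending on the constructor flags.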
  def __iter__(self):
    for ent in self._ents:
      if self._do_conversion:
        yield convert_old_pkt_to_new(ent, self.data_version)
      elif self._only_union_types:
        try:
          # which() raises if the event's union field isn't a known type
          ent.which()
          yield ent
        except capnp.lib.capnp.KjException:
          pass
      else:
        yield ent


def load_many_logs_canonical(log_paths):
  """Load all logs for a sequence of log paths."""
  for log_path in log_paths:
    for msg in LogReader(log_path):
      yield msg
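
# Example usage (a sketch; the rlog paths below are hypothetical):
#   logs = ["2019-05-01--12-00-00--0/rlog.bz2", "2019-05-01--12-00-00--1/rlog.bz2"]
#   lr = MultiLogIterator(logs, wraparound=False)
#   lr.seek(90.0)  # jump to ~90s after the start of the route
#   for msg in lr:
#     print(msg.logMonoTime, msg.which())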

if __name__ == "__main__":
  log_path = sys.argv[1]
  lr = LogReader(log_path)
  for msg in lr:
    print(msg)