From 78808961c53a29fddcec334358a0fcc55aa69a1c Mon Sep 17 00:00:00 2001 From: Greg Hogan Date: Mon, 1 Mar 2021 13:03:44 -0800 Subject: [PATCH] fix log values with non-utf8 characters and remove index_log (#20173) * remove index_log * handle non-unicode characters in values * dump.py better handling of printing non-unicode characters * only replace errors in main --- .dockerignore | 2 - SConstruct | 1 - selfdrive/debug/dump.py | 2 + tools/lib/index_log/.gitignore | 1 - tools/lib/index_log/Makefile | 9 ----- tools/lib/index_log/SConscript | 3 -- tools/lib/index_log/index_log.cc | 63 -------------------------------- tools/lib/logreader.py | 45 ++++------------------- 8 files changed, 10 insertions(+), 116 deletions(-) delete mode 100644 tools/lib/index_log/.gitignore delete mode 100644 tools/lib/index_log/Makefile delete mode 100644 tools/lib/index_log/SConscript delete mode 100644 tools/lib/index_log/index_log.cc diff --git a/.dockerignore b/.dockerignore index c52a810e3..268111455 100644 --- a/.dockerignore +++ b/.dockerignore @@ -26,10 +26,8 @@ chffr/backend/env selfdrive/nav selfdrive/baseui chffr/lib/vidindex/vidindex -chffr/lib/index_log/index_log selfdrive/test/simulator2 **/cache_data -xx/chffr/lib/index_log/index_log xx/chffr/lib/vidindex/vidindex xx/plus xx/community diff --git a/SConstruct b/SConstruct index fbd3a2e4c..6d612a617 100644 --- a/SConstruct +++ b/SConstruct @@ -392,7 +392,6 @@ if arch != "Darwin": if real_arch == "x86_64": SConscript(['tools/nui/SConscript']) - SConscript(['tools/lib/index_log/SConscript']) external_sconscript = GetOption('external_sconscript') if external_sconscript: diff --git a/selfdrive/debug/dump.py b/selfdrive/debug/dump.py index 112ca96a9..908b129c7 100755 --- a/selfdrive/debug/dump.py +++ b/selfdrive/debug/dump.py @@ -4,6 +4,8 @@ import sys import argparse import json from hexdump import hexdump +import codecs +codecs.register_error("strict", codecs.backslashreplace_errors) from cereal import log import cereal.messaging as messaging diff --git a/tools/lib/index_log/.gitignore b/tools/lib/index_log/.gitignore deleted file mode 100644 index 17955f06d..000000000 --- a/tools/lib/index_log/.gitignore +++ /dev/null @@ -1 +0,0 @@ -index_log diff --git a/tools/lib/index_log/Makefile b/tools/lib/index_log/Makefile deleted file mode 100644 index 6e1fa5785..000000000 --- a/tools/lib/index_log/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -CC := gcc -CXX := g++ - -index_log: index_log.cc - $(eval $@_TMP := $(shell mktemp)) - $(CXX) -std=c++1z -o $($@_TMP) \ - index_log.cc \ - -lcapnp -lkj - mv $($@_TMP) $@ diff --git a/tools/lib/index_log/SConscript b/tools/lib/index_log/SConscript deleted file mode 100644 index 9fdd8fc26..000000000 --- a/tools/lib/index_log/SConscript +++ /dev/null @@ -1,3 +0,0 @@ -Import('env', 'arch', 'messaging') - -env.Program('index_log', ['index_log.cc'], LIBS=[messaging, 'kj', 'capnp']) diff --git a/tools/lib/index_log/index_log.cc b/tools/lib/index_log/index_log.cc deleted file mode 100644 index ee7c7502d..000000000 --- a/tools/lib/index_log/index_log.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include - -int main(int argc, char** argv) { - - if (argc != 3) { - printf("usage: %s \n", argv[0]); - return 1; - } - - const std::string log_fn = argv[1]; - const std::string index_fn = argv[2]; - - int log_fd = open(log_fn.c_str(), O_RDONLY, 0); - assert(log_fd >= 0); - - off_t log_size = lseek(log_fd, 0, SEEK_END); - lseek(log_fd, 0, SEEK_SET); - - FILE* index_f = NULL; - if (index_fn == "-") { - index_f = stdout; - } else { - index_f = fopen(index_fn.c_str(), "wb"); - } - assert(index_f); - - void* log_data = mmap(NULL, log_size, PROT_READ, MAP_PRIVATE, log_fd, 0); - assert(log_data); - - auto words = kj::arrayPtr((const capnp::word*)log_data, log_size/sizeof(capnp::word)); - while (words.size() > 0) { - uint64_t idx = ((uintptr_t)words.begin() - (uintptr_t)log_data); - // printf("%llu - %ld\n", idx, words.size()); - const char* idx_bytes = (const char*)&idx; - fwrite(idx_bytes, 8, 1, index_f); - try { - capnp::FlatArrayMessageReader reader(words); - words = kj::arrayPtr(reader.getEnd(), words.end()); - } catch (const kj::Exception& exc) { - break; - } - } - - munmap(log_data, log_size); - - fclose(index_f); - - close(log_fd); - - return 0; -} diff --git a/tools/lib/logreader.py b/tools/lib/logreader.py index 404dd0a7e..01c666fd1 100755 --- a/tools/lib/logreader.py +++ b/tools/lib/logreader.py @@ -2,48 +2,15 @@ import os import sys import bz2 -import tempfile -import subprocess import urllib.parse import capnp -import numpy as np -from tools.lib.exceptions import DataUnreadableError try: from xx.chffr.lib.filereader import FileReader except ImportError: from tools.lib.filereader import FileReader from cereal import log as capnp_log -OP_PATH = os.path.dirname(os.path.dirname(capnp_log.__file__)) - -def index_log(fn): - index_log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "index_log") - index_log = os.path.join(index_log_dir, "index_log") - - if not os.path.exists(index_log): - phonelibs_dir = os.path.join(OP_PATH, 'phonelibs') - subprocess.check_call(["make", "PHONELIBS=" + phonelibs_dir], cwd=index_log_dir, stdout=subprocess.DEVNULL) - - try: - dat = subprocess.check_output([index_log, fn, "-"]) - except subprocess.CalledProcessError as e: - raise DataUnreadableError("%s capnp is corrupted/truncated" % fn) from e - return np.frombuffer(dat, dtype=np.uint64) - -def event_read_multiple_bytes(dat): - with tempfile.NamedTemporaryFile() as dat_f: - dat_f.write(dat) - dat_f.flush() - idx = index_log(dat_f.name) - - end_idx = np.uint64(len(dat)) - idx = np.append(idx, end_idx) - - return [capnp_log.Event.from_bytes(dat[idx[i]:idx[i+1]]) - for i in range(len(idx)-1)] - - # this is an iterator itself, and uses private variables from LogReader class MultiLogIterator(object): def __init__(self, log_paths, wraparound=True): @@ -117,17 +84,17 @@ class LogReader(object): if ext == "": # old rlogs weren't bz2 compressed - ents = event_read_multiple_bytes(dat) + ents = capnp_log.Event.read_multiple_bytes(dat) elif ext == ".bz2": dat = bz2.decompress(dat) - ents = event_read_multiple_bytes(dat) + ents = capnp_log.Event.read_multiple_bytes(dat) else: raise Exception(f"unknown extension {ext}") - self._ts = [x.logMonoTime for x in ents] + self._ents = list(ents) + self._ts = [x.logMonoTime for x in self._ents] self.data_version = data_version self._only_union_types = only_union_types - self._ents = ents def __iter__(self): for ent in self._ents: @@ -141,6 +108,10 @@ class LogReader(object): yield ent if __name__ == "__main__": + import codecs + # capnproto <= 0.8.0 throws errors converting byte data to string + # below line catches those errors and replaces the bytes with \x__ + codecs.register_error("strict", codecs.backslashreplace_errors) log_path = sys.argv[1] lr = LogReader(log_path) for msg in lr: