fix log values with non-utf8 characters and remove index_log (#20173)

* remove index_log

* handle non-UTF-8 bytes in values

* dump.py: better handling when printing non-UTF-8 bytes

* only replace errors in main
pull/20200/head^2
Greg Hogan 2021-03-01 13:03:44 -08:00 committed by GitHub
parent 167fa565c0
commit 78808961c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 10 additions and 116 deletions

View File

@ -26,10 +26,8 @@ chffr/backend/env
selfdrive/nav
selfdrive/baseui
chffr/lib/vidindex/vidindex
chffr/lib/index_log/index_log
selfdrive/test/simulator2
**/cache_data
xx/chffr/lib/index_log/index_log
xx/chffr/lib/vidindex/vidindex
xx/plus
xx/community

View File

@ -392,7 +392,6 @@ if arch != "Darwin":
if real_arch == "x86_64":
SConscript(['tools/nui/SConscript'])
SConscript(['tools/lib/index_log/SConscript'])
external_sconscript = GetOption('external_sconscript')
if external_sconscript:

View File

@ -4,6 +4,8 @@ import sys
import argparse
import json
from hexdump import hexdump
import codecs
# Re-register the "strict" codec error handler name so that it maps to
# backslashreplace. The codec error-handler registry is process-wide, so this
# affects every lookup of "strict" after this line runs, not just this script's
# own calls. Intended effect (NOTE(review): confirm against the capnp bindings
# in use): undecodable bytes come out as backslash escapes instead of raising.
codecs.register_error("strict", codecs.backslashreplace_errors)
from cereal import log
import cereal.messaging as messaging

View File

@ -1 +0,0 @@
index_log

View File

@ -1,9 +0,0 @@
# Compilers for this directory. Only CXX is referenced by the rule below.
CC := gcc
CXX := g++
# Build the index_log binary from index_log.cc, linking against capnp and kj.
# The compiler writes to a path obtained from mktemp, and the result is then
# moved onto the target name -- presumably so a failed or interrupted compile
# never leaves a partial binary at the target path (TODO confirm intent).
index_log: index_log.cc
$(eval $@_TMP := $(shell mktemp))
$(CXX) -std=c++1z -o $($@_TMP) \
index_log.cc \
-lcapnp -lkj
mv $($@_TMP) $@

View File

@ -1,3 +0,0 @@
# SCons build script for the index_log tool.
# Bring the 'env', 'arch' and 'messaging' variables into this script's scope;
# 'arch' is imported but not referenced by the lines below.
Import('env', 'arch', 'messaging')
# Build the 'index_log' program from index_log.cc, linking it against the
# imported messaging library plus kj and capnp.
env.Program('index_log', ['index_log.cc'], LIBS=[messaging, 'kj', 'capnp'])

View File

@ -1,63 +0,0 @@
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <string>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <kj/io.h>
#include <capnp/serialize.h>
int main(int argc, char** argv) {
if (argc != 3) {
printf("usage: %s <log_path> <index_output_path>\n", argv[0]);
return 1;
}
const std::string log_fn = argv[1];
const std::string index_fn = argv[2];
int log_fd = open(log_fn.c_str(), O_RDONLY, 0);
assert(log_fd >= 0);
off_t log_size = lseek(log_fd, 0, SEEK_END);
lseek(log_fd, 0, SEEK_SET);
FILE* index_f = NULL;
if (index_fn == "-") {
index_f = stdout;
} else {
index_f = fopen(index_fn.c_str(), "wb");
}
assert(index_f);
void* log_data = mmap(NULL, log_size, PROT_READ, MAP_PRIVATE, log_fd, 0);
assert(log_data);
auto words = kj::arrayPtr((const capnp::word*)log_data, log_size/sizeof(capnp::word));
while (words.size() > 0) {
uint64_t idx = ((uintptr_t)words.begin() - (uintptr_t)log_data);
// printf("%llu - %ld\n", idx, words.size());
const char* idx_bytes = (const char*)&idx;
fwrite(idx_bytes, 8, 1, index_f);
try {
capnp::FlatArrayMessageReader reader(words);
words = kj::arrayPtr(reader.getEnd(), words.end());
} catch (const kj::Exception& exc) {
break;
}
}
munmap(log_data, log_size);
fclose(index_f);
close(log_fd);
return 0;
}

View File

@ -2,48 +2,15 @@
import os
import sys
import bz2
import tempfile
import subprocess
import urllib.parse
import capnp
import numpy as np
from tools.lib.exceptions import DataUnreadableError
try:
from xx.chffr.lib.filereader import FileReader
except ImportError:
from tools.lib.filereader import FileReader
from cereal import log as capnp_log
OP_PATH = os.path.dirname(os.path.dirname(capnp_log.__file__))
def index_log(fn):
  """Run the index_log helper binary on `fn` and return its output.

  The helper lives in the "index_log" directory next to this module. If the
  binary is not there yet, it is built with `make` first. The helper is
  invoked with "-" as its output path and its stdout is returned as a numpy
  array of uint64 values.

  Raises DataUnreadableError if the helper exits with a non-zero status.
  """
  tool_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "index_log")
  tool_path = os.path.join(tool_dir, "index_log")

  # Build the helper on first use.
  if not os.path.exists(tool_path):
    make_cmd = ["make", "PHONELIBS=" + os.path.join(OP_PATH, 'phonelibs')]
    subprocess.check_call(make_cmd, cwd=tool_dir, stdout=subprocess.DEVNULL)

  try:
    raw_index = subprocess.check_output([tool_path, fn, "-"])
  except subprocess.CalledProcessError as e:
    raise DataUnreadableError("%s capnp is corrupted/truncated" % fn) from e

  return np.frombuffer(raw_index, dtype=np.uint64)
def event_read_multiple_bytes(dat):
  """Split the bytes `dat` into individual events and parse each one.

  `dat` is written to a temporary file so index_log() can be run on it. The
  returned offsets, plus len(dat) as a final boundary, delimit the slices of
  `dat` that are handed to capnp_log.Event.from_bytes. Returns the list of
  parsed events in file order.
  """
  with tempfile.NamedTemporaryFile() as tmp_log:
    tmp_log.write(dat)
    tmp_log.flush()  # make sure the helper sees the full contents on disk
    starts = index_log(tmp_log.name)

  # Append the total length so every start offset has a matching end offset.
  bounds = np.append(starts, np.uint64(len(dat)))

  return [capnp_log.Event.from_bytes(dat[lo:hi])
          for lo, hi in zip(bounds[:-1], bounds[1:])]
# this is an iterator itself, and uses private variables from LogReader
class MultiLogIterator(object):
def __init__(self, log_paths, wraparound=True):
@ -117,17 +84,17 @@ class LogReader(object):
if ext == "":
# old rlogs weren't bz2 compressed
ents = event_read_multiple_bytes(dat)
ents = capnp_log.Event.read_multiple_bytes(dat)
elif ext == ".bz2":
dat = bz2.decompress(dat)
ents = event_read_multiple_bytes(dat)
ents = capnp_log.Event.read_multiple_bytes(dat)
else:
raise Exception(f"unknown extension {ext}")
self._ts = [x.logMonoTime for x in ents]
self._ents = list(ents)
self._ts = [x.logMonoTime for x in self._ents]
self.data_version = data_version
self._only_union_types = only_union_types
self._ents = ents
def __iter__(self):
for ent in self._ents:
@ -141,6 +108,10 @@ class LogReader(object):
yield ent
if __name__ == "__main__":
import codecs
# capnproto <= 0.8.0 throws errors converting byte data to string
# below line catches those errors and replaces the bytes with \x__
codecs.register_error("strict", codecs.backslashreplace_errors)
log_path = sys.argv[1]
lr = LogReader(log_path)
for msg in lr: