* Added wide cam vipc client and bigmodel transform logic

* Added wide_frame to ModelState, should still work normally

* Refactored image input into addImage method, should still work normally (see the API sketch after this list)

* Updated thneed/compile.cc

* Bigmodel, untested: 44f83118-b375-4d4c-ae12-2017124f0cf4/200

* Have to initialize extra buffer in SNPEModel

* Default parameter value in the wrong place I think

* Move USE_EXTRA to SConscript

* New model: 6c34d59a-acc3-4877-84bd-904c10745ba6/250

* move use_extra check to runtime, not on C2

* this is always true

* more C2 checks

* log if frames are out of sync

* more logging on no frame

* store in pointer

* print sof

* add sync logic

* log based on sof difference as well

* keep both models

* fewer assumptions

* define above thneed

* typo

* simplify

* no need for second client if main is already wide

* more comment updates

* no optional reference

* more logging to debug lags

* add to release files

* both defines

* New model: 6831a77f-2574-4bfb-8077-79b0972a2771/950

* Path offset no longer relevant

* Remove duplicate execute

* Moved bigmodel back to big_supercombo.dlc

* add wide vipc stream

* Tici must be tici

* Needs state too

* add wide cam support to model replay

* handle syncing better

* ugh, c2

* print that

* handle ecam lag

* skip first one

* so close

* update refs
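
The addImage refactor above replaces the runners' single-shot execute(buf, size) with a stage-then-run sequence; a hedged sketch of the new calling convention, with names taken from the diffs below:

// before: the image was passed at execution time
//   model->execute(net_input_buf, buf_size);
// after: stage the main (and optionally the wide) image, then run once
model->addImage(net_input_buf, buf_size);
model->addExtra(extra_input_buf, extra_buf_size);  // only when use_extra
model->execute();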

Co-authored-by: mitchellgoffpc <mitchellgoffpc@gmail.com>
Co-authored-by: Harald Schafer <harald.the.engineer@gmail.com>
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
Co-authored-by: Comma Device <device@comma.ai>
pull/23805/head
Willem Melching 2022-02-20 01:06:31 +01:00 committed by GitHub
parent 9a046e2ef3
commit 85efde269d
20 changed files with 410 additions and 124 deletions

models/big_supercombo.dlc (binary, stored with Git LFS) 100644

models/big_supercombo.onnx (binary, stored with Git LFS) 100644

release/files_common

@ -58,6 +58,7 @@ common/transformations/transformations.pyx
common/api/__init__.py
models/supercombo.dlc
models/big_supercombo.dlc
models/dmonitoring_model_q.dlc
release/*

selfdrive/controls/lib/lane_planner.py

@ -9,17 +9,16 @@ from selfdrive.swaglog import cloudlog
TRAJECTORY_SIZE = 33
# camera offset is meters from center car to camera
# model path is in the frame of EON's camera. TICI is 0.1 m away,
# however the average measured path difference is 0.04 m
# model path is in the frame of the camera. Empirically
# the model knows the difference between TICI and EON
# so a path offset is not needed
PATH_OFFSET = 0.00
if EON:
CAMERA_OFFSET = -0.06
PATH_OFFSET = 0.0
elif TICI:
CAMERA_OFFSET = 0.04
PATH_OFFSET = 0.04
else:
CAMERA_OFFSET = 0.0
PATH_OFFSET = 0.0
class LanePlanner:

selfdrive/modeld/SConscript

@ -31,6 +31,9 @@ thneed_src = [
use_thneed = not GetOption('no_thneed')
use_extra = True # arch == "larch64"
lenv['CXXFLAGS'].append('-DUSE_EXTRA=true' if use_extra else '-DUSE_EXTRA=false')
if arch == "aarch64" or arch == "larch64":
libs += ['gsl', 'CB']
libs += ['gnustl_shared'] if arch == "aarch64" else ['pthread', 'dl']
@ -65,7 +68,7 @@ common_model = lenv.Object(common_src)
# build thneed model
if use_thneed and arch in ("aarch64", "larch64"):
fn = "../../models/supercombo"
fn = "../../models/big_supercombo" if use_extra else "../../models/supercombo"
compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs)
cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} {fn}.dlc {fn}.thneed --binary"

selfdrive/modeld/modeld.cc

@ -1,6 +1,7 @@
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <cmath>
#include <eigen3/Eigen/Dense>
@ -15,34 +16,34 @@
ExitHandler do_exit;
mat3 update_calibration(cereal::LiveCalibrationData::Reader live_calib, bool wide_camera) {
mat3 update_calibration(Eigen::Matrix<float, 3, 4> &extrinsics, bool wide_camera, bool bigmodel_frame) {
/*
import numpy as np
from common.transformations.model import medmodel_frame_from_road_frame
medmodel_frame_from_ground = medmodel_frame_from_road_frame[:, (0, 1, 3)]
ground_from_medmodel_frame = np.linalg.inv(medmodel_frame_from_ground)
*/
static const auto ground_from_medmodel_frame = (Eigen::Matrix<float, 3, 3>() <<
0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
-1.09890110e-03, 0.00000000e+00, 2.81318681e-01,
-1.84808520e-20, 9.00738606e-04, -4.28751576e-02).finished();
static const auto ground_from_sbigmodel_frame = (Eigen::Matrix<float, 3, 3>() <<
0.00000000e+00, 7.31372216e-19, 1.00000000e+00,
-2.19780220e-03, 4.11497335e-19, 5.62637363e-01,
-5.46146580e-20, 1.80147721e-03, -2.73464241e-01).finished();
static const auto cam_intrinsics = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>(wide_camera ? ecam_intrinsic_matrix.v : fcam_intrinsic_matrix.v);
static const mat3 yuv_transform = get_model_yuv_transform();
auto extrinsic_matrix = live_calib.getExtrinsicMatrix();
Eigen::Matrix<float, 3, 4> extrinsic_matrix_eigen;
for (int i = 0; i < 4*3; i++) {
extrinsic_matrix_eigen(i / 4, i % 4) = extrinsic_matrix[i];
}
auto camera_frame_from_road_frame = cam_intrinsics * extrinsic_matrix_eigen;
auto ground_from_model_frame = bigmodel_frame ? ground_from_sbigmodel_frame : ground_from_medmodel_frame;
auto camera_frame_from_road_frame = cam_intrinsics * extrinsics;
Eigen::Matrix<float, 3, 3> camera_frame_from_ground;
camera_frame_from_ground.col(0) = camera_frame_from_road_frame.col(0);
camera_frame_from_ground.col(1) = camera_frame_from_road_frame.col(1);
camera_frame_from_ground.col(2) = camera_frame_from_road_frame.col(3);
auto warp_matrix = camera_frame_from_ground * ground_from_medmodel_frame;
auto warp_matrix = camera_frame_from_ground * ground_from_model_frame;
mat3 transform = {};
for (int i=0; i<3*3; i++) {
transform.v[i] = warp_matrix(i / 3, i % 3);
@ -50,7 +51,7 @@ mat3 update_calibration(cereal::LiveCalibrationData::Reader live_calib, bool wid
return matmul3(yuv_transform, transform);
}
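
The returned matrix is a plane homography: ground_from_model_frame lifts a model-frame pixel onto the road plane, and camera_frame_from_ground projects it back into the camera. A hedged sketch of applying it (Eigen, names from this hunk; the pixel coordinates are made up for illustration):

// map a model-frame pixel to a camera pixel through the ground plane
Eigen::Vector3f model_px(512.0f, 256.0f, 1.0f);  // homogeneous pixel, illustrative values
Eigen::Vector3f cam_px = camera_frame_from_ground * ground_from_model_frame * model_px;
cam_px /= cam_px.z();  // dehomogenize; the code above further composes yuv_transform
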
void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera) {
void run_model(ModelState &model, VisionIpcClient &vipc_client_main, VisionIpcClient &vipc_client_extra, bool main_wide_camera, bool use_extra, bool use_extra_client) {
// messaging
PubMaster pm({"modelV2", "cameraOdometry"});
SubMaster sm({"lateralPlan", "roadCameraState", "liveCalibration"});
@ -62,20 +63,80 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera
double last = 0;
uint32_t run_count = 0;
mat3 model_transform = {};
mat3 model_transform_main = {};
mat3 model_transform_extra = {};
bool live_calib_seen = false;
VisionBuf *buf_main = nullptr;
VisionBuf *buf_extra = nullptr;
VisionIpcBufExtra meta_main = {0};
VisionIpcBufExtra meta_extra = {0};
while (!do_exit) {
VisionIpcBufExtra extra = {};
VisionBuf *buf = vipc_client.recv(&extra);
if (buf == nullptr) continue;
// TODO: change sync logic to use timestamp start of frame in case camerad skips a frame
// log frame id in model packet
// Keep receiving frames until we are at least 1 frame ahead of previous extra frame
while (meta_main.frame_id <= meta_extra.frame_id) {
buf_main = vipc_client_main.recv(&meta_main);
if (meta_main.frame_id <= meta_extra.frame_id) {
LOGE("main camera behind! main: %d (%.5f), extra: %d (%.5f)",
meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9,
meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9);
}
if (buf_main == nullptr) break;
}
if (buf_main == nullptr) {
LOGE("vipc_client_main no frame");
continue;
}
if (use_extra_client) {
// Keep receiving extra frames until frame id matches main camera
do {
buf_extra = vipc_client_extra.recv(&meta_extra);
if (meta_main.frame_id > meta_extra.frame_id) {
LOGE("extra camera behind! main: %d (%.5f), extra: %d (%.5f)",
meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9,
meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9);
}
} while (buf_extra != nullptr && meta_main.frame_id > meta_extra.frame_id);
if (buf_extra == nullptr) {
LOGE("vipc_client_extra no frame");
continue;
}
if (meta_main.frame_id != meta_extra.frame_id || std::abs((int64_t)meta_main.timestamp_sof - (int64_t)meta_extra.timestamp_sof) > 10000000ULL) {
LOGE("frames out of sync! main: %d (%.5f), extra: %d (%.5f)",
meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9,
meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9);
}
} else {
// Use single camera
buf_extra = buf_main;
meta_extra = meta_main;
}
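
Both receive loops drive toward the same invariant, which can be written as a small predicate (a sketch, not part of this diff; the 10 ms bound mirrors the out-of-sync check above):

// hypothetical helper: frames are synced when ids match and sof timestamps are within 10 ms
static bool frames_synced(const VisionIpcBufExtra &meta_main, const VisionIpcBufExtra &meta_extra) {
  return meta_main.frame_id == meta_extra.frame_id &&
         std::abs((int64_t)meta_main.timestamp_sof - (int64_t)meta_extra.timestamp_sof) < 10000000LL;
}
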
// TODO: path planner timeout?
sm.update(0);
int desire = ((int)sm["lateralPlan"].getLateralPlan().getDesire());
frame_id = sm["roadCameraState"].getRoadCameraState().getFrameId();
if (sm.updated("liveCalibration")) {
model_transform = update_calibration(sm["liveCalibration"].getLiveCalibration(), wide_camera);
auto extrinsic_matrix = sm["liveCalibration"].getLiveCalibration().getExtrinsicMatrix();
Eigen::Matrix<float, 3, 4> extrinsic_matrix_eigen;
for (int i = 0; i < 4*3; i++) {
extrinsic_matrix_eigen(i / 4, i % 4) = extrinsic_matrix[i];
}
model_transform_main = update_calibration(extrinsic_matrix_eigen, main_wide_camera, false);
if (use_extra) {
model_transform_extra = update_calibration(extrinsic_matrix_eigen, Hardware::TICI(), true);
}
live_calib_seen = true;
}
@ -85,13 +146,12 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera
}
double mt1 = millis_since_boot();
ModelOutput *model_output = model_eval_frame(&model, buf->buf_cl, buf->width, buf->height,
model_transform, vec_desire);
ModelOutput *model_output = model_eval_frame(&model, buf_main, buf_extra, model_transform_main, model_transform_extra, vec_desire);
double mt2 = millis_since_boot();
float model_execution_time = (mt2 - mt1) / 1000.0;
// tracked dropped frames
uint32_t vipc_dropped_frames = extra.frame_id - last_vipc_frame_id - 1;
uint32_t vipc_dropped_frames = meta_main.frame_id - last_vipc_frame_id - 1;
float frames_dropped = frame_dropped_filter.update((float)std::min(vipc_dropped_frames, 10U));
if (run_count < 10) { // let frame drops warm up
frame_dropped_filter.reset(0);
@ -101,13 +161,13 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera
float frame_drop_ratio = frames_dropped / (1 + frames_dropped);
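
As a sanity check on this formula (arithmetic only, not in the diff): with a steady one-in-five drop rate the filter settles near frames_dropped ≈ 0.25 per received frame, so frame_drop_ratio = 0.25 / 1.25 = 0.2, i.e. 20% of all frames dropped.
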
model_publish(pm, extra.frame_id, frame_id, frame_drop_ratio, *model_output, extra.timestamp_eof, model_execution_time,
model_publish(pm, meta_main.frame_id, frame_id, frame_drop_ratio, *model_output, meta_main.timestamp_eof, model_execution_time,
kj::ArrayPtr<const float>(model.output.data(), model.output.size()), live_calib_seen);
posenet_publish(pm, extra.frame_id, vipc_dropped_frames, *model_output, extra.timestamp_eof, live_calib_seen);
posenet_publish(pm, meta_main.frame_id, vipc_dropped_frames, *model_output, meta_main.timestamp_eof, live_calib_seen);
//printf("model process: %.2fms, from last %.2fms, vipc_frame_id %u, frame_id, %u, frame_drop %.3f\n", mt2 - mt1, mt1 - last, extra.frame_id, frame_id, frame_drop_ratio);
last = mt1;
last_vipc_frame_id = extra.frame_id;
last_vipc_frame_id = meta_main.frame_id;
}
}
@ -120,7 +180,9 @@ int main(int argc, char **argv) {
assert(ret == 0);
}
bool wide_camera = Hardware::TICI() ? Params().getBool("EnableWideCamera") : false;
bool main_wide_camera = Hardware::TICI() ? Params().getBool("EnableWideCamera") : false;
bool use_extra = USE_EXTRA;
bool use_extra_client = Hardware::TICI() && use_extra && !main_wide_camera;
// cl init
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
@ -128,20 +190,32 @@ int main(int argc, char **argv) {
// init the models
ModelState model;
model_init(&model, device_id, context);
model_init(&model, device_id, context, use_extra);
LOGW("models loaded, modeld starting");
VisionIpcClient vipc_client = VisionIpcClient("camerad", wide_camera ? VISION_STREAM_WIDE_ROAD : VISION_STREAM_ROAD, true, device_id, context);
while (!do_exit && !vipc_client.connect(false)) {
VisionIpcClient vipc_client_main = VisionIpcClient("camerad", main_wide_camera ? VISION_STREAM_WIDE_ROAD : VISION_STREAM_ROAD, true, device_id, context);
VisionIpcClient vipc_client_extra = VisionIpcClient("camerad", VISION_STREAM_WIDE_ROAD, false, device_id, context);
while (!do_exit && !vipc_client_main.connect(false)) {
util::sleep_for(100);
}
while (!do_exit && use_extra_client && !vipc_client_extra.connect(false)) {
util::sleep_for(100);
}
// run the models
// vipc_client.connected is false only when do_exit is true
if (vipc_client.connected) {
const VisionBuf *b = &vipc_client.buffers[0];
LOGW("connected with buffer size: %d (%d x %d)", b->len, b->width, b->height);
run_model(model, vipc_client, wide_camera);
if (!do_exit) {
const VisionBuf *b = &vipc_client_main.buffers[0];
LOGW("connected main cam with buffer size: %d (%d x %d)", b->len, b->width, b->height);
if (use_extra_client) {
const VisionBuf *wb = &vipc_client_extra.buffers[0];
LOGW("connected extra cam with buffer size: %d (%d x %d)", wb->len, wb->width, wb->height);
}
run_model(model, vipc_client_main, vipc_client_extra, main_wide_camera, use_extra, use_extra_client);
}
model_free(&model);

selfdrive/modeld/models/dmonitoring.cc

@ -166,7 +166,8 @@ DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_
//fclose(dump_yuv_file2);
double t1 = millis_since_boot();
s->m->execute(net_input_buf, yuv_buf_len);
s->m->addImage(net_input_buf, yuv_buf_len);
s->m->execute();
double t2 = millis_since_boot();
DMonitoringResult ret = {0};

selfdrive/modeld/models/driving.cc

@ -26,15 +26,19 @@ constexpr const kj::ArrayPtr<const T> to_kj_array_ptr(const std::array<T, size>
return kj::ArrayPtr(arr.data(), arr.size());
}
void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
void model_init(ModelState* s, cl_device_id device_id, cl_context context, bool use_extra) {
s->frame = new ModelFrame(device_id, context);
s->wide_frame = new ModelFrame(device_id, context);
#ifdef USE_THNEED
s->m = std::make_unique<ThneedModel>("../../models/supercombo.thneed", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME);
s->m = std::make_unique<ThneedModel>(use_extra ? "../../models/big_supercombo.thneed" : "../../models/supercombo.thneed",
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra);
#elif USE_ONNX_MODEL
s->m = std::make_unique<ONNXModel>("../../models/supercombo.onnx", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME);
s->m = std::make_unique<ONNXModel>(use_extra ? "../../models/big_supercombo.onnx" : "../../models/supercombo.onnx",
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra);
#else
s->m = std::make_unique<SNPEModel>("../../models/supercombo.dlc", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME);
s->m = std::make_unique<SNPEModel>(use_extra ? "../../models/big_supercombo.dlc" : "../../models/supercombo.dlc",
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra);
#endif
#ifdef TEMPORAL
@ -52,8 +56,8 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
#endif
}
ModelOutput* model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height,
const mat3 &transform, float *desire_in) {
ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf,
const mat3 &transform, const mat3 &transform_wide, float *desire_in) {
#ifdef DESIRE
if (desire_in != NULL) {
for (int i = 1; i < DESIRE_LEN; i++) {
@ -70,8 +74,14 @@ ModelOutput* model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh
#endif
// if getInputBuf is not NULL, net_input_buf will be
auto net_input_buf = s->frame->prepare(yuv_cl, width, height, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
s->m->execute(net_input_buf, s->frame->buf_size);
auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
s->m->addImage(net_input_buf, s->frame->buf_size);
if (wbuf != nullptr) {
auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
s->m->addExtra(net_extra_buf, s->wide_frame->buf_size);
}
s->m->execute();
return (ModelOutput*)&s->output;
}

selfdrive/modeld/models/driving.h

@ -9,6 +9,7 @@
#include <memory>
#include "cereal/messaging/messaging.h"
#include "cereal/visionipc/visionipc_client.h"
#include "selfdrive/common/mat.h"
#include "selfdrive/common/modeldata.h"
#include "selfdrive/common/util.h"
@ -25,6 +26,7 @@ constexpr int BLINKER_LEN = 6;
constexpr int META_STRIDE = 7;
constexpr int PLAN_MHP_N = 5;
constexpr int STOP_LINE_MHP_N = 3;
constexpr int LEAD_MHP_N = 2;
constexpr int LEAD_TRAJ_LEN = 6;
@ -147,6 +149,37 @@ struct ModelOutputLeads {
};
static_assert(sizeof(ModelOutputLeads) == (sizeof(ModelOutputLeadPrediction)*LEAD_MHP_N) + (sizeof(float)*LEAD_MHP_SELECTION));
struct ModelOutputStopLineElement {
ModelOutputXYZ position;
ModelOutputXYZ rotation;
float speed;
float time;
};
static_assert(sizeof(ModelOutputStopLineElement) == (sizeof(ModelOutputXYZ)*2 + sizeof(float)*2));
struct ModelOutputStopLinePrediction {
ModelOutputStopLineElement mean;
ModelOutputStopLineElement std;
float prob;
};
static_assert(sizeof(ModelOutputStopLinePrediction) == (sizeof(ModelOutputStopLineElement)*2 + sizeof(float)));
struct ModelOutputStopLines {
std::array<ModelOutputStopLinePrediction, STOP_LINE_MHP_N> prediction;
float prob;
constexpr const ModelOutputStopLinePrediction &get_best_prediction(int t_idx) const {
int max_idx = 0;
for (int i = 1; i < prediction.size(); i++) {
if (prediction[i].prob > prediction[max_idx].prob) {
max_idx = i;
}
}
return prediction[max_idx];
}
};
static_assert(sizeof(ModelOutputStopLines) == (sizeof(ModelOutputStopLinePrediction)*STOP_LINE_MHP_N) + sizeof(float));
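
Consumers would pick the most probable hypothesis the same way the lead selector does; a hedged usage sketch (field semantics assumed from the struct layout):

// illustrative only: best stop-line hypothesis at time index 0
const ModelOutputStopLinePrediction &best = model_output->stop_lines.get_best_prediction(0);
float dist = best.mean.position.x;  // assumption: x is longitudinal distance in meters
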
struct ModelOutputPose {
ModelOutputXYZ velocity_mean;
ModelOutputXYZ rotation_mean;
@ -205,6 +238,7 @@ struct ModelOutput {
const ModelOutputLaneLines lane_lines;
const ModelOutputRoadEdges road_edges;
const ModelOutputLeads leads;
const ModelOutputStopLines stop_lines;
const ModelOutputMeta meta;
const ModelOutputPose pose;
};
@ -220,6 +254,7 @@ constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + TEMPORAL_SIZE;
// TODO: convert remaining arrays to std::array and update model runners
struct ModelState {
ModelFrame *frame;
ModelFrame *wide_frame;
std::array<float, NET_OUTPUT_SIZE> output = {};
std::unique_ptr<RunModel> m;
#ifdef DESIRE
@ -231,9 +266,9 @@ struct ModelState {
#endif
};
void model_init(ModelState* s, cl_device_id device_id, cl_context context);
ModelOutput *model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height,
const mat3 &transform, float *desire_in);
void model_init(ModelState* s, cl_device_id device_id, cl_context context, bool use_extra);
ModelOutput *model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* buf_wide,
const mat3 &transform, const mat3 &transform_wide, float *desire_in);
void model_free(ModelState* s);
void model_publish(PubMaster &pm, uint32_t vipc_frame_id, uint32_t frame_id, float frame_drop,
const ModelOutput &net_outputs, uint64_t timestamp_eof,

selfdrive/modeld/runners/onnxmodel.cc

@ -14,11 +14,12 @@
#include "selfdrive/common/swaglog.h"
#include "selfdrive/common/util.h"
ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime) {
ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra) {
LOGD("loading model %s", path);
output = _output;
output_size = _output_size;
use_extra = _use_extra;
int err = pipe(pipein);
assert(err == 0);
@ -99,9 +100,24 @@ void ONNXModel::addTrafficConvention(float *state, int state_size) {
traffic_convention_size = state_size;
}
void ONNXModel::execute(float *net_input_buf, int buf_size) {
void ONNXModel::addImage(float *image_buf, int buf_size) {
image_input_buf = image_buf;
image_buf_size = buf_size;
}
void ONNXModel::addExtra(float *image_buf, int buf_size) {
extra_input_buf = image_buf;
extra_buf_size = buf_size;
}
void ONNXModel::execute() {
// order must be this
pwrite(net_input_buf, buf_size);
if (image_input_buf != NULL) {
pwrite(image_input_buf, image_buf_size);
}
if (extra_input_buf != NULL) {
pwrite(extra_input_buf, extra_buf_size);
}
if (desire_input_buf != NULL) {
pwrite(desire_input_buf, desire_state_size);
}

selfdrive/modeld/runners/onnxmodel.h

@ -6,12 +6,14 @@
class ONNXModel : public RunModel {
public:
ONNXModel(const char *path, float *output, size_t output_size, int runtime);
ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false);
~ONNXModel();
void addRecurrent(float *state, int state_size);
void addDesire(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void execute(float *net_input_buf, int buf_size);
void addImage(float *image_buf, int buf_size);
void addExtra(float *image_buf, int buf_size);
void execute();
private:
int proc_pid;
@ -24,6 +26,11 @@ private:
int desire_state_size;
float *traffic_convention_input_buf = NULL;
int traffic_convention_size;
float *image_input_buf = NULL;
int image_buf_size;
float *extra_input_buf = NULL;
int extra_buf_size;
bool use_extra;
// pipe to communicate to keras subprocess
void pread(float *buf, int size);

selfdrive/modeld/runners/runmodel.h

@ -5,7 +5,10 @@ public:
virtual void addRecurrent(float *state, int state_size) {}
virtual void addDesire(float *state, int state_size) {}
virtual void addTrafficConvention(float *state, int state_size) {}
virtual void execute(float *net_input_buf, int buf_size) {}
virtual void addImage(float *image_buf, int buf_size) {}
virtual void addExtra(float *image_buf, int buf_size) {}
virtual void execute() {}
virtual void* getInputBuf() { return nullptr; }
virtual void* getExtraBuf() { return nullptr; }
};
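
A runner only overrides what it supports; a minimal hedged subclass sketch against this interface (hypothetical class, illustration only):

class NullModel : public RunModel {
public:
  void addImage(float *image_buf, int buf_size) { input = image_buf; }
  void addExtra(float *image_buf, int buf_size) { extra = image_buf; }
  void execute() { /* consume the staged input/extra and fill the output here */ }
private:
  float *input = nullptr;
  float *extra = nullptr;
};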

selfdrive/modeld/runners/snpemodel.cc

@ -14,9 +14,10 @@ void PrintErrorStringAndExit() {
std::exit(EXIT_FAILURE);
}
SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime) {
SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
output = loutput;
output_size = loutput_size;
use_extra = luse_extra;
#if defined(QCOM) || defined(QCOM2)
if (runtime==USE_GPU_RUNTIME) {
Runtime = zdl::DlSystem::Runtime_t::GPU;
@ -90,6 +91,25 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
inputMap.add(input_tensor_name, inputBuffer.get());
}
if (use_extra) {
const char *extra_tensor_name = strListi.at(1);
const auto &extraDims_opt = snpe->getInputDimensions(extra_tensor_name);
const zdl::DlSystem::TensorShape& bufferShape = *extraDims_opt;
std::vector<size_t> strides(bufferShape.rank());
strides[strides.size() - 1] = sizeof(float);
size_t product = 1;
for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
size_t stride = strides[strides.size() - 1];
for (size_t i = bufferShape.rank() - 1; i > 0; i--) {
stride *= bufferShape[i];
strides[i-1] = stride;
}
printf("extra product is %lu\n", product);
extraBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat);
inputMap.add(extra_tensor_name, extraBuffer.get());
}
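
The stride loop above is plain row-major layout; the same computation as a standalone sketch (shape values illustrative):

#include <vector>  // standalone sketch

// bytes to step per dimension for a row-major float tensor
std::vector<size_t> row_major_strides(const std::vector<size_t> &shape) {
  std::vector<size_t> strides(shape.size());
  size_t stride = sizeof(float);
  for (size_t i = shape.size(); i > 0; i--) {
    strides[i - 1] = stride;
    stride *= shape[i - 1];
  }
  return strides;
}
// e.g. shape {1, 128, 256, 12} -> strides {1572864, 12288, 48, 4}
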
// create output buffer
{
const zdl::DlSystem::TensorShape& bufferShape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
@ -121,13 +141,24 @@ void SNPEModel::addDesire(float *state, int state_size) {
desireBuffer = this->addExtra(state, state_size, 1);
}
void SNPEModel::addImage(float *image_buf, int buf_size) {
input = image_buf;
input_size = buf_size;
}
void SNPEModel::addExtra(float *image_buf, int buf_size) {
extra = image_buf;
extra_size = buf_size;
}
std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, int state_size, int idx) {
// get input and output names
const auto real_idx = idx + (use_extra ? 1 : 0);
const auto &strListi_opt = snpe->getInputTensorNames();
if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names");
const auto &strListi = *strListi_opt;
const char *input_tensor_name = strListi.at(idx);
printf("adding index %d: %s\n", idx, input_tensor_name);
const char *input_tensor_name = strListi.at(real_idx);
printf("adding index %d: %s\n", real_idx, input_tensor_name);
zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
@ -137,13 +168,17 @@ std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, in
return ret;
}
void SNPEModel::execute(float *net_input_buf, int buf_size) {
void SNPEModel::execute() {
#ifdef USE_THNEED
if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
float *inputs[4] = {recurrent, trafficConvention, desire, net_input_buf};
if (thneed == NULL) {
bool ret = inputBuffer->setBufferAddress(net_input_buf);
bool ret = inputBuffer->setBufferAddress(input);
assert(ret == true);
if (use_extra) {
assert(extra != NULL);
bool extra_ret = extraBuffer->setBufferAddress(extra);
assert(extra_ret == true);
}
if (!snpe->execute(inputMap, outputMap)) {
PrintErrorStringAndExit();
}
@ -161,7 +196,13 @@ void SNPEModel::execute(float *net_input_buf, int buf_size) {
memset(output, 0, output_size*sizeof(float));
memset(recurrent, 0, recurrent_size*sizeof(float));
uint64_t start_time = nanos_since_boot();
thneed->execute(inputs, output);
if (extra != NULL) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
thneed->execute(inputs, output);
} else {
float *inputs[4] = {recurrent, trafficConvention, desire, input};
thneed->execute(inputs, output);
}
uint64_t elapsed_time = nanos_since_boot() - start_time;
printf("ran model in %.2f ms\n", float(elapsed_time)/1e6);
@ -175,12 +216,22 @@ void SNPEModel::execute(float *net_input_buf, int buf_size) {
}
free(outputs_golden);
} else {
thneed->execute(inputs, output);
if (use_extra) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
thneed->execute(inputs, output);
} else {
float *inputs[4] = {recurrent, trafficConvention, desire, input};
thneed->execute(inputs, output);
}
}
} else {
#endif
bool ret = inputBuffer->setBufferAddress(net_input_buf);
bool ret = inputBuffer->setBufferAddress(input);
assert(ret == true);
if (use_extra) {
bool extra_ret = extraBuffer->setBufferAddress(extra);
assert(extra_ret == true);
}
if (!snpe->execute(inputMap, outputMap)) {
PrintErrorStringAndExit();
}

selfdrive/modeld/runners/snpemodel.h

@ -22,11 +22,13 @@
class SNPEModel : public RunModel {
public:
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime);
SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addDesire(float *state, int state_size);
void execute(float *net_input_buf, int buf_size);
void addImage(float *image_buf, int buf_size);
void addExtra(float *image_buf, int buf_size);
void execute();
#ifdef USE_THNEED
Thneed *thneed = NULL;
@ -45,6 +47,8 @@ private:
// snpe input stuff
zdl::DlSystem::UserBufferMap inputMap;
std::unique_ptr<zdl::DlSystem::IUserBuffer> inputBuffer;
float *input;
size_t input_size;
// snpe output stuff
zdl::DlSystem::UserBufferMap outputMap;
@ -52,6 +56,12 @@ private:
float *output;
size_t output_size;
// extra input stuff
std::unique_ptr<zdl::DlSystem::IUserBuffer> extraBuffer;
float *extra;
size_t extra_size;
bool use_extra;
// recurrent and desire
std::unique_ptr<zdl::DlSystem::IUserBuffer> addExtra(float *state, int state_size, int idx);
float *recurrent;

selfdrive/modeld/runners/thneedmodel.cc

@ -2,7 +2,7 @@
#include <cassert>
ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime) {
ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) {
thneed = new Thneed(true);
thneed->record = 0;
thneed->load(path);
@ -11,6 +11,7 @@ ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size,
recorded = false;
output = loutput;
use_extra = luse_extra;
}
void ThneedModel::addRecurrent(float *state, int state_size) {
@ -25,23 +26,48 @@ void ThneedModel::addDesire(float *state, int state_size) {
desire = state;
}
void ThneedModel::addImage(float *image_input_buf, int buf_size) {
input = image_input_buf;
}
void ThneedModel::addExtra(float *extra_input_buf, int buf_size) {
extra = extra_input_buf;
}
void* ThneedModel::getInputBuf() {
if (use_extra && thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]);
else if (!use_extra && thneed->input_clmem.size() > 3) return &(thneed->input_clmem[3]);
else return nullptr;
}
void* ThneedModel::getExtraBuf() {
if (thneed->input_clmem.size() > 3) return &(thneed->input_clmem[3]);
else return nullptr;
}
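
The two getters above encode the thneed input ordering; spelled out (layout inferred from this file's execute()):

// input_clmem layout assumed here:
//   without extra: [0] recurrent, [1] trafficConvention, [2] desire, [3] image
//   with extra:    [0] recurrent, [1] trafficConvention, [2] desire, [3] extra, [4] image
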
void ThneedModel::execute(float *net_input_buf, int buf_size) {
float *inputs[4] = {recurrent, trafficConvention, desire, net_input_buf};
void ThneedModel::execute() {
if (!recorded) {
thneed->record = THNEED_RECORD;
thneed->copy_inputs(inputs);
if (use_extra) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
thneed->copy_inputs(inputs);
} else {
float *inputs[4] = {recurrent, trafficConvention, desire, input};
thneed->copy_inputs(inputs);
}
thneed->clexec();
thneed->copy_output(output);
thneed->stop();
recorded = true;
} else {
thneed->execute(inputs, output);
if (use_extra) {
float *inputs[5] = {recurrent, trafficConvention, desire, extra, input};
thneed->execute(inputs, output);
} else {
float *inputs[4] = {recurrent, trafficConvention, desire, input};
thneed->execute(inputs, output);
}
}
}

selfdrive/modeld/runners/thneedmodel.h

@ -5,16 +5,22 @@
class ThneedModel : public RunModel {
public:
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime);
ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false);
void addRecurrent(float *state, int state_size);
void addTrafficConvention(float *state, int state_size);
void addDesire(float *state, int state_size);
void execute(float *net_input_buf, int buf_size);
void addImage(float *image_buf, int buf_size);
void addExtra(float *image_buf, int buf_size);
void execute();
void* getInputBuf();
void* getExtraBuf();
private:
Thneed *thneed = NULL;
bool recorded;
bool use_extra;
float *input;
float *extra;
float *output;
// recurrent and desire

selfdrive/modeld/thneed/compile.cc

@ -2,6 +2,7 @@
#include "selfdrive/modeld/runners/snpemodel.h"
#include "selfdrive/modeld/thneed/thneed.h"
#include "selfdrive/hardware/hw.h"
#define TEMPORAL_SIZE 512
#define DESIRE_LEN 8
@ -10,22 +11,28 @@
// TODO: This should probably use SNPE directly.
int main(int argc, char* argv[]) {
#define OUTPUT_SIZE 0x10000
float *output = (float*)calloc(OUTPUT_SIZE, sizeof(float));
SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME);
SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME, USE_EXTRA);
float state[TEMPORAL_SIZE] = {0};
float desire[DESIRE_LEN] = {0};
float traffic_convention[TRAFFIC_CONVENTION_LEN] = {0};
float *input = (float*)calloc(0x1000000, sizeof(float));
float *extra = (float*)calloc(0x1000000, sizeof(float));
mdl.addRecurrent(state, TEMPORAL_SIZE);
mdl.addDesire(desire, DESIRE_LEN);
mdl.addTrafficConvention(traffic_convention, TRAFFIC_CONVENTION_LEN);
mdl.addImage(input, 0);
if (USE_EXTRA) {
mdl.addExtra(extra, 0);
}
// first run
printf("************** execute 1 **************\n");
memset(output, 0, OUTPUT_SIZE * sizeof(float));
mdl.execute(input, 0);
mdl.execute();
// save model
bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0);

selfdrive/test/openpilotci.py

@ -8,7 +8,7 @@ BASE_URL = "https://commadataci.blob.core.windows.net/openpilotci/"
TOKEN_PATH = "/data/azure_token"
def get_url(route_name, segment_num, log_type="rlog"):
ext = "hevc" if log_type in ["fcamera", "dcamera"] else "bz2"
ext = "hevc" if log_type.endswith('camera') else "bz2"
return BASE_URL + f"{route_name.replace('|', '/')}/{segment_num}/{log_type}.{ext}"
def upload_file(path, name):

selfdrive/test/process_replay/model_replay.py

@ -3,8 +3,8 @@ import os
import sys
import time
from collections import defaultdict
from tqdm import tqdm
from typing import Any
from itertools import zip_longest
import cereal.messaging as messaging
from cereal.visionipc.visionipc_pyx import VisionIpcServer, VisionStreamType # pylint: disable=no-name-in-module, import-error
@ -29,6 +29,8 @@ SEGMENT = 0
SEND_EXTRA_INPUTS = bool(os.getenv("SEND_EXTRA_INPUTS", "0"))
VIPC_STREAM = {"roadCameraState": VisionStreamType.VISION_STREAM_ROAD, "driverCameraState": VisionStreamType.VISION_STREAM_DRIVER,
"wideRoadCameraState": VisionStreamType.VISION_STREAM_WIDE_ROAD}
def get_log_fn(ref_commit):
return f"{TEST_ROUTE}_{'model_tici' if TICI else 'model'}_{ref_commit}.bz2"
@ -48,19 +50,21 @@ def model_replay(lr, frs):
vipc_server = VisionIpcServer("camerad")
vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, False, *(tici_f_frame_size if TICI else eon_f_frame_size))
vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, False, *(tici_d_frame_size if TICI else eon_d_frame_size))
vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, False, *(tici_f_frame_size))
vipc_server.start_listener()
sm = messaging.SubMaster(['modelV2', 'driverState'])
pm = messaging.PubMaster(['roadCameraState', 'driverCameraState', 'liveCalibration', 'lateralPlan'])
pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'driverCameraState', 'liveCalibration', 'lateralPlan'])
try:
managed_processes['modeld'].start()
managed_processes['dmonitoringmodeld'].start()
time.sleep(2)
time.sleep(5)
sm.update(1000)
log_msgs = []
last_desire = None
recv_cnt = defaultdict(lambda: 0)
frame_idxs = defaultdict(lambda: 0)
# init modeld with valid calibration
@ -69,38 +73,59 @@ def model_replay(lr, frs):
pm.send(cal_msgs[0].which(), cal_msgs[0].as_builder())
time.sleep(0.1)
for msg in tqdm(lr):
if SEND_EXTRA_INPUTS:
if msg.which() == "liveCalibration":
last_calib = list(msg.liveCalibration.rpyCalib)
pm.send(msg.which(), replace_calib(msg, last_calib))
elif msg.which() == "lateralPlan":
last_desire = msg.lateralPlan.desire
dat = messaging.new_message('lateralPlan')
dat.lateralPlan.desire = last_desire
pm.send('lateralPlan', dat)
msgs = defaultdict(list)
for msg in lr:
msgs[msg.which()].append(msg)
if msg.which() in ["roadCameraState", "driverCameraState"]:
camera_state = getattr(msg, msg.which())
stream = VisionStreamType.VISION_STREAM_ROAD if msg.which() == "roadCameraState" else VisionStreamType.VISION_STREAM_DRIVER
img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0]
for cam_msgs in zip_longest(msgs['roadCameraState'], msgs['wideRoadCameraState'], msgs['driverCameraState']):
# need a pair of road/wide msgs
if TICI and None in (cam_msgs[0], cam_msgs[1]):
break
# send camera state and frame
pm.send(msg.which(), msg.as_builder())
vipc_server.send(stream, img.flatten().tobytes(), camera_state.frameId,
camera_state.timestampSof, camera_state.timestampEof)
for msg in cam_msgs:
if msg is None:
continue
# wait for a response
with Timeout(seconds=15):
packet_from_camera = {"roadCameraState": "modelV2", "driverCameraState": "driverState"}
log_msgs.append(messaging.recv_one(sm.sock[packet_from_camera[msg.which()]]))
if SEND_EXTRA_INPUTS:
if msg.which() == "liveCalibration":
last_calib = list(msg.liveCalibration.rpyCalib)
pm.send(msg.which(), replace_calib(msg, last_calib))
elif msg.which() == "lateralPlan":
last_desire = msg.lateralPlan.desire
dat = messaging.new_message('lateralPlan')
dat.lateralPlan.desire = last_desire
pm.send('lateralPlan', dat)
frame_idxs[msg.which()] += 1
if frame_idxs[msg.which()] >= frs[msg.which()].frame_count:
break
if msg.which() in VIPC_STREAM:
msg = msg.as_builder()
camera_state = getattr(msg, msg.which())
img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0]
frame_idxs[msg.which()] += 1
spinner.update("replaying models: road %d/%d, driver %d/%d" % (frame_idxs['roadCameraState'],
frs['roadCameraState'].frame_count, frame_idxs['driverCameraState'], frs['driverCameraState'].frame_count))
# send camera state and frame
camera_state.frameId = frame_idxs[msg.which()]
pm.send(msg.which(), msg)
vipc_server.send(VIPC_STREAM[msg.which()], img.flatten().tobytes(), camera_state.frameId,
camera_state.timestampSof, camera_state.timestampEof)
recv = None
if msg.which() in ('roadCameraState', 'wideRoadCameraState'):
if not TICI or min(frame_idxs['roadCameraState'], frame_idxs['wideRoadCameraState']) > recv_cnt['modelV2']:
recv = "modelV2"
elif msg.which() == 'driverCameraState':
recv = "driverState"
# wait for a response
with Timeout(15, f"timed out waiting for {recv}"):
if recv:
recv_cnt[recv] += 1
log_msgs.append(messaging.recv_one(sm.sock[recv]))
spinner.update("replaying models: road %d/%d, driver %d/%d" % (frame_idxs['roadCameraState'],
frs['roadCameraState'].frame_count, frame_idxs['driverCameraState'], frs['driverCameraState'].frame_count))
if any(frame_idxs[c] >= frs[c].frame_count for c in frame_idxs.keys()):
break
finally:
spinner.close()
@ -123,6 +148,8 @@ if __name__ == "__main__":
'roadCameraState': FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="fcamera")),
'driverCameraState': FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="dcamera")),
}
if TICI:
frs['wideRoadCameraState'] = FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="ecamera"))
# run replay
log_msgs = model_replay(lr, frs)
@ -133,25 +160,29 @@ if __name__ == "__main__":
with open(ref_commit_fn) as f:
ref_commit = f.read().strip()
log_fn = get_log_fn(ref_commit)
cmp_log = LogReader(BASE_URL + log_fn)
try:
cmp_log = LogReader(BASE_URL + log_fn)
ignore = [
'logMonoTime',
'modelV2.frameDropPerc',
'modelV2.modelExecutionTime',
'driverState.modelExecutionTime',
'driverState.dspExecutionTime'
]
tolerance = None if not PC else 1e-3
results: Any = {TEST_ROUTE: {}}
results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore)
diff1, diff2, failed = format_diff(results, ref_commit)
ignore = [
'logMonoTime',
'modelV2.frameDropPerc',
'modelV2.modelExecutionTime',
'driverState.modelExecutionTime',
'driverState.dspExecutionTime'
]
tolerance = None if not PC else 1e-3
results: Any = {TEST_ROUTE: {}}
results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore)
diff1, diff2, failed = format_diff(results, ref_commit)
print(diff2)
print('-------------\n'*5)
print(diff1)
with open("model_diff.txt", "w") as f:
f.write(diff2)
print(diff2)
print('-------------\n'*5)
print(diff1)
with open("model_diff.txt", "w") as f:
f.write(diff2)
except Exception as e:
print(str(e))
failed = True
# upload new refs
if update or failed:

selfdrive/test/process_replay/model_replay_ref_commit

@ -1 +1 @@
0c94f7c258bcdabc34c7f7be6cb6c2502afbb339
b48904b48036aa0c023759f214740f226b52b5b8