bad AF state logging (#1365)

albatross
ZwX1616 2020-05-02 21:40:43 -07:00 committed by GitHub
parent e568d3cadc
commit 968e2585cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 358 additions and 4 deletions

View File

@ -355,6 +355,11 @@ selfdrive/camerad/transforms/rgb_to_yuv.h
selfdrive/camerad/transforms/rgb_to_yuv.cl
selfdrive/camerad/transforms/rgb_to_yuv_test.cc
selfdrive/camerad/imgproc/conv.cl
selfdrive/camerad/imgproc/pool.cl
selfdrive/camerad/imgproc/utils.cc
selfdrive/camerad/imgproc/utils.h
selfdrive/modeld/SConscript
selfdrive/modeld/modeld.cc
selfdrive/modeld/dmonitoringmodeld.cc

View File

@ -26,5 +26,6 @@ env.SharedLibrary('snapshot/visionipc',
env.Program('camerad', [
'main.cc',
'transforms/rgb_to_yuv.c',
'imgproc/utils.cc',
cameras,
], LIBS=libs)

View File

@ -1719,6 +1719,8 @@ static void parse_autofocus(CameraState *s, uint8_t *d) {
for (int i = 0; i < NUM_FOCUS; i++) {
int doff = i*5+5;
s->confidence[i] = d[doff];
// this should just be a 10-bit signed int instead of 11
// TODO: write it in a nicer way
int16_t focus_t = (d[doff+1] << 3) | (d[doff+2] >> 5);
if (focus_t >= 1024) focus_t = -(2048-focus_t);
s->focus[i] = focus_t;

View File

@ -0,0 +1,110 @@
// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights
// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights
// convert input rgb image to single channel then conv
__kernel void rgb2gray_conv2d(
const __global uchar * input,
__global short * output,
__constant short * filter,
__local uchar3 * cached
)
{
const int rowOffset = get_global_id(1) * IMAGE_W;
const int my = get_global_id(0) + rowOffset;
const int localRowLen = TWICE_HALF_FILTER_SIZE + get_local_size(0);
const int localRowOffset = ( get_local_id(1) + HALF_FILTER_SIZE ) * localRowLen;
const int myLocal = localRowOffset + get_local_id(0) + HALF_FILTER_SIZE;
// cache local pixels
cached[ myLocal ].x = input[ my * 3 ]; // r
cached[ myLocal ].y = input[ my * 3 + 1]; // g
cached[ myLocal ].z = input[ my * 3 + 2]; // b
// pad
if (
get_global_id(0) < HALF_FILTER_SIZE ||
get_global_id(0) > IMAGE_W - HALF_FILTER_SIZE - 1 ||
get_global_id(1) < HALF_FILTER_SIZE ||
get_global_id(1) > IMAGE_H - HALF_FILTER_SIZE - 1
)
{
barrier(CLK_LOCAL_MEM_FENCE);
return;
}
else
{
int localColOffset = -1;
int globalColOffset = -1;
// cache extra
if ( get_local_id(0) < HALF_FILTER_SIZE )
{
localColOffset = get_local_id(0);
globalColOffset = -HALF_FILTER_SIZE;
cached[ localRowOffset + get_local_id(0) ].x = input[ my * 3 - HALF_FILTER_SIZE * 3 ];
cached[ localRowOffset + get_local_id(0) ].y = input[ my * 3 - HALF_FILTER_SIZE * 3 + 1];
cached[ localRowOffset + get_local_id(0) ].z = input[ my * 3 - HALF_FILTER_SIZE * 3 + 2];
}
else if ( get_local_id(0) >= get_local_size(0) - HALF_FILTER_SIZE )
{
localColOffset = get_local_id(0) + TWICE_HALF_FILTER_SIZE;
globalColOffset = HALF_FILTER_SIZE;
cached[ myLocal + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE * 3 ];
cached[ myLocal + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE * 3 + 1];
cached[ myLocal + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE * 3 + 2];
}
if ( get_local_id(1) < HALF_FILTER_SIZE )
{
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 ];
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 1];
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 2];
if (localColOffset > 0)
{
cached[ get_local_id(1) * localRowLen + localColOffset ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3];
cached[ get_local_id(1) * localRowLen + localColOffset ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1];
cached[ get_local_id(1) * localRowLen + localColOffset ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2];
}
}
else if ( get_local_id(1) >= get_local_size(1) -HALF_FILTER_SIZE )
{
int offset = ( get_local_id(1) + TWICE_HALF_FILTER_SIZE ) * localRowLen;
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 ];
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 1];
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 2];
if (localColOffset > 0)
{
cached[ offset + localColOffset ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3];
cached[ offset + localColOffset ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1];
cached[ offset + localColOffset ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2];
}
}
// sync
barrier(CLK_LOCAL_MEM_FENCE);
// perform convolution
int fIndex = 0;
short sum = 0;
for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++)
{
int curRow = r * localRowLen;
for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++)
{
if (!FLIP_RB){
// sum += dot(rgb_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ];
sum += (cached[ myLocal + curRow + c ].x / 3 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 9) * filter[ fIndex ];
} else {
// sum += dot(bgr_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ];
sum += (cached[ myLocal + curRow + c ].x / 9 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 3) * filter[ fIndex ];
}
}
}
output[my] = sum;
}
}

View File

@ -0,0 +1,34 @@
// calculate variance in each subregion
__kernel void var_pool(
const __global char * input,
__global ushort * output // should not be larger than 128*128 so uint16
)
{
const int xidx = get_global_id(0) + ROI_X_MIN;
const int yidx = get_global_id(1) + ROI_Y_MIN;
const int size = X_PITCH * Y_PITCH;
float fsum = 0;
char mean, max;
for (int i = 0; i < size; i++) {
int x_offset = i % X_PITCH;
int y_offset = i / X_PITCH;
fsum += input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X];
max = input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]>max ? input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]:max;
}
mean = convert_char_rte(fsum / size);
float fvar = 0;
for (int i = 0; i < size; i++) {
int x_offset = i % X_PITCH;
int y_offset = i / X_PITCH;
fvar += (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean) * (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean);
}
fvar = fvar / size;
output[(xidx-ROI_X_MIN)+(yidx-ROI_Y_MIN)*(ROI_X_MAX-ROI_X_MIN+1)] = convert_ushort_rte(5 * fvar + convert_float_rte(max));
}

View File

@ -0,0 +1,44 @@
#include "utils.h"
#include <stdio.h>
#include <algorithm>
// calculate score based on laplacians in one area
void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch) {
int size = x_pitch * y_pitch;
// avg and max of roi
float fsum = 0;
int16_t mean, max;
max = 0;
for (int i = 0; i < size; i++) {
int x_offset = i % x_pitch;
int y_offset = i / x_pitch;
fsum += lap[x_offset + y_offset*x_pitch];
max = std::max(lap[x_offset + y_offset*x_pitch], max);
}
mean = fsum / size;
// var of roi
float fvar = 0;
for (int i = 0; i < size; i++) {
int x_offset = i % x_pitch;
int y_offset = i / x_pitch;
fvar += (float)((lap[x_offset + y_offset*x_pitch] - mean) * (lap[x_offset + y_offset*x_pitch] - mean));
}
fvar = fvar / size;
*res = std::min(5 * fvar + max, (float)65535);
}
bool is_blur(uint16_t *lapmap) {
int n_roi = (ROI_X_MAX - ROI_X_MIN + 1) * (ROI_Y_MAX - ROI_Y_MIN + 1);
float bad_sum = 0;
for (int i = 0; i < n_roi; i++) {
if (*(lapmap + i) < LM_THRESH) {
bad_sum += 1/(float)n_roi;
}
}
return (bad_sum > LM_PREC_THRESH);
}

View File

@ -0,0 +1,30 @@
#ifndef IMGPROC_UTILS
#define IMGPROC_UTILS
#include <stdint.h>
#define NUM_SEGMENTS_X 8
#define NUM_SEGMENTS_Y 6
#define ROI_X_MIN 1
#define ROI_X_MAX 6
#define ROI_Y_MIN 2
#define ROI_Y_MAX 3
#define LM_THRESH 222
#define LM_PREC_THRESH 0.9
// only apply to QCOM
#define FULL_STRIDE_X 1280
#define FULL_STRIDE_Y 896
#define CONV_LOCAL_WORKSIZE 16
const int16_t lapl_conv_krnl[9] = {0, 1, 0,
1, -4, 1,
0, 1, 0};
void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch);
bool is_blur(uint16_t *lapmap);
#endif

View File

@ -23,6 +23,7 @@
#include "messaging.hpp"
#include "transforms/rgb_to_yuv.h"
#include "imgproc/utils.h"
#include "clutil.h"
#include "bufs.h"
@ -78,6 +79,14 @@ struct VisionState {
cl_kernel krnl_debayer_rear;
cl_kernel krnl_debayer_front;
cl_program prg_rgb_laplacian;
cl_kernel krnl_rgb_laplacian;
int conv_cl_localMemSize;
size_t conv_cl_globalWorkSize[2];
size_t conv_cl_localWorkSize[2];
size_t pool_cl_globalWorkSize[2];
// processing
TBuffer ui_tb;
TBuffer ui_front_tb;
@ -110,6 +119,8 @@ struct VisionState {
int rgb_width, rgb_height, rgb_stride;
VisionBuf rgb_bufs[UI_BUF_COUNT];
cl_mem rgb_bufs_cl[UI_BUF_COUNT];
cl_mem rgb_conv_roi_cl, rgb_conv_result_cl, rgb_conv_filter_cl;
uint16_t lapres[(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)];
size_t rgb_front_buf_size;
int rgb_front_width, rgb_front_height, rgb_front_stride;
@ -177,9 +188,9 @@ void* frontview_thread(void *arg) {
cl_event debayer_event;
if (s->cameras.front.ci.bayer) {
err = clSetKernelArg(s->krnl_debayer_front, 0, sizeof(cl_mem), &s->front_camera_bufs_cl[buf_idx]);
cl_check_error(err);
assert(err == 0);
err = clSetKernelArg(s->krnl_debayer_front, 1, sizeof(cl_mem), &s->rgb_front_bufs_cl[rgb_idx]);
cl_check_error(err);
assert(err == 0);
float digital_gain = 1.0;
err = clSetKernelArg(s->krnl_debayer_front, 2, sizeof(float), &digital_gain);
assert(err == 0);
@ -390,9 +401,9 @@ void* processing_thread(void *arg) {
cl_event debayer_event;
if (s->cameras.rear.ci.bayer) {
err = clSetKernelArg(s->krnl_debayer_rear, 0, sizeof(cl_mem), &s->camera_bufs_cl[buf_idx]);
cl_check_error(err);
assert(err == 0);
err = clSetKernelArg(s->krnl_debayer_rear, 1, sizeof(cl_mem), &s->rgb_bufs_cl[rgb_idx]);
cl_check_error(err);
assert(err == 0);
err = clSetKernelArg(s->krnl_debayer_rear, 2, sizeof(float), &s->cameras.rear.digital_gain);
assert(err == 0);
@ -416,6 +427,74 @@ void* processing_thread(void *arg) {
visionbuf_sync(&s->rgb_bufs[rgb_idx], VISIONBUF_SYNC_FROM_DEVICE);
#ifdef QCOM
/*FILE *dump_rgb_file = fopen("/tmp/process_dump.rgb", "wb");
fwrite(s->rgb_bufs[rgb_idx].addr, s->rgb_bufs[rgb_idx].len, sizeof(uint8_t), dump_rgb_file);
fclose(dump_rgb_file);
printf("ORIGINAL SAVED!!\n");*/
/*double t10 = millis_since_boot();*/
// cache rgb roi and write to cl
uint8_t *rgb_roi_buf = new uint8_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)*3];
int roi_id = cnt % ((ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)); // rolling roi
int roi_x_offset = roi_id % (ROI_X_MAX-ROI_X_MIN+1);
int roi_y_offset = roi_id / (ROI_X_MAX-ROI_X_MIN+1);
for (int r=0;r<(s->rgb_height/NUM_SEGMENTS_Y);r++) {
memcpy(rgb_roi_buf + r * (s->rgb_width/NUM_SEGMENTS_X) * 3,
(uint8_t *) s->rgb_bufs[rgb_idx].addr + \
(ROI_Y_MIN + roi_y_offset) * s->rgb_height/NUM_SEGMENTS_Y * FULL_STRIDE_X * 3 + \
(ROI_X_MIN + roi_x_offset) * s->rgb_width/NUM_SEGMENTS_X * 3 + r * FULL_STRIDE_X * 3,
s->rgb_width/NUM_SEGMENTS_X * 3);
}
err = clEnqueueWriteBuffer (q, s->rgb_conv_roi_cl, true, 0,
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), rgb_roi_buf, 0, 0, 0);
assert(err == 0);
/*double t11 = millis_since_boot();
printf("cache time: %f ms\n", t11 - t10);
t10 = millis_since_boot();*/
err = clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *) &s->rgb_conv_roi_cl);
assert(err == 0);
err = clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *) &s->rgb_conv_result_cl);
assert(err == 0);
err = clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *) &s->rgb_conv_filter_cl);
assert(err == 0);
err = clSetKernelArg(s->krnl_rgb_laplacian, 3, s->conv_cl_localMemSize, 0);
assert(err == 0);
cl_event conv_event;
err = clEnqueueNDRangeKernel(q, s->krnl_rgb_laplacian, 2, NULL,
s->conv_cl_globalWorkSize, s->conv_cl_localWorkSize, 0, 0, &conv_event);
assert(err == 0);
clWaitForEvents(1, &conv_event);
clReleaseEvent(conv_event);
int16_t *conv_result = new int16_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)];
err = clEnqueueReadBuffer(q, s->rgb_conv_result_cl, true, 0,
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), conv_result, 0, 0, 0);
assert(err == 0);
/*t11 = millis_since_boot();
printf("conv time: %f ms\n", t11 - t10);
t10 = millis_since_boot();*/
get_lapmap_one(conv_result, &s->lapres[roi_id], s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y);
/*t11 = millis_since_boot();
printf("pool time: %f ms\n", t11 - t10);
t10 = millis_since_boot();*/
delete [] rgb_roi_buf;
delete [] conv_result;
/*t11 = millis_since_boot();
printf("process time: %f ms\n ----- \n", t11 - t10);
t10 = millis_since_boot();*/
#endif
double t2 = millis_since_boot();
@ -465,6 +544,8 @@ void* processing_thread(void *arg) {
kj::ArrayPtr<const uint8_t> focus_confs(&s->cameras.rear.confidence[0], NUM_FOCUS);
framed.setFocusVal(focus_vals);
framed.setFocusConf(focus_confs);
kj::ArrayPtr<const uint16_t> sharpness_score(&s->lapres[0], (ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1));
framed.setSharpnessScore(sharpness_score);
#endif
// TODO: add this back
@ -907,6 +988,34 @@ cl_program build_debayer_program(VisionState *s,
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "cameras/debayer.cl", args);
}
cl_program build_conv_program(VisionState *s,
int image_w, int image_h,
int filter_size) {
char args[4096];
snprintf(args, sizeof(args),
"-cl-fast-relaxed-math -cl-denorms-are-zero "
"-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d "
"-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d",
image_w, image_h, 1,
filter_size, filter_size/2, (filter_size/2)*2, (filter_size/2)*image_w);
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/conv.cl", args);
}
cl_program build_pool_program(VisionState *s,
int full_stride_x,
int x_pitch, int y_pitch,
int roi_x_min, int roi_x_max,
int roi_y_min, int roi_y_max) {
char args[4096];
snprintf(args, sizeof(args),
"-cl-fast-relaxed-math -cl-denorms-are-zero "
"-DFULL_STRIDE_X=%d -DX_PITCH=%d -DY_PITCH=%d "
"-DROI_X_MIN=%d -DROI_X_MAX=%d -DROI_Y_MIN=%d -DROI_Y_MAX=%d",
full_stride_x, x_pitch, y_pitch,
roi_x_min, roi_x_max, roi_y_min, roi_y_max);
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/pool.cl", args);
}
void cl_init(VisionState *s) {
int err;
cl_platform_id platform_id = NULL;
@ -1049,6 +1158,25 @@ void init_buffers(VisionState *s) {
assert(err == 0);
}
s->prg_rgb_laplacian = build_conv_program(s, s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y,
3);
s->krnl_rgb_laplacian = clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err);
assert(err == 0);
s->rgb_conv_roi_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), NULL, NULL);
s->rgb_conv_result_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), NULL, NULL);
s->rgb_conv_filter_cl = clCreateBuffer(s->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
9 * sizeof(int16_t), (void*)&lapl_conv_krnl, NULL);
s->conv_cl_localMemSize = ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) ) * ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) );
s->conv_cl_localMemSize *= 3 * sizeof(uint8_t);
s->conv_cl_globalWorkSize[0] = s->rgb_width/NUM_SEGMENTS_X;
s->conv_cl_globalWorkSize[1] = s->rgb_height/NUM_SEGMENTS_Y;
s->conv_cl_localWorkSize[0] = CONV_LOCAL_WORKSIZE;
s->conv_cl_localWorkSize[1] = CONV_LOCAL_WORKSIZE;
for (int i=0; i<(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1); i++) {s->lapres[i] = 16160;}
rgb_to_yuv_init(&s->rgb_to_yuv_state, s->context, s->device_id, s->yuv_width, s->yuv_height, s->rgb_stride);
rgb_to_yuv_init(&s->front_rgb_to_yuv_state, s->context, s->device_id, s->yuv_front_width, s->yuv_front_height, s->rgb_front_stride);
}