bad AF state logging (#1365)
parent
e568d3cadc
commit
968e2585cc
|
@ -355,6 +355,11 @@ selfdrive/camerad/transforms/rgb_to_yuv.h
|
|||
selfdrive/camerad/transforms/rgb_to_yuv.cl
|
||||
selfdrive/camerad/transforms/rgb_to_yuv_test.cc
|
||||
|
||||
selfdrive/camerad/imgproc/conv.cl
|
||||
selfdrive/camerad/imgproc/pool.cl
|
||||
selfdrive/camerad/imgproc/utils.cc
|
||||
selfdrive/camerad/imgproc/utils.h
|
||||
|
||||
selfdrive/modeld/SConscript
|
||||
selfdrive/modeld/modeld.cc
|
||||
selfdrive/modeld/dmonitoringmodeld.cc
|
||||
|
|
|
@ -26,5 +26,6 @@ env.SharedLibrary('snapshot/visionipc',
|
|||
env.Program('camerad', [
|
||||
'main.cc',
|
||||
'transforms/rgb_to_yuv.c',
|
||||
'imgproc/utils.cc',
|
||||
cameras,
|
||||
], LIBS=libs)
|
||||
|
|
|
@ -1719,6 +1719,8 @@ static void parse_autofocus(CameraState *s, uint8_t *d) {
|
|||
for (int i = 0; i < NUM_FOCUS; i++) {
|
||||
int doff = i*5+5;
|
||||
s->confidence[i] = d[doff];
|
||||
// this should just be a 10-bit signed int instead of 11
|
||||
// TODO: write it in a nicer way
|
||||
int16_t focus_t = (d[doff+1] << 3) | (d[doff+2] >> 5);
|
||||
if (focus_t >= 1024) focus_t = -(2048-focus_t);
|
||||
s->focus[i] = focus_t;
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
// const __constant float3 rgb_weights = (0.299, 0.587, 0.114); // opencv rgb2gray weights
|
||||
// const __constant float3 bgr_weights = (0.114, 0.587, 0.299); // bgr2gray weights
|
||||
|
||||
// convert input rgb image to single channel then conv
|
||||
__kernel void rgb2gray_conv2d(
|
||||
const __global uchar * input,
|
||||
__global short * output,
|
||||
__constant short * filter,
|
||||
__local uchar3 * cached
|
||||
)
|
||||
{
|
||||
const int rowOffset = get_global_id(1) * IMAGE_W;
|
||||
const int my = get_global_id(0) + rowOffset;
|
||||
|
||||
const int localRowLen = TWICE_HALF_FILTER_SIZE + get_local_size(0);
|
||||
const int localRowOffset = ( get_local_id(1) + HALF_FILTER_SIZE ) * localRowLen;
|
||||
const int myLocal = localRowOffset + get_local_id(0) + HALF_FILTER_SIZE;
|
||||
|
||||
// cache local pixels
|
||||
cached[ myLocal ].x = input[ my * 3 ]; // r
|
||||
cached[ myLocal ].y = input[ my * 3 + 1]; // g
|
||||
cached[ myLocal ].z = input[ my * 3 + 2]; // b
|
||||
|
||||
// pad
|
||||
if (
|
||||
get_global_id(0) < HALF_FILTER_SIZE ||
|
||||
get_global_id(0) > IMAGE_W - HALF_FILTER_SIZE - 1 ||
|
||||
get_global_id(1) < HALF_FILTER_SIZE ||
|
||||
get_global_id(1) > IMAGE_H - HALF_FILTER_SIZE - 1
|
||||
)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
int localColOffset = -1;
|
||||
int globalColOffset = -1;
|
||||
|
||||
// cache extra
|
||||
if ( get_local_id(0) < HALF_FILTER_SIZE )
|
||||
{
|
||||
localColOffset = get_local_id(0);
|
||||
globalColOffset = -HALF_FILTER_SIZE;
|
||||
|
||||
cached[ localRowOffset + get_local_id(0) ].x = input[ my * 3 - HALF_FILTER_SIZE * 3 ];
|
||||
cached[ localRowOffset + get_local_id(0) ].y = input[ my * 3 - HALF_FILTER_SIZE * 3 + 1];
|
||||
cached[ localRowOffset + get_local_id(0) ].z = input[ my * 3 - HALF_FILTER_SIZE * 3 + 2];
|
||||
}
|
||||
else if ( get_local_id(0) >= get_local_size(0) - HALF_FILTER_SIZE )
|
||||
{
|
||||
localColOffset = get_local_id(0) + TWICE_HALF_FILTER_SIZE;
|
||||
globalColOffset = HALF_FILTER_SIZE;
|
||||
|
||||
cached[ myLocal + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE * 3 ];
|
||||
cached[ myLocal + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE * 3 + 1];
|
||||
cached[ myLocal + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE * 3 + 2];
|
||||
}
|
||||
|
||||
|
||||
if ( get_local_id(1) < HALF_FILTER_SIZE )
|
||||
{
|
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 ];
|
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 1];
|
||||
cached[ get_local_id(1) * localRowLen + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + 2];
|
||||
if (localColOffset > 0)
|
||||
{
|
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].x = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3];
|
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].y = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1];
|
||||
cached[ get_local_id(1) * localRowLen + localColOffset ].z = input[ my * 3 - HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2];
|
||||
}
|
||||
}
|
||||
else if ( get_local_id(1) >= get_local_size(1) -HALF_FILTER_SIZE )
|
||||
{
|
||||
int offset = ( get_local_id(1) + TWICE_HALF_FILTER_SIZE ) * localRowLen;
|
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 ];
|
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 1];
|
||||
cached[ offset + get_local_id(0) + HALF_FILTER_SIZE ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + 2];
|
||||
if (localColOffset > 0)
|
||||
{
|
||||
cached[ offset + localColOffset ].x = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3];
|
||||
cached[ offset + localColOffset ].y = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 1];
|
||||
cached[ offset + localColOffset ].z = input[ my * 3 + HALF_FILTER_SIZE_IMAGE_W * 3 + globalColOffset * 3 + 2];
|
||||
}
|
||||
}
|
||||
|
||||
// sync
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// perform convolution
|
||||
int fIndex = 0;
|
||||
short sum = 0;
|
||||
|
||||
for (int r = -HALF_FILTER_SIZE; r <= HALF_FILTER_SIZE; r++)
|
||||
{
|
||||
int curRow = r * localRowLen;
|
||||
for (int c = -HALF_FILTER_SIZE; c <= HALF_FILTER_SIZE; c++, fIndex++)
|
||||
{
|
||||
if (!FLIP_RB){
|
||||
// sum += dot(rgb_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ];
|
||||
sum += (cached[ myLocal + curRow + c ].x / 3 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 9) * filter[ fIndex ];
|
||||
} else {
|
||||
// sum += dot(bgr_weights, cached[ myLocal + curRow + c ]) * filter[ fIndex ];
|
||||
sum += (cached[ myLocal + curRow + c ].x / 9 + cached[ myLocal + curRow + c ].y / 2 + cached[ myLocal + curRow + c ].z / 3) * filter[ fIndex ];
|
||||
}
|
||||
}
|
||||
}
|
||||
output[my] = sum;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// calculate variance in each subregion
|
||||
__kernel void var_pool(
|
||||
const __global char * input,
|
||||
__global ushort * output // should not be larger than 128*128 so uint16
|
||||
)
|
||||
{
|
||||
const int xidx = get_global_id(0) + ROI_X_MIN;
|
||||
const int yidx = get_global_id(1) + ROI_Y_MIN;
|
||||
|
||||
const int size = X_PITCH * Y_PITCH;
|
||||
|
||||
float fsum = 0;
|
||||
char mean, max;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
int x_offset = i % X_PITCH;
|
||||
int y_offset = i / X_PITCH;
|
||||
fsum += input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X];
|
||||
max = input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]>max ? input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X]:max;
|
||||
}
|
||||
|
||||
mean = convert_char_rte(fsum / size);
|
||||
|
||||
float fvar = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
int x_offset = i % X_PITCH;
|
||||
int y_offset = i / X_PITCH;
|
||||
fvar += (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean) * (input[xidx*X_PITCH + yidx*Y_PITCH*FULL_STRIDE_X + x_offset + y_offset*FULL_STRIDE_X] - mean);
|
||||
}
|
||||
|
||||
fvar = fvar / size;
|
||||
|
||||
output[(xidx-ROI_X_MIN)+(yidx-ROI_Y_MIN)*(ROI_X_MAX-ROI_X_MIN+1)] = convert_ushort_rte(5 * fvar + convert_float_rte(max));
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
#include "utils.h"
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
|
||||
// calculate score based on laplacians in one area
|
||||
void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch) {
|
||||
int size = x_pitch * y_pitch;
|
||||
// avg and max of roi
|
||||
float fsum = 0;
|
||||
int16_t mean, max;
|
||||
max = 0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
int x_offset = i % x_pitch;
|
||||
int y_offset = i / x_pitch;
|
||||
fsum += lap[x_offset + y_offset*x_pitch];
|
||||
max = std::max(lap[x_offset + y_offset*x_pitch], max);
|
||||
}
|
||||
|
||||
mean = fsum / size;
|
||||
|
||||
// var of roi
|
||||
float fvar = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
int x_offset = i % x_pitch;
|
||||
int y_offset = i / x_pitch;
|
||||
fvar += (float)((lap[x_offset + y_offset*x_pitch] - mean) * (lap[x_offset + y_offset*x_pitch] - mean));
|
||||
}
|
||||
|
||||
fvar = fvar / size;
|
||||
|
||||
*res = std::min(5 * fvar + max, (float)65535);
|
||||
}
|
||||
|
||||
bool is_blur(uint16_t *lapmap) {
|
||||
int n_roi = (ROI_X_MAX - ROI_X_MIN + 1) * (ROI_Y_MAX - ROI_Y_MIN + 1);
|
||||
float bad_sum = 0;
|
||||
for (int i = 0; i < n_roi; i++) {
|
||||
if (*(lapmap + i) < LM_THRESH) {
|
||||
bad_sum += 1/(float)n_roi;
|
||||
}
|
||||
}
|
||||
return (bad_sum > LM_PREC_THRESH);
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef IMGPROC_UTILS
|
||||
#define IMGPROC_UTILS
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define NUM_SEGMENTS_X 8
|
||||
#define NUM_SEGMENTS_Y 6
|
||||
|
||||
#define ROI_X_MIN 1
|
||||
#define ROI_X_MAX 6
|
||||
#define ROI_Y_MIN 2
|
||||
#define ROI_Y_MAX 3
|
||||
|
||||
#define LM_THRESH 222
|
||||
#define LM_PREC_THRESH 0.9
|
||||
|
||||
// only apply to QCOM
|
||||
#define FULL_STRIDE_X 1280
|
||||
#define FULL_STRIDE_Y 896
|
||||
|
||||
#define CONV_LOCAL_WORKSIZE 16
|
||||
|
||||
const int16_t lapl_conv_krnl[9] = {0, 1, 0,
|
||||
1, -4, 1,
|
||||
0, 1, 0};
|
||||
|
||||
void get_lapmap_one(int16_t *lap, uint16_t *res, int x_pitch, int y_pitch);
|
||||
bool is_blur(uint16_t *lapmap);
|
||||
|
||||
#endif
|
|
@ -23,6 +23,7 @@
|
|||
#include "messaging.hpp"
|
||||
|
||||
#include "transforms/rgb_to_yuv.h"
|
||||
#include "imgproc/utils.h"
|
||||
|
||||
#include "clutil.h"
|
||||
#include "bufs.h"
|
||||
|
@ -78,6 +79,14 @@ struct VisionState {
|
|||
cl_kernel krnl_debayer_rear;
|
||||
cl_kernel krnl_debayer_front;
|
||||
|
||||
cl_program prg_rgb_laplacian;
|
||||
cl_kernel krnl_rgb_laplacian;
|
||||
|
||||
int conv_cl_localMemSize;
|
||||
size_t conv_cl_globalWorkSize[2];
|
||||
size_t conv_cl_localWorkSize[2];
|
||||
size_t pool_cl_globalWorkSize[2];
|
||||
|
||||
// processing
|
||||
TBuffer ui_tb;
|
||||
TBuffer ui_front_tb;
|
||||
|
@ -110,6 +119,8 @@ struct VisionState {
|
|||
int rgb_width, rgb_height, rgb_stride;
|
||||
VisionBuf rgb_bufs[UI_BUF_COUNT];
|
||||
cl_mem rgb_bufs_cl[UI_BUF_COUNT];
|
||||
cl_mem rgb_conv_roi_cl, rgb_conv_result_cl, rgb_conv_filter_cl;
|
||||
uint16_t lapres[(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)];
|
||||
|
||||
size_t rgb_front_buf_size;
|
||||
int rgb_front_width, rgb_front_height, rgb_front_stride;
|
||||
|
@ -177,9 +188,9 @@ void* frontview_thread(void *arg) {
|
|||
cl_event debayer_event;
|
||||
if (s->cameras.front.ci.bayer) {
|
||||
err = clSetKernelArg(s->krnl_debayer_front, 0, sizeof(cl_mem), &s->front_camera_bufs_cl[buf_idx]);
|
||||
cl_check_error(err);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_debayer_front, 1, sizeof(cl_mem), &s->rgb_front_bufs_cl[rgb_idx]);
|
||||
cl_check_error(err);
|
||||
assert(err == 0);
|
||||
float digital_gain = 1.0;
|
||||
err = clSetKernelArg(s->krnl_debayer_front, 2, sizeof(float), &digital_gain);
|
||||
assert(err == 0);
|
||||
|
@ -390,9 +401,9 @@ void* processing_thread(void *arg) {
|
|||
cl_event debayer_event;
|
||||
if (s->cameras.rear.ci.bayer) {
|
||||
err = clSetKernelArg(s->krnl_debayer_rear, 0, sizeof(cl_mem), &s->camera_bufs_cl[buf_idx]);
|
||||
cl_check_error(err);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_debayer_rear, 1, sizeof(cl_mem), &s->rgb_bufs_cl[rgb_idx]);
|
||||
cl_check_error(err);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_debayer_rear, 2, sizeof(float), &s->cameras.rear.digital_gain);
|
||||
assert(err == 0);
|
||||
|
||||
|
@ -416,6 +427,74 @@ void* processing_thread(void *arg) {
|
|||
|
||||
visionbuf_sync(&s->rgb_bufs[rgb_idx], VISIONBUF_SYNC_FROM_DEVICE);
|
||||
|
||||
#ifdef QCOM
|
||||
/*FILE *dump_rgb_file = fopen("/tmp/process_dump.rgb", "wb");
|
||||
fwrite(s->rgb_bufs[rgb_idx].addr, s->rgb_bufs[rgb_idx].len, sizeof(uint8_t), dump_rgb_file);
|
||||
fclose(dump_rgb_file);
|
||||
printf("ORIGINAL SAVED!!\n");*/
|
||||
|
||||
/*double t10 = millis_since_boot();*/
|
||||
|
||||
// cache rgb roi and write to cl
|
||||
uint8_t *rgb_roi_buf = new uint8_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)*3];
|
||||
int roi_id = cnt % ((ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1)); // rolling roi
|
||||
int roi_x_offset = roi_id % (ROI_X_MAX-ROI_X_MIN+1);
|
||||
int roi_y_offset = roi_id / (ROI_X_MAX-ROI_X_MIN+1);
|
||||
|
||||
for (int r=0;r<(s->rgb_height/NUM_SEGMENTS_Y);r++) {
|
||||
memcpy(rgb_roi_buf + r * (s->rgb_width/NUM_SEGMENTS_X) * 3,
|
||||
(uint8_t *) s->rgb_bufs[rgb_idx].addr + \
|
||||
(ROI_Y_MIN + roi_y_offset) * s->rgb_height/NUM_SEGMENTS_Y * FULL_STRIDE_X * 3 + \
|
||||
(ROI_X_MIN + roi_x_offset) * s->rgb_width/NUM_SEGMENTS_X * 3 + r * FULL_STRIDE_X * 3,
|
||||
s->rgb_width/NUM_SEGMENTS_X * 3);
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer (q, s->rgb_conv_roi_cl, true, 0,
|
||||
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), rgb_roi_buf, 0, 0, 0);
|
||||
assert(err == 0);
|
||||
|
||||
/*double t11 = millis_since_boot();
|
||||
printf("cache time: %f ms\n", t11 - t10);
|
||||
t10 = millis_since_boot();*/
|
||||
|
||||
err = clSetKernelArg(s->krnl_rgb_laplacian, 0, sizeof(cl_mem), (void *) &s->rgb_conv_roi_cl);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_rgb_laplacian, 1, sizeof(cl_mem), (void *) &s->rgb_conv_result_cl);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_rgb_laplacian, 2, sizeof(cl_mem), (void *) &s->rgb_conv_filter_cl);
|
||||
assert(err == 0);
|
||||
err = clSetKernelArg(s->krnl_rgb_laplacian, 3, s->conv_cl_localMemSize, 0);
|
||||
assert(err == 0);
|
||||
|
||||
cl_event conv_event;
|
||||
err = clEnqueueNDRangeKernel(q, s->krnl_rgb_laplacian, 2, NULL,
|
||||
s->conv_cl_globalWorkSize, s->conv_cl_localWorkSize, 0, 0, &conv_event);
|
||||
assert(err == 0);
|
||||
clWaitForEvents(1, &conv_event);
|
||||
clReleaseEvent(conv_event);
|
||||
|
||||
int16_t *conv_result = new int16_t[(s->rgb_width/NUM_SEGMENTS_X)*(s->rgb_height/NUM_SEGMENTS_Y)];
|
||||
err = clEnqueueReadBuffer(q, s->rgb_conv_result_cl, true, 0,
|
||||
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), conv_result, 0, 0, 0);
|
||||
assert(err == 0);
|
||||
|
||||
/*t11 = millis_since_boot();
|
||||
printf("conv time: %f ms\n", t11 - t10);
|
||||
t10 = millis_since_boot();*/
|
||||
|
||||
get_lapmap_one(conv_result, &s->lapres[roi_id], s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y);
|
||||
|
||||
/*t11 = millis_since_boot();
|
||||
printf("pool time: %f ms\n", t11 - t10);
|
||||
t10 = millis_since_boot();*/
|
||||
|
||||
delete [] rgb_roi_buf;
|
||||
delete [] conv_result;
|
||||
|
||||
/*t11 = millis_since_boot();
|
||||
printf("process time: %f ms\n ----- \n", t11 - t10);
|
||||
t10 = millis_since_boot();*/
|
||||
#endif
|
||||
|
||||
double t2 = millis_since_boot();
|
||||
|
||||
|
@ -465,6 +544,8 @@ void* processing_thread(void *arg) {
|
|||
kj::ArrayPtr<const uint8_t> focus_confs(&s->cameras.rear.confidence[0], NUM_FOCUS);
|
||||
framed.setFocusVal(focus_vals);
|
||||
framed.setFocusConf(focus_confs);
|
||||
kj::ArrayPtr<const uint16_t> sharpness_score(&s->lapres[0], (ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1));
|
||||
framed.setSharpnessScore(sharpness_score);
|
||||
#endif
|
||||
|
||||
// TODO: add this back
|
||||
|
@ -907,6 +988,34 @@ cl_program build_debayer_program(VisionState *s,
|
|||
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "cameras/debayer.cl", args);
|
||||
}
|
||||
|
||||
cl_program build_conv_program(VisionState *s,
|
||||
int image_w, int image_h,
|
||||
int filter_size) {
|
||||
char args[4096];
|
||||
snprintf(args, sizeof(args),
|
||||
"-cl-fast-relaxed-math -cl-denorms-are-zero "
|
||||
"-DIMAGE_W=%d -DIMAGE_H=%d -DFLIP_RB=%d "
|
||||
"-DFILTER_SIZE=%d -DHALF_FILTER_SIZE=%d -DTWICE_HALF_FILTER_SIZE=%d -DHALF_FILTER_SIZE_IMAGE_W=%d",
|
||||
image_w, image_h, 1,
|
||||
filter_size, filter_size/2, (filter_size/2)*2, (filter_size/2)*image_w);
|
||||
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/conv.cl", args);
|
||||
}
|
||||
|
||||
cl_program build_pool_program(VisionState *s,
|
||||
int full_stride_x,
|
||||
int x_pitch, int y_pitch,
|
||||
int roi_x_min, int roi_x_max,
|
||||
int roi_y_min, int roi_y_max) {
|
||||
char args[4096];
|
||||
snprintf(args, sizeof(args),
|
||||
"-cl-fast-relaxed-math -cl-denorms-are-zero "
|
||||
"-DFULL_STRIDE_X=%d -DX_PITCH=%d -DY_PITCH=%d "
|
||||
"-DROI_X_MIN=%d -DROI_X_MAX=%d -DROI_Y_MIN=%d -DROI_Y_MAX=%d",
|
||||
full_stride_x, x_pitch, y_pitch,
|
||||
roi_x_min, roi_x_max, roi_y_min, roi_y_max);
|
||||
return CLU_LOAD_FROM_FILE(s->context, s->device_id, "imgproc/pool.cl", args);
|
||||
}
|
||||
|
||||
void cl_init(VisionState *s) {
|
||||
int err;
|
||||
cl_platform_id platform_id = NULL;
|
||||
|
@ -1049,6 +1158,25 @@ void init_buffers(VisionState *s) {
|
|||
assert(err == 0);
|
||||
}
|
||||
|
||||
s->prg_rgb_laplacian = build_conv_program(s, s->rgb_width/NUM_SEGMENTS_X, s->rgb_height/NUM_SEGMENTS_Y,
|
||||
3);
|
||||
s->krnl_rgb_laplacian = clCreateKernel(s->prg_rgb_laplacian, "rgb2gray_conv2d", &err);
|
||||
assert(err == 0);
|
||||
s->rgb_conv_roi_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * 3 * sizeof(uint8_t), NULL, NULL);
|
||||
s->rgb_conv_result_cl = clCreateBuffer(s->context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
s->rgb_width/NUM_SEGMENTS_X * s->rgb_height/NUM_SEGMENTS_Y * sizeof(int16_t), NULL, NULL);
|
||||
s->rgb_conv_filter_cl = clCreateBuffer(s->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
9 * sizeof(int16_t), (void*)&lapl_conv_krnl, NULL);
|
||||
s->conv_cl_localMemSize = ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) ) * ( CONV_LOCAL_WORKSIZE + 2 * (3 / 2) );
|
||||
s->conv_cl_localMemSize *= 3 * sizeof(uint8_t);
|
||||
s->conv_cl_globalWorkSize[0] = s->rgb_width/NUM_SEGMENTS_X;
|
||||
s->conv_cl_globalWorkSize[1] = s->rgb_height/NUM_SEGMENTS_Y;
|
||||
s->conv_cl_localWorkSize[0] = CONV_LOCAL_WORKSIZE;
|
||||
s->conv_cl_localWorkSize[1] = CONV_LOCAL_WORKSIZE;
|
||||
|
||||
for (int i=0; i<(ROI_X_MAX-ROI_X_MIN+1)*(ROI_Y_MAX-ROI_Y_MIN+1); i++) {s->lapres[i] = 16160;}
|
||||
|
||||
rgb_to_yuv_init(&s->rgb_to_yuv_state, s->context, s->device_id, s->yuv_width, s->yuv_height, s->rgb_stride);
|
||||
rgb_to_yuv_init(&s->front_rgb_to_yuv_state, s->context, s->device_id, s->yuv_front_width, s->yuv_front_height, s->rgb_front_stride);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue