// Caffe2 CPU implementation of the RoIAlign operator.
#include "caffe2/operators/roi_align_op.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
namespace {

// Precomputed bilinear-interpolation stencil for a single sampling point.
//
// p1..p4 are flattened (row * W + col) offsets of the four neighbouring
// feature-map cells in the order top-left, top-right, bottom-left,
// bottom-right; w1..w4 are the matching interpolation weights. A record whose
// fields are all zero marks a sample that lies outside the feature map: it
// carries zero weight for every neighbour.
template <typename T>
struct BilinearInterpolationParam {
  int64_t p1;
  int64_t p2;
  int64_t p3;
  int64_t p4;
  T w1;
  T w2;
  T w3;
  T w4;
};

// Builds the interpolation stencils for every sampling point of one RoI.
//
// Arguments:
//   H, W                    - height and width of the feature map.
//   pooled_h, pooled_w      - number of output bins along each axis.
//   bin_size_h, bin_size_w  - size of one bin in feature-map coordinates.
//   bin_grid_h, bin_grid_w  - number of sampling points per bin and axis.
//   roi_start_h, roi_start_w - top-left corner of the RoI in feature-map
//                              coordinates.
//
// Returns pooled_h * pooled_w * bin_grid_h * bin_grid_w records ordered as
// [ph][pw][iy][ix], i.e. the same order in which the pooling loops consume
// them. Samples further than one cell above/left of the map, or beyond H/W,
// are left as all-zero records; all others are clamped to the valid range
// [0, H - 1] x [0, W - 1] before the neighbours and weights are computed.
template <typename T>
std::vector<BilinearInterpolationParam<T>> MakeBilinearInterpolationParams(
    int64_t H,
    int64_t W,
    int64_t pooled_h,
    int64_t pooled_w,
    T bin_size_h,
    T bin_size_w,
    int64_t bin_grid_h,
    int64_t bin_grid_w,
    T roi_start_h,
    T roi_start_w) {
  // The sized constructor value-initializes every element, so each record
  // starts out all-zero. Out-of-range samples below simply skip their slot
  // instead of clearing it again.
  std::vector<BilinearInterpolationParam<T>> params(
      pooled_h * pooled_w * bin_grid_h * bin_grid_w);
  // Distance between neighbouring sampling points inside a bin.
  const T ch = bin_size_h / static_cast<T>(bin_grid_h);
  const T cw = bin_size_w / static_cast<T>(bin_grid_w);
  int64_t cnt = 0;
  for (int64_t ph = 0; ph < pooled_h; ++ph) {
    for (int64_t pw = 0; pw < pooled_w; ++pw) {
      for (int64_t iy = 0; iy < bin_grid_h; ++iy) {
        // Sample at the centre of the iy-th grid cell of bin row ph.
        const T yy = roi_start_h + static_cast<T>(ph) * bin_size_h +
            (static_cast<T>(iy) + T(0.5)) * ch;
        if (yy < T(-1) || yy > static_cast<T>(H)) {
          // The whole row of samples is outside the map: keep the zeroed
          // records and advance past them.
          cnt += bin_grid_w;
          continue;
        }
        for (int64_t ix = 0; ix < bin_grid_w; ++ix) {
          const T xx = roi_start_w + static_cast<T>(pw) * bin_size_w +
              (static_cast<T>(ix) + T(0.5)) * cw;
          BilinearInterpolationParam<T>& param = params[cnt++];
          if (xx < T(-1) || xx > static_cast<T>(W)) {
            // Outside the map horizontally: leave this record zeroed.
            continue;
          }
          // Clamp into the map so that both neighbours are valid cells.
          const T y = std::min(std::max(yy, T(0)), static_cast<T>(H - 1));
          const T x = std::min(std::max(xx, T(0)), static_cast<T>(W - 1));
          const int64_t yl = static_cast<int64_t>(std::floor(y));
          const int64_t xl = static_cast<int64_t>(std::floor(x));
          const int64_t yh = std::min(yl + 1, H - 1);
          const int64_t xh = std::min(xl + 1, W - 1);
          // Fractional distances to the low neighbour and their complements.
          const T py = y - static_cast<T>(yl);
          const T px = x - static_cast<T>(xl);
          const T qy = T(1) - py;
          const T qx = T(1) - px;
          param.p1 = yl * W + xl;
          param.p2 = yl * W + xh;
          param.p3 = yh * W + xl;
          param.p4 = yh * W + xh;
          param.w1 = qy * qx;
          param.w2 = qy * px;
          param.w3 = py * qx;
          param.w4 = py * px;
        }
      }
    }
  }
  return params;
}

} // namespace
|
|
|
|
template <>
|
|
C10_EXPORT bool RoIAlignOp<float, CPUContext>::RunOnDeviceWithOrderNCHW(
|
|
int64_t N,
|
|
int64_t C,
|
|
int64_t H,
|
|
int64_t W,
|
|
int64_t roi_cols,
|
|
const float* X,
|
|
const float* R,
|
|
float* Y) {
|
|
DCHECK(roi_cols == 4 || roi_cols == 5);
|
|
const float roi_offset = aligned_ ? 0.5f : 0.0f;
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for
|
|
#endif
|
|
for (int64_t n = 0; n < N; ++n) {
|
|
const int64_t roi_batch_idx = roi_cols == 4 ? 0 : R[n * roi_cols];
|
|
const float* X_ptr = X + roi_batch_idx * C * H * W;
|
|
const float* R_ptr = R + n * roi_cols + (roi_cols == 5);
|
|
float* Y_ptr = Y + n * C * pooled_h_ * pooled_w_;
|
|
|
|
// Do not using rounding; this implementation detail is critical
|
|
const float roi_w1 = R_ptr[0] * spatial_scale_ - roi_offset;
|
|
const float roi_h1 = R_ptr[1] * spatial_scale_ - roi_offset;
|
|
const float roi_w2 = R_ptr[2] * spatial_scale_ - roi_offset;
|
|
const float roi_h2 = R_ptr[3] * spatial_scale_ - roi_offset;
|
|
float roi_w = roi_w2 - roi_w1;
|
|
float roi_h = roi_h2 - roi_h1;
|
|
if (aligned_) {
|
|
CAFFE_ENFORCE(
|
|
roi_w >= 0.0f && roi_h >= 0.0f,
|
|
"ROIs in ROIAlign do not have non-negative size!");
|
|
} else { // backward compatibility
|
|
// Force malformed ROIs to be 1x1
|
|
roi_w = std::max(roi_w, 1.0f);
|
|
roi_h = std::max(roi_h, 1.0f);
|
|
}
|
|
const float bin_size_h = roi_h / static_cast<float>(pooled_h_);
|
|
const float bin_size_w = roi_w / static_cast<float>(pooled_w_);
|
|
|
|
// We use roi_bin_grid to sample the grid and mimic integral
|
|
const int64_t bin_grid_h = (sampling_ratio_ > 0)
|
|
? sampling_ratio_
|
|
: static_cast<int64_t>(ceil(roi_h / static_cast<float>(pooled_h_)));
|
|
const int64_t bin_grid_w = (sampling_ratio_ > 0)
|
|
? sampling_ratio_
|
|
: static_cast<int64_t>(ceil(roi_w / static_cast<float>(pooled_w_)));
|
|
|
|
const std::vector<BilinearInterpolationParam<float>> params =
|
|
MakeBilinearInterpolationParams(
|
|
H,
|
|
W,
|
|
pooled_h_,
|
|
pooled_w_,
|
|
bin_size_h,
|
|
bin_size_w,
|
|
bin_grid_h,
|
|
bin_grid_w,
|
|
roi_h1,
|
|
roi_w1);
|
|
|
|
const float scale = 1.0f / static_cast<float>(bin_grid_h * bin_grid_w);
|
|
for (int64_t c = 0; c < C; ++c) {
|
|
int64_t cnt = 0;
|
|
for (int64_t ph = 0; ph < pooled_h_; ++ph) {
|
|
for (int64_t pw = 0; pw < pooled_w_; ++pw) {
|
|
float sum = 0.0f;
|
|
for (int64_t iy = 0; iy < bin_grid_h; ++iy) {
|
|
for (int64_t ix = 0; ix < bin_grid_w; ++ix) {
|
|
const BilinearInterpolationParam<float>& param = params[cnt++];
|
|
sum += param.w1 * X_ptr[param.p1] + param.w2 * X_ptr[param.p2] +
|
|
param.w3 * X_ptr[param.p3] + param.w4 * X_ptr[param.p4];
|
|
}
|
|
}
|
|
Y_ptr[ph * pooled_w_ + pw] = sum * scale;
|
|
}
|
|
}
|
|
X_ptr += H * W;
|
|
Y_ptr += pooled_h_ * pooled_w_;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
template <>
|
|
C10_EXPORT bool RoIAlignOp<float, CPUContext>::RunOnDeviceWithOrderNHWC(
|
|
int64_t N,
|
|
int64_t C,
|
|
int64_t H,
|
|
int64_t W,
|
|
int64_t roi_cols,
|
|
const float* X,
|
|
const float* R,
|
|
float* Y) {
|
|
DCHECK(roi_cols == 4 || roi_cols == 5);
|
|
const float roi_offset = aligned_ ? 0.5f : 0.0f;
|
|
|
|
#ifdef _OPENMP
|
|
#pragma omp parallel for
|
|
#endif
|
|
for (int64_t n = 0; n < N; ++n) {
|
|
const int64_t roi_batch_idx = roi_cols == 4 ? 0 : R[n * roi_cols];
|
|
const float* X_ptr = X + roi_batch_idx * C * H * W;
|
|
const float* R_ptr = R + n * roi_cols + (roi_cols == 5);
|
|
float* Y_ptr = Y + n * C * pooled_h_ * pooled_w_;
|
|
|
|
// Do not using rounding; this implementation detail is critical
|
|
const float roi_w1 = R_ptr[0] * spatial_scale_ - roi_offset;
|
|
const float roi_h1 = R_ptr[1] * spatial_scale_ - roi_offset;
|
|
const float roi_w2 = R_ptr[2] * spatial_scale_ - roi_offset;
|
|
const float roi_h2 = R_ptr[3] * spatial_scale_ - roi_offset;
|
|
float roi_w = roi_w2 - roi_w1;
|
|
float roi_h = roi_h2 - roi_h1;
|
|
if (aligned_) {
|
|
CAFFE_ENFORCE(
|
|
roi_w >= 0.0f && roi_h >= 0.0f,
|
|
"ROIs in ROIAlign do not have non-negative size!");
|
|
} else { // backward compatibility
|
|
// Force malformed ROIs to be 1x1
|
|
roi_w = std::max(roi_w, 1.0f);
|
|
roi_h = std::max(roi_h, 1.0f);
|
|
}
|
|
const float bin_size_h = roi_h / static_cast<float>(pooled_h_);
|
|
const float bin_size_w = roi_w / static_cast<float>(pooled_w_);
|
|
|
|
// We use roi_bin_grid to sample the grid and mimic integral
|
|
const int64_t bin_grid_h = (sampling_ratio_ > 0)
|
|
? sampling_ratio_
|
|
: static_cast<int64_t>(ceil(roi_h / static_cast<float>(pooled_h_)));
|
|
const int64_t bin_grid_w = (sampling_ratio_ > 0)
|
|
? sampling_ratio_
|
|
: static_cast<int64_t>(ceil(roi_w / static_cast<float>(pooled_w_)));
|
|
|
|
const std::vector<BilinearInterpolationParam<float>> params =
|
|
MakeBilinearInterpolationParams(
|
|
H,
|
|
W,
|
|
pooled_h_,
|
|
pooled_w_,
|
|
bin_size_h,
|
|
bin_size_w,
|
|
bin_grid_h,
|
|
bin_grid_w,
|
|
roi_h1,
|
|
roi_w1);
|
|
|
|
const float scale = 1.0f / static_cast<float>(bin_grid_h * bin_grid_w);
|
|
int64_t cnt = 0;
|
|
for (int64_t ph = 0; ph < pooled_h_; ++ph) {
|
|
for (int64_t pw = 0; pw < pooled_w_; ++pw) {
|
|
EigenVectorArrayMap<float> Y_arr(Y_ptr + (ph * pooled_w_ + pw) * C, C);
|
|
Y_arr.setZero();
|
|
for (int64_t iy = 0; iy < bin_grid_h; ++iy) {
|
|
for (int64_t ix = 0; ix < bin_grid_w; ++ix) {
|
|
const BilinearInterpolationParam<float>& param = params[cnt++];
|
|
ConstEigenVectorArrayMap<float> x1_arr(X_ptr + param.p1 * C, C);
|
|
ConstEigenVectorArrayMap<float> x2_arr(X_ptr + param.p2 * C, C);
|
|
ConstEigenVectorArrayMap<float> x3_arr(X_ptr + param.p3 * C, C);
|
|
ConstEigenVectorArrayMap<float> x4_arr(X_ptr + param.p4 * C, C);
|
|
Y_arr += param.w1 * x1_arr + param.w2 * x2_arr + param.w3 * x3_arr +
|
|
param.w4 * x4_arr;
|
|
}
|
|
}
|
|
Y_arr *= scale;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Register the float CPU implementation under the operator name "RoIAlign".
REGISTER_CPU_OPERATOR(RoIAlign, RoIAlignOp<float, CPUContext>);

// Input: X, rois; Output: Y
OPERATOR_SCHEMA(RoIAlign)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Region of Interest (RoI) align operation as used in Mask R-CNN.
)DOC")
    .Arg(
        "spatial_scale",
        "(float) default 1.0; Spatial scale of the input feature map X "
        "relative to the input image. E.g., 0.0625 if X has a stride of 16 "
        "w.r.t. the input image.")
    .Arg("pooled_h", "(int) default 1; Pooled output Y's height.")
    .Arg("pooled_w", "(int) default 1; Pooled output Y's width.")
    .Arg(
        "sampling_ratio",
        "(int) default -1; number of sampling points in the interpolation grid "
        "used to compute the output value of each pooled output bin. If > 0, "
        "then exactly sampling_ratio x sampling_ratio grid points are used. If "
        "<= 0, then an adaptive number of grid points are used (computed as "
        "ceil(roi_width / pooled_w), and likewise for height).")
    .Arg(
        "aligned",
        "(bool) if true, the scaled RoI coordinates are shifted by -0.5 and "
        "RoIs must have non-negative width and height. If false, the RoI "
        "width and height are forced to be at least 1 (legacy behavior).")
    .Input(0, "X", "4D feature map input of shape (N, C, H, W).")
    .Input(
        1,
        "RoIs",
        "2D input of shape (R, 4 or 5) specifying R RoIs "
        "representing: batch index in [0, N - 1], x1, y1, x2, y2. The RoI "
        "coordinates are in the coordinate system of the input image. For "
        "inputs corresponding to a single image, batch index can be excluded "
        "to have just 4 columns.")
    .Output(
        0,
        "Y",
        "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element "
        "is a pooled feature map corresponding to the r-th RoI.");
|
|
|
|
template <typename T>
|
|
using RoIAlignCPUOp = caffe2::RoIAlignOp<T, CPUContext>;
|
|
|
|
} // namespace caffe2
|
|
|
|
// Export the float CPU RoIAlign operator to c10 under the schema
// `_caffe2::RoIAlign`. The schema string is assembled from the adjacent
// literals below.
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    RoIAlign,
    "_caffe2::RoIAlign("
    " Tensor features,"
    " Tensor rois,"
    " str order,"
    " float spatial_scale,"
    " int pooled_h,"
    " int pooled_w,"
    " int sampling_ratio,"
    " bool aligned"
    ") -> Tensor",
    caffe2::RoIAlignCPUOp<float>);
|