/*
 * Copyright 2009-2015 Freescale Semiconductor, Inc. All Rights Reserved.
 */

/*
 * The code contained herein is licensed under the GNU General Public
 * License. You may obtain a copy of the GNU General Public License
 * Version 2 or later at the following locations:
 *
 * http://www.opensource.org/licenses/gpl-license.html
 * http://www.gnu.org/copyleft/gpl.html
 */

/*
 * @file ipu_calc_stripes_sizes.c
 *
 * @brief IPU IC functions
 *
 * @ingroup IPU
 */

#include <linux/ipu-v3.h>
#include <linux/module.h>
#include <linux/math64.h>

/*
 * Selector values accepted by the bpp argument of f_calc().
 * NOTE(review): the numbering presumably mirrors a hardware bits-per-pixel
 * field encoding - confirm against the IPU documentation.
 */
#define BPP_32 0
#define BPP_24 1
#define BPP_18 2
#define BPP_16 3
#define BPP_12 4
#define BPP_8 5

static u32 truncate(u32 up, /* 0: down; else: up */
|
|
u64 a, /* must be non-negative */
|
|
u32 b)
|
|
{
|
|
u32 d;
|
|
u64 div;
|
|
div = div_u64(a, b);
|
|
d = b * (div >> 32);
|
|
if (up && (a > (((u64)d) << 32)))
|
|
return d+b;
|
|
else
|
|
return d;
|
|
}
|
|
|
|
static unsigned int f_calc(unsigned int pfs, unsigned int bpp, unsigned int *write)
|
|
{/* return input_f */
|
|
unsigned int f_calculated = 0;
|
|
switch (pfs) {
|
|
case IPU_PIX_FMT_YVU422P:
|
|
case IPU_PIX_FMT_YUV422P:
|
|
case IPU_PIX_FMT_YUV420P2:
|
|
case IPU_PIX_FMT_YUV420P:
|
|
case IPU_PIX_FMT_YVU420P:
|
|
case IPU_PIX_FMT_YUV444P:
|
|
f_calculated = 16;
|
|
break;
|
|
|
|
case IPU_PIX_FMT_RGB565:
|
|
case IPU_PIX_FMT_YUYV:
|
|
case IPU_PIX_FMT_UYVY:
|
|
f_calculated = 8;
|
|
break;
|
|
|
|
case IPU_PIX_FMT_NV12:
|
|
f_calculated = 8;
|
|
break;
|
|
|
|
default:
|
|
f_calculated = 0;
|
|
break;
|
|
|
|
}
|
|
if (!f_calculated) {
|
|
switch (bpp) {
|
|
case BPP_32:
|
|
f_calculated = 2;
|
|
break;
|
|
|
|
case BPP_16:
|
|
f_calculated = 4;
|
|
break;
|
|
|
|
case BPP_8:
|
|
case BPP_24:
|
|
f_calculated = 8;
|
|
break;
|
|
|
|
case BPP_12:
|
|
f_calculated = 16;
|
|
break;
|
|
|
|
case BPP_18:
|
|
f_calculated = 32;
|
|
break;
|
|
|
|
default:
|
|
f_calculated = 0;
|
|
break;
|
|
}
|
|
}
|
|
return f_calculated;
|
|
}
|
|
|
|
|
|
static unsigned int m_calc(unsigned int pfs)
|
|
{
|
|
unsigned int m_calculated = 0;
|
|
switch (pfs) {
|
|
case IPU_PIX_FMT_YUV420P2:
|
|
case IPU_PIX_FMT_YUV420P:
|
|
case IPU_PIX_FMT_YVU422P:
|
|
case IPU_PIX_FMT_YUV422P:
|
|
case IPU_PIX_FMT_YVU420P:
|
|
case IPU_PIX_FMT_YUV444P:
|
|
m_calculated = 16;
|
|
break;
|
|
|
|
case IPU_PIX_FMT_NV12:
|
|
case IPU_PIX_FMT_YUYV:
|
|
case IPU_PIX_FMT_UYVY:
|
|
m_calculated = 8;
|
|
break;
|
|
|
|
default:
|
|
m_calculated = 8;
|
|
break;
|
|
|
|
}
|
|
return m_calculated;
|
|
}
|
|
|
|
/*
 * Split a downscale from inSize to outSize into a power-of-two downsizing
 * step followed by a resizing step.
 *
 * inSize:        input size; must not exceed 4096
 * outSize:       output size; must be in the range 2..1024 and at least
 *                inSize / 8
 * resizeCoeff:   receives 8192 * (SI - 1) / (SO - 1), where SI is the input
 *                size after downsizing and SO is outSize
 * downsizeCoeff: receives the number of times the input is halved (0..2)
 *
 * Returns 0 on success or -EINVAL when a size is out of range or the
 * resulting resize coefficient is 16384 or more.  In that last case both
 * output parameters have already been written.
 */
static int calc_split_resize_coeffs(unsigned int inSize, unsigned int outSize,
				    unsigned int *resizeCoeff,
				    unsigned int *downsizeCoeff)
{
	uint32_t tempSize;
	uint32_t tempDownsize;

	if (inSize > 4096) {
		pr_debug("IC input size(%d) cannot exceed 4096\n",
			 inSize);
		return -EINVAL;
	}

	if (outSize > 1024) {
		pr_debug("IC output size(%d) cannot exceed 1024\n",
			 outSize);
		return -EINVAL;
	}

	/* (outSize - 1) is the divisor below, so outSize must be at least 2 */
	if (outSize < 2) {
		pr_debug("IC output size(%d) must be at least 2\n",
			 outSize);
		return -EINVAL;
	}

	if ((outSize << 3) < inSize) {
		pr_debug("IC cannot downsize more than 8:1\n");
		return -EINVAL;
	}

	/* Compute downsizing coefficient */
	/* Output of downsizing unit cannot be more than 1024 */
	tempDownsize = 0;
	tempSize = inSize;
	while (((tempSize > 1024) || (tempSize >= outSize * 2)) &&
	       (tempDownsize < 2)) {
		tempSize >>= 1;
		tempDownsize++;
	}
	*downsizeCoeff = tempDownsize;

	/*
	 * compute resizing coefficient using the following equation:
	 * resizeCoeff = M*(SI -1)/(SO - 1)
	 * where M = 2^13, SI - input size, SO - output size
	 */
	*resizeCoeff = (8192L * (tempSize - 1)) / (outSize - 1);
	if (*resizeCoeff >= 16384L) {
		pr_debug("Overflow on IC resize coefficient.\n");
		return -EINVAL;
	}

	pr_debug("resizing from %u -> %u pixels, "
		 "downsize=%u, resize=%u.%lu (reg=%u)\n", inSize, outSize,
		 *downsizeCoeff, (*resizeCoeff >= 8192L) ? 1 : 0,
		 ((*resizeCoeff & 0x1FFF) * 10000L) / 8192L, *resizeCoeff);

	return 0;
}

/* Stripe parameters calculator */
/**************************************************************************
Notes:
MSW    = the maximal width allowed for a stripe
	i.MX31: 720, i.MX35: 800, i.MX37/51/53: 1024
cirr   = the maximal inverse resizing ratio for which overlap in the input
	is requested; typically cirr~2
flags
	bit 0 - equal_stripes
		0  each stripe is allowed to have independent parameters
		   for maximal image quality
		1  the stripes are requested to have identical parameters
		   (except the base address), for maximal performance
	bit 1 - vertical/horizontal
		0  horizontal
		1  vertical

If performance is the top priority (above image quality)
	Avoid overlap, by setting CIRR = 0
		This will also force effectively identical_stripes = 1
	Choose IF & OF that corresponds to the same IOX/SX for both stripes
	Choose IFW & OFW such that
		IFW/IM, IFW/IF, OFW/OM, OFW/OF are even integers

The function returns an error status:
	0: no error
	1: invalid input parameters -> aborted without result
		Valid parameters should satisfy the following conditions
		IFW <= OFW, otherwise downsizing is required
			- which is not supported yet
		4 <= IFW,OFW, so some interpolation may be needed even without overlap
		IM, OM, IF, OF should not vanish
		2*IF <= IFW
			so the frame can be split to two equal stripes, even without overlap
		2*(OF+IF/irr_opt) <= OFW
			so a valid positive INW exists even for equal stripes
		OF <= MSW, otherwise, the left stripe cannot be sufficiently large
		MSW < OFW, so splitting to stripes is required
		OFW <= 2*MSW, so two stripes are sufficient
			(this also implies that 2<=MSW)
	2: OF is not a multiple of OM - not fully-supported yet
		Output is produced but OW is not guaranteed to be a multiple of OM
	4: OFW reduced to be a multiple of OM
	8: CIRR > 1: truncated to 1
		Overlap is not supported (and not needed) for upsizing
**************************************************************************/
int ipu_calc_stripes_sizes(const unsigned int input_frame_width,
|
|
/* input frame width;>1 */
|
|
unsigned int output_frame_width, /* output frame width; >1 */
|
|
const unsigned int maximal_stripe_width,
|
|
/* the maximal width allowed for a stripe */
|
|
const unsigned long long cirr, /* see above */
|
|
const unsigned int flags, /* see above */
|
|
u32 input_pixelformat,/* pixel format after of read channel*/
|
|
u32 output_pixelformat,/* pixel format after of write channel*/
|
|
struct stripe_param *left,
|
|
struct stripe_param *right)
|
|
{
|
|
const unsigned int irr_frac_bits = 13;
|
|
const unsigned long irr_steps = 1 << irr_frac_bits;
|
|
const u64 dirr = ((u64)1) << (32 - 2);
|
|
/* The maximum relative difference allowed between the irrs */
|
|
const u64 cr = ((u64)4) << 32;
|
|
/* The importance ratio between the two terms in the cost function below */
|
|
|
|
unsigned int status;
|
|
unsigned int temp;
|
|
unsigned int onw_min;
|
|
unsigned int inw = 0, onw = 0, inw_best = 0;
|
|
/* number of pixels in the left stripe NOT hidden by the right stripe */
|
|
u64 irr_opt; /* the optimal inverse resizing ratio */
|
|
u64 rr_opt; /* the optimal resizing ratio = 1/irr_opt*/
|
|
u64 dinw; /* the misalignment between the stripes */
|
|
/* (measured in units of input columns) */
|
|
u64 difwl, difwr = 0;
|
|
/* The number of input columns not reflected in the output */
|
|
/* the resizing ratio used for the right stripe is */
|
|
/* left->irr and right->irr respectively */
|
|
u64 cost, cost_min;
|
|
u64 div; /* result of division */
|
|
bool equal_stripes = (flags & 0x1) != 0;
|
|
bool vertical = (flags & 0x2) != 0;
|
|
|
|
unsigned int input_m, input_f, output_m, output_f; /* parameters for upsizing by stripes */
|
|
unsigned int resize_coeff;
|
|
unsigned int downsize_coeff;
|
|
|
|
status = 0;
|
|
|
|
if (vertical) {
|
|
input_f = 2;
|
|
input_m = 8;
|
|
output_f = 8;
|
|
output_m = 2;
|
|
} else {
|
|
input_f = f_calc(input_pixelformat, 0, NULL);
|
|
input_m = m_calc(input_pixelformat);
|
|
output_f = input_m;
|
|
output_m = m_calc(output_pixelformat);
|
|
}
|
|
if ((input_frame_width < 4) || (output_frame_width < 4))
|
|
return 1;
|
|
|
|
irr_opt = div_u64((((u64)(input_frame_width - 1)) << 32),
|
|
(output_frame_width - 1));
|
|
rr_opt = div_u64((((u64)(output_frame_width - 1)) << 32),
|
|
(input_frame_width - 1));
|
|
|
|
if ((input_m == 0) || (output_m == 0) || (input_f == 0) || (output_f == 0)
|
|
|| (input_frame_width < (2 * input_f))
|
|
|| ((((u64)output_frame_width) << 32) <
|
|
(2 * ((((u64)output_f) << 32) + (input_f * rr_opt))))
|
|
|| (maximal_stripe_width < output_f)
|
|
|| ((output_frame_width <= maximal_stripe_width)
|
|
&& (equal_stripes == 0))
|
|
|| ((2 * maximal_stripe_width) < output_frame_width))
|
|
return 1;
|
|
|
|
if (output_f % output_m)
|
|
status += 2;
|
|
|
|
temp = truncate(0, (((u64)output_frame_width) << 32), output_m);
|
|
if (temp < output_frame_width) {
|
|
output_frame_width = temp;
|
|
status += 4;
|
|
}
|
|
|
|
pr_debug("---------------->\n"
|
|
"if = %d\n"
|
|
"im = %d\n"
|
|
"of = %d\n"
|
|
"om = %d\n"
|
|
"irr_opt = %llu\n"
|
|
"rr_opt = %llu\n"
|
|
"cirr = %llu\n"
|
|
"pixel in = %08x\n"
|
|
"pixel out = %08x\n"
|
|
"ifw = %d\n"
|
|
"ofwidth = %d\n",
|
|
input_f,
|
|
input_m,
|
|
output_f,
|
|
output_m,
|
|
irr_opt,
|
|
rr_opt,
|
|
cirr,
|
|
input_pixelformat,
|
|
output_pixelformat,
|
|
input_frame_width,
|
|
output_frame_width
|
|
);
|
|
|
|
if (equal_stripes) {
|
|
if ((irr_opt > cirr) /* overlap in the input is not requested */
|
|
&& ((input_frame_width % (input_m << 1)) == 0)
|
|
&& ((input_frame_width % (input_f << 1)) == 0)
|
|
&& ((output_frame_width % (output_m << 1)) == 0)
|
|
&& ((output_frame_width % (output_f << 1)) == 0)) {
|
|
/* without overlap */
|
|
left->input_width = right->input_width = right->input_column =
|
|
input_frame_width >> 1;
|
|
left->output_width = right->output_width = right->output_column =
|
|
output_frame_width >> 1;
|
|
left->input_column = 0;
|
|
left->output_column = 0;
|
|
div = div_u64(((((u64)irr_steps) << 32) *
|
|
(right->input_width - 1)), (right->output_width - 1));
|
|
left->irr = right->irr = truncate(0, div, 1);
|
|
} else { /* with overlap */
|
|
onw = truncate(0, (((u64)output_frame_width - 1) << 32) >> 1,
|
|
output_f);
|
|
inw = truncate(0, onw * irr_opt, input_f);
|
|
/* this is the maximal inw which allows the same resizing ratio */
|
|
/* in both stripes */
|
|
onw = truncate(1, (inw * rr_opt), output_f);
|
|
div = div_u64((((u64)(irr_steps * inw)) <<
|
|
32), onw);
|
|
left->irr = right->irr = truncate(0, div, 1);
|
|
left->output_width = right->output_width =
|
|
output_frame_width - onw;
|
|
/* These are valid assignments for output_width, */
|
|
/* assuming output_f is a multiple of output_m */
|
|
div = (((u64)(left->output_width-1) * (left->irr)) << 32);
|
|
div = (((u64)1) << 32) + div_u64(div, irr_steps);
|
|
|
|
left->input_width = right->input_width = truncate(1, div, input_m);
|
|
|
|
div = div_u64((((u64)((right->output_width - 1) * right->irr)) <<
|
|
32), irr_steps);
|
|
difwr = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
|
|
div = div_u64((difwr + (((u64)input_f) << 32)), 2);
|
|
left->input_column = truncate(0, div, input_f);
|
|
|
|
|
|
/* This splits the truncated input columns evenly */
|
|
/* between the left and right margins */
|
|
right->input_column = left->input_column + inw;
|
|
left->output_column = 0;
|
|
right->output_column = onw;
|
|
}
|
|
if (left->input_width > left->output_width) {
|
|
if (calc_split_resize_coeffs(left->input_width,
|
|
left->output_width,
|
|
&resize_coeff,
|
|
&downsize_coeff) < 0)
|
|
return -EINVAL;
|
|
|
|
if (downsize_coeff > 0) {
|
|
left->irr = right->irr =
|
|
(downsize_coeff << 14) | resize_coeff;
|
|
}
|
|
}
|
|
pr_debug("inw %d, onw %d, ilw %d, ilc %d, olw %d,"
|
|
" irw %d, irc %d, orw %d, orc %d, "
|
|
"difwr %llu, lirr %u\n",
|
|
inw, onw, left->input_width,
|
|
left->input_column, left->output_width,
|
|
right->input_width, right->input_column,
|
|
right->output_width,
|
|
right->output_column, difwr, left->irr);
|
|
} else { /* independent stripes */
|
|
onw_min = output_frame_width - maximal_stripe_width;
|
|
/* onw is a multiple of output_f, in the range */
|
|
/* [max(output_f,output_frame_width-maximal_stripe_width),*/
|
|
/*min(output_frame_width-2,maximal_stripe_width)] */
|
|
/* definitely beyond the cost of any valid setting */
|
|
cost_min = (((u64)input_frame_width) << 32) + cr;
|
|
onw = truncate(0, ((u64)maximal_stripe_width), output_f);
|
|
if (output_frame_width - onw == 1)
|
|
onw -= output_f; /* => onw and output_frame_width-1-onw are positive */
|
|
inw = truncate(0, onw * irr_opt, input_f);
|
|
/* this is the maximal inw which allows the same resizing ratio */
|
|
/* in both stripes */
|
|
onw = truncate(1, inw * rr_opt, output_f);
|
|
do {
|
|
div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
|
|
left->irr = truncate(0, div, 1);
|
|
div = div_u64((((u64)(onw * left->irr)) << 32),
|
|
irr_steps);
|
|
dinw = (((u64)inw) << 32) - div;
|
|
|
|
div = div_u64((((u64)((output_frame_width - 1 - onw) * left->irr)) <<
|
|
32), irr_steps);
|
|
|
|
difwl = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
|
|
|
|
cost = difwl + (((u64)(cr * dinw)) >> 32);
|
|
|
|
if (cost < cost_min) {
|
|
inw_best = inw;
|
|
cost_min = cost;
|
|
}
|
|
|
|
inw -= input_f;
|
|
onw = truncate(1, inw * rr_opt, output_f);
|
|
/* This is the minimal onw which allows the same resizing ratio */
|
|
/* in both stripes */
|
|
} while (onw >= onw_min);
|
|
|
|
inw = inw_best;
|
|
onw = truncate(1, inw * rr_opt, output_f);
|
|
div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
|
|
left->irr = truncate(0, div, 1);
|
|
|
|
left->output_width = onw;
|
|
right->output_width = output_frame_width - onw;
|
|
/* These are valid assignments for output_width, */
|
|
/* assuming output_f is a multiple of output_m */
|
|
left->input_width = truncate(1, ((u64)(inw + 1)) << 32, input_m);
|
|
right->input_width = truncate(1, ((u64)(input_frame_width - inw)) <<
|
|
32, input_m);
|
|
|
|
div = div_u64((((u64)(irr_steps * (input_frame_width - 1 - inw))) <<
|
|
32), (right->output_width - 1));
|
|
right->irr = truncate(0, div, 1);
|
|
temp = truncate(0, ((u64)left->irr) * ((((u64)1) << 32) + dirr), 1);
|
|
if (temp < right->irr)
|
|
right->irr = temp;
|
|
div = div_u64(((u64)((right->output_width - 1) * right->irr) <<
|
|
32), irr_steps);
|
|
difwr = (u64)(input_frame_width - 1 - inw) - div;
|
|
|
|
|
|
div = div_u64((difwr + (((u64)input_f) << 32)), 2);
|
|
left->input_column = truncate(0, div, input_f);
|
|
|
|
/* This splits the truncated input columns evenly */
|
|
/* between the left and right margins */
|
|
right->input_column = left->input_column + inw;
|
|
left->output_column = 0;
|
|
right->output_column = onw;
|
|
if (left->input_width > left->output_width) {
|
|
if (calc_split_resize_coeffs(left->input_width,
|
|
left->output_width,
|
|
&resize_coeff,
|
|
&downsize_coeff) < 0)
|
|
return -EINVAL;
|
|
left->irr = (downsize_coeff << 14) | resize_coeff;
|
|
}
|
|
if (right->input_width > right->output_width) {
|
|
if (calc_split_resize_coeffs(right->input_width,
|
|
right->output_width,
|
|
&resize_coeff,
|
|
&downsize_coeff) < 0)
|
|
return -EINVAL;
|
|
right->irr = (downsize_coeff << 14) | resize_coeff;
|
|
}
|
|
}
|
|
return status;
|
|
}
|
|
EXPORT_SYMBOL(ipu_calc_stripes_sizes);
|