Updated Eigen to version 2.0.11, which features precision fixes for quaternion slerp and
4x4 matrix inversion.sensor-dev
parent
25d8f6b8dc
commit
472416d880
|
@ -50,7 +50,7 @@ struct ei_traits<Part<MatrixType, Mode> > : ei_traits<MatrixType>
|
|||
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
|
||||
CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ConditionalJumpCost
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
#define EIGEN_WORLD_VERSION 2
|
||||
#define EIGEN_MAJOR_VERSION 0
|
||||
#define EIGEN_MINOR_VERSION 10
|
||||
#define EIGEN_MINOR_VERSION 11
|
||||
|
||||
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
|
||||
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
|
||||
|
|
|
@ -209,18 +209,47 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
|
|||
ei_conditional_aligned_free<Align>(ptr);
|
||||
}
|
||||
|
||||
/** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */
|
||||
template<typename Scalar>
|
||||
inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
|
||||
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
|
||||
*
|
||||
* \param array the address of the start of the array
|
||||
* \param size the size of the array
|
||||
*
|
||||
* \note If no element of the array is well aligned, the size of the array is returned. Typically,
|
||||
* for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
|
||||
* packet size for the given scalar type is 1, then everything is considered well-aligned.
|
||||
*
|
||||
* \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
|
||||
* power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
|
||||
* other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
|
||||
* example with Scalar=double on certain 32-bit platforms, see bug #79.
|
||||
*
|
||||
* There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h.
|
||||
*/
|
||||
template<typename Scalar, typename Integer>
|
||||
inline static Integer ei_alignmentOffset(const Scalar* array, Integer size)
|
||||
{
|
||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||
const int PacketSize = ei_packet_traits<Scalar>::size;
|
||||
const int PacketAlignedMask = PacketSize-1;
|
||||
const bool Vectorized = PacketSize>1;
|
||||
return Vectorized
|
||||
? std::min<int>( (PacketSize - (int((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
|
||||
& PacketAlignedMask, maxOffset)
|
||||
: 0;
|
||||
enum { PacketSize = ei_packet_traits<Scalar>::size,
|
||||
PacketAlignedMask = PacketSize-1
|
||||
};
|
||||
|
||||
if(PacketSize==1)
|
||||
{
|
||||
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
||||
// of the array have the same aligment.
|
||||
return 0;
|
||||
}
|
||||
else if(size_t(array) & (sizeof(Scalar)-1))
|
||||
{
|
||||
// There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
|
||||
// Consequently, no element of the array is well aligned.
|
||||
return size;
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::min<Integer>( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
|
||||
& PacketAlignedMask, size);
|
||||
}
|
||||
}
|
||||
|
||||
/** \internal
|
||||
|
|
|
@ -52,9 +52,9 @@ public:
|
|||
typedef _Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
|
||||
typedef Matrix<Scalar,AmbientDimAtCompileTime==Dynamic
|
||||
typedef Matrix<Scalar,int(AmbientDimAtCompileTime)==Dynamic
|
||||
? Dynamic
|
||||
: AmbientDimAtCompileTime+1,1> Coefficients;
|
||||
: int(AmbientDimAtCompileTime)+1,1> Coefficients;
|
||||
typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;
|
||||
|
||||
/** Default constructor without initialization */
|
||||
|
|
|
@ -450,22 +450,31 @@ inline Scalar Quaternion<Scalar>::angularDistance(const Quaternion& other) const
|
|||
template <typename Scalar>
|
||||
Quaternion<Scalar> Quaternion<Scalar>::slerp(Scalar t, const Quaternion& other) const
|
||||
{
|
||||
static const Scalar one = Scalar(1) - precision<Scalar>();
|
||||
static const Scalar one = Scalar(1) - machine_epsilon<Scalar>();
|
||||
Scalar d = this->dot(other);
|
||||
Scalar absD = ei_abs(d);
|
||||
|
||||
Scalar scale0;
|
||||
Scalar scale1;
|
||||
|
||||
if (absD>=one)
|
||||
return *this;
|
||||
{
|
||||
scale0 = Scalar(1) - t;
|
||||
scale1 = t;
|
||||
}
|
||||
else
|
||||
{
|
||||
// theta is the angle between the 2 quaternions
|
||||
Scalar theta = std::acos(absD);
|
||||
Scalar sinTheta = ei_sin(theta);
|
||||
|
||||
// theta is the angle between the 2 quaternions
|
||||
Scalar theta = std::acos(absD);
|
||||
Scalar sinTheta = ei_sin(theta);
|
||||
scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta;
|
||||
scale1 = ei_sin( ( t * theta) ) / sinTheta;
|
||||
if (d<0)
|
||||
scale1 = -scale1;
|
||||
}
|
||||
|
||||
Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta;
|
||||
Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta;
|
||||
if (d<0)
|
||||
scale1 = -scale1;
|
||||
|
||||
return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs);
|
||||
return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
|
||||
}
|
||||
|
||||
// set from a rotation matrix
|
||||
|
|
|
@ -29,8 +29,8 @@
|
|||
*** Part 1 : optimized implementations for fixed-size 2,3,4 cases ***
|
||||
********************************************************************/
|
||||
|
||||
template<typename MatrixType>
|
||||
void ei_compute_inverse_in_size2_case(const MatrixType& matrix, MatrixType* result)
|
||||
template<typename XprType, typename MatrixType>
|
||||
void ei_compute_inverse_in_size2_case(const XprType& matrix, MatrixType* result)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
const Scalar invdet = Scalar(1) / matrix.determinant();
|
||||
|
@ -75,99 +75,155 @@ void ei_compute_inverse_in_size3_case(const MatrixType& matrix, MatrixType* resu
|
|||
result->coeffRef(2, 2) = matrix.minor(2,2).determinant() * invdet;
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
bool ei_compute_inverse_in_size4_case_helper(const MatrixType& matrix, MatrixType* result)
|
||||
template<typename MatrixType, typename Scalar = typename MatrixType::Scalar>
|
||||
struct ei_compute_inverse_in_size4_case
|
||||
{
|
||||
/* Let's split M into four 2x2 blocks:
|
||||
* (P Q)
|
||||
* (R S)
|
||||
* If P is invertible, with inverse denoted by P_inverse, and if
|
||||
* (S - R*P_inverse*Q) is also invertible, then the inverse of M is
|
||||
* (P' Q')
|
||||
* (R' S')
|
||||
* where
|
||||
* S' = (S - R*P_inverse*Q)^(-1)
|
||||
* P' = P1 + (P1*Q) * S' *(R*P_inverse)
|
||||
* Q' = -(P_inverse*Q) * S'
|
||||
* R' = -S' * (R*P_inverse)
|
||||
*/
|
||||
typedef Block<MatrixType,2,2> XprBlock22;
|
||||
typedef typename MatrixBase<XprBlock22>::PlainMatrixType Block22;
|
||||
Block22 P_inverse;
|
||||
if(ei_compute_inverse_in_size2_case_with_check(matrix.template block<2,2>(0,0), &P_inverse))
|
||||
static void run(const MatrixType& matrix, MatrixType& result)
|
||||
{
|
||||
const Block22 Q = matrix.template block<2,2>(0,2);
|
||||
const Block22 P_inverse_times_Q = P_inverse * Q;
|
||||
const XprBlock22 R = matrix.template block<2,2>(2,0);
|
||||
const Block22 R_times_P_inverse = R * P_inverse;
|
||||
const Block22 R_times_P_inverse_times_Q = R_times_P_inverse * Q;
|
||||
const XprBlock22 S = matrix.template block<2,2>(2,2);
|
||||
const Block22 X = S - R_times_P_inverse_times_Q;
|
||||
Block22 Y;
|
||||
ei_compute_inverse_in_size2_case(X, &Y);
|
||||
result->template block<2,2>(2,2) = Y;
|
||||
result->template block<2,2>(2,0) = - Y * R_times_P_inverse;
|
||||
const Block22 Z = P_inverse_times_Q * Y;
|
||||
result->template block<2,2>(0,2) = - Z;
|
||||
result->template block<2,2>(0,0) = P_inverse + Z * R_times_P_inverse;
|
||||
return true;
|
||||
result.coeffRef(0,0) = matrix.minor(0,0).determinant();
|
||||
result.coeffRef(1,0) = -matrix.minor(0,1).determinant();
|
||||
result.coeffRef(2,0) = matrix.minor(0,2).determinant();
|
||||
result.coeffRef(3,0) = -matrix.minor(0,3).determinant();
|
||||
result.coeffRef(0,2) = matrix.minor(2,0).determinant();
|
||||
result.coeffRef(1,2) = -matrix.minor(2,1).determinant();
|
||||
result.coeffRef(2,2) = matrix.minor(2,2).determinant();
|
||||
result.coeffRef(3,2) = -matrix.minor(2,3).determinant();
|
||||
result.coeffRef(0,1) = -matrix.minor(1,0).determinant();
|
||||
result.coeffRef(1,1) = matrix.minor(1,1).determinant();
|
||||
result.coeffRef(2,1) = -matrix.minor(1,2).determinant();
|
||||
result.coeffRef(3,1) = matrix.minor(1,3).determinant();
|
||||
result.coeffRef(0,3) = -matrix.minor(3,0).determinant();
|
||||
result.coeffRef(1,3) = matrix.minor(3,1).determinant();
|
||||
result.coeffRef(2,3) = -matrix.minor(3,2).determinant();
|
||||
result.coeffRef(3,3) = matrix.minor(3,3).determinant();
|
||||
result /= (matrix.col(0).cwise()*result.row(0).transpose()).sum();
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE
|
||||
// The SSE code for the 4x4 float matrix inverse in this file comes from the file
|
||||
// ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
|
||||
// its copyright information is:
|
||||
// Copyright (C) 1999 Intel Corporation
|
||||
// See page ii of that document for legal stuff. Not being lawyers, we just assume
|
||||
// here that if Intel makes this document publically available, with source code
|
||||
// and detailed explanations, it's because they want their CPUs to be fed with
|
||||
// good code, and therefore they presumably don't mind us using it in Eigen.
|
||||
template<typename MatrixType>
|
||||
void ei_compute_inverse_in_size4_case(const MatrixType& _matrix, MatrixType* result)
|
||||
struct ei_compute_inverse_in_size4_case<MatrixType, float>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
static void run(const MatrixType& matrix, MatrixType& result)
|
||||
{
|
||||
// Variables (Streaming SIMD Extensions registers) which will contain cofactors and, later, the
|
||||
// lines of the inverted matrix.
|
||||
__m128 minor0, minor1, minor2, minor3;
|
||||
|
||||
// we will do row permutations on the matrix. This copy should have negligible cost.
|
||||
// if not, consider working in-place on the matrix (const-cast it, but then undo the permutations
|
||||
// to nevertheless honor constness)
|
||||
typename MatrixType::PlainMatrixType matrix(_matrix);
|
||||
// Variables which will contain the lines of the reference matrix and, later (after the transposition),
|
||||
// the columns of the original matrix.
|
||||
__m128 row0, row1, row2, row3;
|
||||
|
||||
// let's extract from the 2 first colums a 2x2 block whose determinant is as big as possible.
|
||||
int good_row0, good_row1, good_i;
|
||||
Matrix<RealScalar,6,1> absdet;
|
||||
// Temporary variables and the variable that will contain the matrix determinant.
|
||||
__m128 det, tmp1;
|
||||
|
||||
// any 2x2 block with determinant above this threshold will be considered good enough.
|
||||
// The magic value 1e-1 here comes from experimentation. The bigger it is, the higher the precision,
|
||||
// the slower the computation. This value 1e-1 gives precision almost as good as the brutal cofactors
|
||||
// algorithm, both in average and in worst-case precision.
|
||||
RealScalar d = (matrix.col(0).squaredNorm()+matrix.col(1).squaredNorm()) * RealScalar(1e-1);
|
||||
#define ei_inv_size4_helper_macro(i,row0,row1) \
|
||||
absdet[i] = ei_abs(matrix.coeff(row0,0)*matrix.coeff(row1,1) \
|
||||
- matrix.coeff(row0,1)*matrix.coeff(row1,0)); \
|
||||
if(absdet[i] > d) { good_row0=row0; good_row1=row1; goto good; }
|
||||
ei_inv_size4_helper_macro(0,0,1)
|
||||
ei_inv_size4_helper_macro(1,0,2)
|
||||
ei_inv_size4_helper_macro(2,0,3)
|
||||
ei_inv_size4_helper_macro(3,1,2)
|
||||
ei_inv_size4_helper_macro(4,1,3)
|
||||
ei_inv_size4_helper_macro(5,2,3)
|
||||
// Matrix transposition
|
||||
const float *src = matrix.data();
|
||||
tmp1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)src)), (__m64*)(src+ 4));
|
||||
row1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+8))), (__m64*)(src+12));
|
||||
row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
|
||||
row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
|
||||
tmp1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+ 2))), (__m64*)(src+ 6));
|
||||
row3 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+10))), (__m64*)(src+14));
|
||||
row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
|
||||
row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
|
||||
|
||||
// no 2x2 block has determinant bigger than the threshold. So just take the one that
|
||||
// has the biggest determinant
|
||||
absdet.maxCoeff(&good_i);
|
||||
good_row0 = good_i <= 2 ? 0 : good_i <= 4 ? 1 : 2;
|
||||
good_row1 = good_i <= 2 ? good_i+1 : good_i <= 4 ? good_i-1 : 3;
|
||||
|
||||
// now good_row0 and good_row1 are correctly set
|
||||
good:
|
||||
// Cofactors calculation. Because in the process of cofactor computation some pairs in three-
|
||||
// element products are repeated, it is not reasonable to load these pairs anew every time. The
|
||||
// values in the registers with these pairs are formed using shuffle instruction. Cofactors are
|
||||
// calculated row by row (4 elements are placed in 1 SP FP SIMD floating point register).
|
||||
|
||||
// do row permutations to move this 2x2 block to the top
|
||||
matrix.row(0).swap(matrix.row(good_row0));
|
||||
matrix.row(1).swap(matrix.row(good_row1));
|
||||
// now applying our helper function is numerically stable
|
||||
ei_compute_inverse_in_size4_case_helper(matrix, result);
|
||||
// Since we did row permutations on the original matrix, we need to do column permutations
|
||||
// in the reverse order on the inverse
|
||||
result->col(1).swap(result->col(good_row1));
|
||||
result->col(0).swap(result->col(good_row0));
|
||||
}
|
||||
tmp1 = _mm_mul_ps(row2, row3);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
minor0 = _mm_mul_ps(row1, tmp1);
|
||||
minor1 = _mm_mul_ps(row0, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
|
||||
minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
|
||||
minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E);
|
||||
// -----------------------------------------------
|
||||
tmp1 = _mm_mul_ps(row1, row2);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
|
||||
minor3 = _mm_mul_ps(row0, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
|
||||
minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
|
||||
minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E);
|
||||
// -----------------------------------------------
|
||||
tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
row2 = _mm_shuffle_ps(row2, row2, 0x4E);
|
||||
minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
|
||||
minor2 = _mm_mul_ps(row0, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
|
||||
minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
|
||||
minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E);
|
||||
// -----------------------------------------------
|
||||
tmp1 = _mm_mul_ps(row0, row1);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
|
||||
minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
|
||||
minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
|
||||
// -----------------------------------------------
|
||||
tmp1 = _mm_mul_ps(row0, row3);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
|
||||
minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
|
||||
minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
|
||||
// -----------------------------------------------
|
||||
tmp1 = _mm_mul_ps(row0, row2);
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
|
||||
minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
|
||||
minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
|
||||
tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
|
||||
minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
|
||||
minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
|
||||
|
||||
// Evaluation of determinant and its reciprocal value. In the original Intel document,
|
||||
// 1/det was evaluated using a fast rcpps command with subsequent approximation using
|
||||
// the Newton-Raphson algorithm. Here, we go for a IEEE-compliant division instead,
|
||||
// so as to not compromise precision at all.
|
||||
det = _mm_mul_ps(row0, minor0);
|
||||
det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
|
||||
det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
|
||||
// tmp1= _mm_rcp_ss(det);
|
||||
// det= _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
|
||||
det = _mm_div_ss(_mm_set_ss(1.0f), det); // <--- yay, one original line not copied from Intel
|
||||
det = _mm_shuffle_ps(det, det, 0x00);
|
||||
// warning, Intel's variable naming is very confusing: now 'det' is 1/det !
|
||||
|
||||
// Multiplication of cofactors by 1/det. Storing the inverse matrix to the address in pointer src.
|
||||
minor0 = _mm_mul_ps(det, minor0);
|
||||
float *dst = result.data();
|
||||
_mm_storel_pi((__m64*)(dst), minor0);
|
||||
_mm_storeh_pi((__m64*)(dst+2), minor0);
|
||||
minor1 = _mm_mul_ps(det, minor1);
|
||||
_mm_storel_pi((__m64*)(dst+4), minor1);
|
||||
_mm_storeh_pi((__m64*)(dst+6), minor1);
|
||||
minor2 = _mm_mul_ps(det, minor2);
|
||||
_mm_storel_pi((__m64*)(dst+ 8), minor2);
|
||||
_mm_storeh_pi((__m64*)(dst+10), minor2);
|
||||
minor3 = _mm_mul_ps(det, minor3);
|
||||
_mm_storel_pi((__m64*)(dst+12), minor3);
|
||||
_mm_storeh_pi((__m64*)(dst+14), minor3);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
/***********************************************
|
||||
*** Part 2 : selector and MatrixBase methods ***
|
||||
|
@ -216,7 +272,7 @@ struct ei_compute_inverse<MatrixType, 4>
|
|||
{
|
||||
static inline void run(const MatrixType& matrix, MatrixType* result)
|
||||
{
|
||||
ei_compute_inverse_in_size4_case(matrix, result);
|
||||
ei_compute_inverse_in_size4_case<MatrixType>::run(matrix, *result);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -43,8 +43,11 @@ struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,RowMajor|IsSparse>
|
|||
{
|
||||
lastVal = it.value();
|
||||
lastIndex = it.index();
|
||||
if(lastIndex == i)
|
||||
break;
|
||||
tmp -= lastVal * other.coeff(lastIndex,col);
|
||||
}
|
||||
|
||||
if (Lhs::Flags & UnitDiagBit)
|
||||
other.coeffRef(i,col) = tmp;
|
||||
else
|
||||
|
|
|
@ -79,6 +79,16 @@ class CurvePlot
|
|||
double subdivisionThreshold,
|
||||
double startTime,
|
||||
double endTime) const;
|
||||
void renderFaded(const Eigen::Transform3d& modelview,
|
||||
double nearZ,
|
||||
double farZ,
|
||||
const Eigen::Vector3d viewFrustumPlaneNormals[],
|
||||
double subdivisionThreshold,
|
||||
double startTime,
|
||||
double endTime,
|
||||
const Eigen::Vector4f& color,
|
||||
double fadeStartTime,
|
||||
double fadeEndTime) const;
|
||||
|
||||
unsigned int lastUsed() const { return m_lastUsed; }
|
||||
void setLastUsed(unsigned int lastUsed) { m_lastUsed = lastUsed; }
|
||||
|
@ -89,6 +99,8 @@ class CurvePlot
|
|||
|
||||
bool empty() const { return m_samples.empty(); }
|
||||
|
||||
unsigned int sampleCount() const { return m_samples.size(); }
|
||||
|
||||
private:
|
||||
std::deque<CurvePlotSample> m_samples;
|
||||
|
||||
|
|
|
@ -235,6 +235,25 @@ public:
|
|||
#endif
|
||||
}
|
||||
|
||||
inline void vertex(const Vector4d& v, const Vector4f& color)
|
||||
{
|
||||
#if USE_VERTEX_BUFFER
|
||||
data[currentPosition++] = v.cast<float>();
|
||||
++currentStripLength;
|
||||
if (currentPosition == capacity)
|
||||
{
|
||||
flush();
|
||||
|
||||
data[0] = v.cast<float>();
|
||||
currentPosition = 1;
|
||||
currentStripLength = 1;
|
||||
}
|
||||
#else
|
||||
glColor4fv(color.data());
|
||||
glVertex3dv(v.data());
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void begin()
|
||||
{
|
||||
#if !USE_VERTEX_BUFFER
|
||||
|
@ -423,6 +442,86 @@ public:
|
|||
return restartCurve;
|
||||
}
|
||||
|
||||
// Return the GL restart status: true if the last segment of the
|
||||
// curve was culled and we need to start a new primitive sequence
|
||||
// with glBegin().
|
||||
bool renderCubicFaded(bool restartCurve,
|
||||
const Matrix4d& coeff,
|
||||
double t0, double t1,
|
||||
const Vector4f& color,
|
||||
double fadeStart, double fadeRate,
|
||||
double curveBoundingRadius,
|
||||
int depth) const
|
||||
{
|
||||
const double dt = (t1 - t0) * InvSubdivisionFactor;
|
||||
double segmentBoundingRadius = curveBoundingRadius * InvSubdivisionFactor;
|
||||
|
||||
#if DEBUG_ADAPTIVE_SPLINE
|
||||
{
|
||||
int c = depth % 10;
|
||||
glColor4f(SplineColors[c][0], SplineColors[c][1], SplineColors[c][2], 1.0f);
|
||||
++SegmentCounts[depth];
|
||||
}
|
||||
#endif
|
||||
|
||||
Vector4d lastP = coeff * Vector4d(1.0, t0, t0 * t0, t0 * t0 * t0);
|
||||
double lastOpacity = (t0 - fadeStart) * fadeRate;
|
||||
lastOpacity = max(0.0, min(1.0, lastOpacity)); // clamp
|
||||
|
||||
for (unsigned int i = 1; i <= SubdivisionFactor; i++)
|
||||
{
|
||||
double t = t0 + dt * i;
|
||||
Vector4d p = coeff * Vector4d(1.0, t, t * t, t * t * t);
|
||||
double opacity = (t - fadeStart) * fadeRate;
|
||||
opacity = max(0.0, min(1.0, opacity)); // clamp
|
||||
|
||||
double minDistance = max(-m_viewFrustum.nearZ(), abs(p.z()) - segmentBoundingRadius);
|
||||
|
||||
if (segmentBoundingRadius >= m_subdivisionThreshold * minDistance)
|
||||
{
|
||||
if (m_viewFrustum.cullSphere(p, segmentBoundingRadius))
|
||||
{
|
||||
if (!restartCurve)
|
||||
{
|
||||
m_vbuf.end();
|
||||
restartCurve = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
restartCurve = renderCubicFaded(restartCurve,
|
||||
coeff, t - dt, t,
|
||||
color,
|
||||
fadeStart, fadeRate,
|
||||
segmentBoundingRadius,
|
||||
depth + 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if DEBUG_ADAPTIVE_SPLINE
|
||||
{
|
||||
int c = depth % 10;
|
||||
glColor4f(SplineColors[c][0], SplineColors[c][1], SplineColors[c][2], i % 2 ? 0.25f : 1.0f);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (restartCurve)
|
||||
{
|
||||
m_vbuf.begin();
|
||||
m_vbuf.vertex(lastP, Vector4f(color.x(), color.y(), color.z(), color.w() * float(lastOpacity)));
|
||||
restartCurve = false;
|
||||
}
|
||||
|
||||
m_vbuf.vertex(p, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity)));
|
||||
}
|
||||
lastP = p;
|
||||
lastOpacity = opacity;
|
||||
}
|
||||
|
||||
return restartCurve;
|
||||
}
|
||||
|
||||
private:
|
||||
HighPrec_VertexBuffer& m_vbuf;
|
||||
HighPrec_Frustum& m_viewFrustum;
|
||||
|
@ -554,7 +653,6 @@ CurvePlot::render(const Transform3d& modelview,
|
|||
vbuf.createVertexBuffer();
|
||||
vbuf.setup();
|
||||
|
||||
clog << "size: " << m_samples.size() << endl;
|
||||
for (unsigned int i = 1; i < m_samples.size(); i++)
|
||||
{
|
||||
// Transform the points into camera space.
|
||||
|
@ -805,3 +903,170 @@ CurvePlot::render(const Transform3d& modelview,
|
|||
vbuf.flush();
|
||||
vbuf.finish();
|
||||
}
|
||||
|
||||
|
||||
/** Draw a curve with a fade effect. The curve is at full opacity at fadeStartTime
|
||||
* and completely transparent at fadeEndTime. fadeStartTime may be greater than
|
||||
* fadeEndTime--this just means that the fade direction will be reversed.
|
||||
*/
|
||||
void
|
||||
CurvePlot::renderFaded(const Eigen::Transform3d& modelview,
|
||||
double nearZ,
|
||||
double farZ,
|
||||
const Eigen::Vector3d viewFrustumPlaneNormals[],
|
||||
double subdivisionThreshold,
|
||||
double startTime,
|
||||
double endTime,
|
||||
const Vector4f& color,
|
||||
double fadeStartTime,
|
||||
double fadeEndTime) const
|
||||
{
|
||||
// Flag to indicate whether we need to issue a glBegin()
|
||||
bool restartCurve = true;
|
||||
|
||||
if (m_samples.empty() || endTime <= m_samples.front().t || startTime >= m_samples.back().t)
|
||||
return;
|
||||
|
||||
// Linear search for the first sample
|
||||
unsigned int startSample = 0;
|
||||
while (startSample < m_samples.size() - 1 && startTime < m_samples[startSample].t)
|
||||
startSample++;
|
||||
|
||||
// Start at the first sample with time <= startTime
|
||||
if (startSample > 0)
|
||||
startSample--;
|
||||
|
||||
double fadeDuration = fadeEndTime - fadeStartTime;
|
||||
double fadeRate = 1.0 / fadeDuration;
|
||||
|
||||
const Vector3d& p0_ = m_samples[startSample].position;
|
||||
const Vector3d& v0_ = m_samples[startSample].velocity;
|
||||
Vector4d p0 = modelview * Vector4d(p0_.x(), p0_.y(), p0_.z(), 1.0);
|
||||
Vector4d v0 = modelview * Vector4d(v0_.x(), v0_.y(), v0_.z(), 0.0);
|
||||
double opacity0 = (m_samples[startSample].t - fadeStartTime) * fadeRate;
|
||||
opacity0 = max(0.0, min(1.0, opacity0));
|
||||
|
||||
HighPrec_Frustum viewFrustum(nearZ, farZ, viewFrustumPlaneNormals);
|
||||
HighPrec_RenderContext rc(vbuf, viewFrustum, subdivisionThreshold);
|
||||
|
||||
vbuf.createVertexBuffer();
|
||||
vbuf.setup();
|
||||
|
||||
bool firstSegment = true;
|
||||
bool lastSegment = false;
|
||||
|
||||
for (unsigned int i = startSample + 1; i < m_samples.size() && !lastSegment; i++)
|
||||
{
|
||||
// Transform the points into camera space.
|
||||
const Vector3d& p1_ = m_samples[i].position;
|
||||
const Vector3d& v1_ = m_samples[i].velocity;
|
||||
Vector4d p1 = modelview * Vector4d(p1_.x(), p1_.y(), p1_.z(), 1.0);
|
||||
Vector4d v1 = modelview * Vector4d(v1_.x(), v1_.y(), v1_.z(), 0.0);
|
||||
double opacity1 = (m_samples[i].t - fadeStartTime) * fadeRate;
|
||||
opacity1 = max(0.0, min(1.0, opacity1));
|
||||
|
||||
if (endTime <= m_samples[i].t)
|
||||
{
|
||||
lastSegment = true;
|
||||
}
|
||||
|
||||
// O(t) is an approximating function for this segment of
|
||||
// the orbit, with 0 <= t <= 1
|
||||
// C is the viewer position
|
||||
// d(t) = |O(t) - C|, the distance from viewer to the
|
||||
// orbit segment.
|
||||
|
||||
double curveBoundingRadius = m_samples[i].boundingRadius;
|
||||
|
||||
// Estimate the minimum possible distance from the
|
||||
// curve to the z=0 plane. If the curve is far enough
|
||||
// away to be approximated as a straight line, we'll just
|
||||
// render it. Otherwise, it should be a performance win
|
||||
// to do a sphere-frustum cull test before subdividing and
|
||||
// rendering segment.
|
||||
double minDistance = abs(p0.z()) - curveBoundingRadius;
|
||||
|
||||
// Render close segments as splines with adaptive subdivision. The
|
||||
// subdivisions eliminates kinks between line segments and also
|
||||
// prevents clipping precision problems that occur when a
|
||||
// very long line is rendered with a relatively small view
|
||||
// volume.
|
||||
if (curveBoundingRadius >= subdivisionThreshold * minDistance || lastSegment || firstSegment)
|
||||
{
|
||||
// Skip rendering this section if it lies outside the view
|
||||
// frustum.
|
||||
if (viewFrustum.cullSphere(p0, curveBoundingRadius))
|
||||
{
|
||||
if (!restartCurve)
|
||||
{
|
||||
vbuf.end();
|
||||
restartCurve = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
double dt = m_samples[i].t - m_samples[i - 1].t;
|
||||
double t0 = 0.0;
|
||||
double t1 = 1.0;
|
||||
|
||||
if (firstSegment)
|
||||
{
|
||||
t0 = (startTime - m_samples[i - 1].t) / dt;
|
||||
t0 = std::max(0.0, std::min(1.0, t0));
|
||||
firstSegment = false;
|
||||
}
|
||||
|
||||
if (lastSegment)
|
||||
{
|
||||
t1 = (endTime - m_samples[i - 1].t) / dt;
|
||||
}
|
||||
|
||||
Matrix4d coeff = cubicHermiteCoefficients(p0, p1, v0 * dt, v1 * dt);
|
||||
restartCurve = rc.renderCubicFaded(restartCurve, coeff,
|
||||
t0, t1,
|
||||
color,
|
||||
(fadeStartTime - m_samples[i - 1].t) / dt, fadeRate * dt,
|
||||
curveBoundingRadius, 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Apparent size of curve is small enough that we can approximate
|
||||
// it as a line.
|
||||
|
||||
// Simple cull test--just check the far plane. This is required because
|
||||
// apparent clipping precision limitations can cause a GPU to draw lines
|
||||
// that lie completely beyond the far plane.
|
||||
if (p0.z() + curveBoundingRadius < farZ)
|
||||
{
|
||||
if (!restartCurve)
|
||||
{
|
||||
vbuf.end();
|
||||
restartCurve = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (restartCurve)
|
||||
{
|
||||
vbuf.begin();
|
||||
vbuf.vertex(p0, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity0)));
|
||||
restartCurve = false;
|
||||
}
|
||||
vbuf.vertex(p1, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity1)));
|
||||
}
|
||||
}
|
||||
|
||||
p0 = p1;
|
||||
v0 = v1;
|
||||
opacity0 = opacity1;
|
||||
}
|
||||
|
||||
if (!restartCurve)
|
||||
{
|
||||
vbuf.end();
|
||||
}
|
||||
|
||||
vbuf.flush();
|
||||
vbuf.finish();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue