From 472416d88063bfe68bb8c2b227971eea096c63d8 Mon Sep 17 00:00:00 2001 From: Chris Laurel Date: Thu, 14 Jan 2010 00:13:51 +0000 Subject: [PATCH] Updated Eigen to version 2.0.11, which features precision fixes for quaternion slerp and 4x4 matrix inversion. --- thirdparty/Eigen/Eigen/src/Core/Part.h | 2 +- thirdparty/Eigen/Eigen/src/Core/util/Macros.h | 2 +- thirdparty/Eigen/Eigen/src/Core/util/Memory.h | 49 +++- .../Eigen/Eigen/src/Geometry/Hyperplane.h | 4 +- .../Eigen/Eigen/src/Geometry/Quaternion.h | 31 +- thirdparty/Eigen/Eigen/src/LU/Inverse.h | 224 +++++++++------ .../Eigen/Eigen/src/Sparse/TriangularSolver.h | 3 + thirdparty/curveplot/include/curveplot.h | 12 + thirdparty/curveplot/src/curveplot.cpp | 267 +++++++++++++++++- 9 files changed, 484 insertions(+), 110 deletions(-) diff --git a/thirdparty/Eigen/Eigen/src/Core/Part.h b/thirdparty/Eigen/Eigen/src/Core/Part.h index 96229f43b..65f4bc194 100644 --- a/thirdparty/Eigen/Eigen/src/Core/Part.h +++ b/thirdparty/Eigen/Eigen/src/Core/Part.h @@ -50,7 +50,7 @@ struct ei_traits > : ei_traits typedef typename ei_unref::type _MatrixTypeNested; enum { Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ConditionalJumpCost }; }; diff --git a/thirdparty/Eigen/Eigen/src/Core/util/Macros.h b/thirdparty/Eigen/Eigen/src/Core/util/Macros.h index cdafc73f3..6a0d692a6 100644 --- a/thirdparty/Eigen/Eigen/src/Core/util/Macros.h +++ b/thirdparty/Eigen/Eigen/src/Core/util/Macros.h @@ -30,7 +30,7 @@ #define EIGEN_WORLD_VERSION 2 #define EIGEN_MAJOR_VERSION 0 -#define EIGEN_MINOR_VERSION 10 +#define EIGEN_MINOR_VERSION 11 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ diff --git a/thirdparty/Eigen/Eigen/src/Core/util/Memory.h b/thirdparty/Eigen/Eigen/src/Core/util/Memory.h index 0a43e7f7b..d12a2ec26 100644 --- a/thirdparty/Eigen/Eigen/src/Core/util/Memory.h +++ b/thirdparty/Eigen/Eigen/src/Core/util/Memory.h @@ -209,18 +209,47 @@ template inline void ei_conditional_aligned_delete(T *pt ei_conditional_aligned_free(ptr); } -/** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */ -template -inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) +/** \internal \returns the index of the first element of the array that is well aligned for vectorization. + * + * \param array the address of the start of the array + * \param size the size of the array + * + * \note If no element of the array is well aligned, the size of the array is returned. Typically, + * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the + * packet size for the given scalar type is 1, then everything is considered well-aligned. + * + * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a + * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the + * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for + * example with Scalar=double on certain 32-bit platforms, see bug #79. + * + * There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h. 
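As an aside for readers of this hunk: the index computation that the new documentation describes can be written out in a few lines. The sketch below is illustrative only — plain float, a 16-byte SSE packet, and a hypothetical name first_aligned_sketch — not the templated Eigen implementation that follows.

#include <algorithm>
#include <cstddef>

// Illustrative only: assumes sizeof(float) == 4 and a 16-byte packet,
// so PacketSize == 4 floats (the SSE case described in the comment above).
static int first_aligned_sketch(const float* array, int size)
{
    const int PacketSize = 4;
    const int PacketAlignedMask = PacketSize - 1;   // works because PacketSize is a power of 2
    std::size_t addr = (std::size_t)array;
    if (addr & (sizeof(float) - 1))
        return size;   // not even scalar-aligned: no element qualifies
    int misalign = int(addr / sizeof(float)) & PacketAlignedMask;
    // Number of elements to skip so that (array + offset) is 16-byte
    // aligned, capped by the array size.
    return std::min((PacketSize - misalign) & PacketAlignedMask, size);
}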
+ */ +template +inline static Integer ei_alignmentOffset(const Scalar* array, Integer size) { typedef typename ei_packet_traits::type Packet; - const int PacketSize = ei_packet_traits::size; - const int PacketAlignedMask = PacketSize-1; - const bool Vectorized = PacketSize>1; - return Vectorized - ? std::min( (PacketSize - (int((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask)) - & PacketAlignedMask, maxOffset) - : 0; + enum { PacketSize = ei_packet_traits::size, + PacketAlignedMask = PacketSize-1 + }; + + if(PacketSize==1) + { + // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements + // of the array have the same aligment. + return 0; + } + else if(size_t(array) & (sizeof(Scalar)-1)) + { + // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar. + // Consequently, no element of the array is well aligned. + return size; + } + else + { + return std::min( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask)) + & PacketAlignedMask, size); + } } /** \internal diff --git a/thirdparty/Eigen/Eigen/src/Geometry/Hyperplane.h b/thirdparty/Eigen/Eigen/src/Geometry/Hyperplane.h index 22c530d4b..b1b26062f 100644 --- a/thirdparty/Eigen/Eigen/src/Geometry/Hyperplane.h +++ b/thirdparty/Eigen/Eigen/src/Geometry/Hyperplane.h @@ -52,9 +52,9 @@ public: typedef _Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef Matrix VectorType; - typedef Matrix Coefficients; + : int(AmbientDimAtCompileTime)+1,1> Coefficients; typedef Block NormalReturnType; /** Default constructor without initialization */ diff --git a/thirdparty/Eigen/Eigen/src/Geometry/Quaternion.h b/thirdparty/Eigen/Eigen/src/Geometry/Quaternion.h index 3fcbff4e7..51dfc270c 100644 --- a/thirdparty/Eigen/Eigen/src/Geometry/Quaternion.h +++ b/thirdparty/Eigen/Eigen/src/Geometry/Quaternion.h @@ -450,22 +450,31 @@ inline Scalar Quaternion::angularDistance(const Quaternion& other) const template Quaternion Quaternion::slerp(Scalar t, const Quaternion& other) const { - static const Scalar one = Scalar(1) - precision(); + static const Scalar one = Scalar(1) - machine_epsilon(); Scalar d = this->dot(other); Scalar absD = ei_abs(d); + + Scalar scale0; + Scalar scale1; + if (absD>=one) - return *this; + { + scale0 = Scalar(1) - t; + scale1 = t; + } + else + { + // theta is the angle between the 2 quaternions + Scalar theta = std::acos(absD); + Scalar sinTheta = ei_sin(theta); - // theta is the angle between the 2 quaternions - Scalar theta = std::acos(absD); - Scalar sinTheta = ei_sin(theta); + scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; + scale1 = ei_sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + } - Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; - Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta; - if (d<0) - scale1 = -scale1; - - return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs); + return Quaternion(scale0 * coeffs() + scale1 * other.coeffs()); } // set from a rotation matrix diff --git a/thirdparty/Eigen/Eigen/src/LU/Inverse.h b/thirdparty/Eigen/Eigen/src/LU/Inverse.h index 2f15f0953..98e7fc5d0 100644 --- a/thirdparty/Eigen/Eigen/src/LU/Inverse.h +++ b/thirdparty/Eigen/Eigen/src/LU/Inverse.h @@ -29,8 +29,8 @@ *** Part 1 : optimized implementations for fixed-size 2,3,4 cases *** ********************************************************************/ -template -void ei_compute_inverse_in_size2_case(const MatrixType& matrix, MatrixType* result) +template +void 
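The rewritten slerp above is the precision fix named in the commit message: instead of returning *this when the two quaternions are nearly parallel, it falls back to linear interpolation, avoiding a division by a vanishing sin(theta). Here is a self-contained sketch of the same strategy, with a hypothetical Quat type and an arbitrary 1e-12 threshold standing in for Eigen's machine_epsilon():

#include <cmath>

// Hypothetical minimal quaternion; Eigen stores (x,y,z,w) internally but
// the layout is irrelevant to the point being made.
struct Quat { double w, x, y, z; };

Quat slerp_sketch(const Quat& a, const Quat& b, double t)
{
    double d = a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z;
    double absD = std::fabs(d);

    double scale0, scale1;
    if (absD >= 1.0 - 1e-12)      // arbitrary stand-in for machine_epsilon()
    {
        scale0 = 1.0 - t;         // nearly parallel: sin(theta) ~ 0 and the
        scale1 = t;               // sin-ratio weights blow up, so lerp instead
    }
    else
    {
        double theta    = std::acos(absD);
        double sinTheta = std::sin(theta);
        scale0 = std::sin((1.0 - t) * theta) / sinTheta;
        scale1 = std::sin(t * theta) / sinTheta;
        if (d < 0.0)
            scale1 = -scale1;     // take the shorter arc, as in the patch
    }
    Quat r = { scale0*a.w + scale1*b.w, scale0*a.x + scale1*b.x,
               scale0*a.y + scale1*b.y, scale0*a.z + scale1*b.z };
    return r;
}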
ei_compute_inverse_in_size2_case(const XprType& matrix, MatrixType* result) { typedef typename MatrixType::Scalar Scalar; const Scalar invdet = Scalar(1) / matrix.determinant(); @@ -75,99 +75,155 @@ void ei_compute_inverse_in_size3_case(const MatrixType& matrix, MatrixType* resu result->coeffRef(2, 2) = matrix.minor(2,2).determinant() * invdet; } -template -bool ei_compute_inverse_in_size4_case_helper(const MatrixType& matrix, MatrixType* result) +template +struct ei_compute_inverse_in_size4_case { - /* Let's split M into four 2x2 blocks: - * (P Q) - * (R S) - * If P is invertible, with inverse denoted by P_inverse, and if - * (S - R*P_inverse*Q) is also invertible, then the inverse of M is - * (P' Q') - * (R' S') - * where - * S' = (S - R*P_inverse*Q)^(-1) - * P' = P1 + (P1*Q) * S' *(R*P_inverse) - * Q' = -(P_inverse*Q) * S' - * R' = -S' * (R*P_inverse) - */ - typedef Block XprBlock22; - typedef typename MatrixBase::PlainMatrixType Block22; - Block22 P_inverse; - if(ei_compute_inverse_in_size2_case_with_check(matrix.template block<2,2>(0,0), &P_inverse)) + static void run(const MatrixType& matrix, MatrixType& result) { - const Block22 Q = matrix.template block<2,2>(0,2); - const Block22 P_inverse_times_Q = P_inverse * Q; - const XprBlock22 R = matrix.template block<2,2>(2,0); - const Block22 R_times_P_inverse = R * P_inverse; - const Block22 R_times_P_inverse_times_Q = R_times_P_inverse * Q; - const XprBlock22 S = matrix.template block<2,2>(2,2); - const Block22 X = S - R_times_P_inverse_times_Q; - Block22 Y; - ei_compute_inverse_in_size2_case(X, &Y); - result->template block<2,2>(2,2) = Y; - result->template block<2,2>(2,0) = - Y * R_times_P_inverse; - const Block22 Z = P_inverse_times_Q * Y; - result->template block<2,2>(0,2) = - Z; - result->template block<2,2>(0,0) = P_inverse + Z * R_times_P_inverse; - return true; + result.coeffRef(0,0) = matrix.minor(0,0).determinant(); + result.coeffRef(1,0) = -matrix.minor(0,1).determinant(); + result.coeffRef(2,0) = matrix.minor(0,2).determinant(); + result.coeffRef(3,0) = -matrix.minor(0,3).determinant(); + result.coeffRef(0,2) = matrix.minor(2,0).determinant(); + result.coeffRef(1,2) = -matrix.minor(2,1).determinant(); + result.coeffRef(2,2) = matrix.minor(2,2).determinant(); + result.coeffRef(3,2) = -matrix.minor(2,3).determinant(); + result.coeffRef(0,1) = -matrix.minor(1,0).determinant(); + result.coeffRef(1,1) = matrix.minor(1,1).determinant(); + result.coeffRef(2,1) = -matrix.minor(1,2).determinant(); + result.coeffRef(3,1) = matrix.minor(1,3).determinant(); + result.coeffRef(0,3) = -matrix.minor(3,0).determinant(); + result.coeffRef(1,3) = matrix.minor(3,1).determinant(); + result.coeffRef(2,3) = -matrix.minor(3,2).determinant(); + result.coeffRef(3,3) = matrix.minor(3,3).determinant(); + result /= (matrix.col(0).cwise()*result.row(0).transpose()).sum(); } - else - { - return false; - } -} +}; +#ifdef EIGEN_VECTORIZE_SSE +// The SSE code for the 4x4 float matrix inverse in this file comes from the file +// ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf +// its copyright information is: +// Copyright (C) 1999 Intel Corporation +// See page ii of that document for legal stuff. Not being lawyers, we just assume +// here that if Intel makes this document publically available, with source code +// and detailed explanations, it's because they want their CPUs to be fed with +// good code, and therefore they presumably don't mind us using it in Eigen. 
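The new size-4 routine above fills in the adjugate entry by entry and then divides by the determinant, which its final line obtains by dotting column 0 of the matrix with row 0 of the adjugate — a Laplace expansion down the first column. A plain-array sketch of that identity, with hypothetical helpers det3/det4:

// det(A) = sum_i A(i,0) * C(i,0), where C(i,0) = (-1)^i * minor(i,0) --
// the same quantity that
//   result /= (matrix.col(0).cwise()*result.row(0).transpose()).sum();
// computes, since result holds the transposed cofactor matrix.
static double det3(const double m[4][4], int r0, int r1, int r2,
                                         int c0, int c1, int c2)
{
    return m[r0][c0]*(m[r1][c1]*m[r2][c2] - m[r1][c2]*m[r2][c1])
         - m[r0][c1]*(m[r1][c0]*m[r2][c2] - m[r1][c2]*m[r2][c0])
         + m[r0][c2]*(m[r1][c0]*m[r2][c1] - m[r1][c1]*m[r2][c0]);
}

static double det4(const double m[4][4])
{
    // minor(i,0): delete row i and column 0; the sign alternates with i.
    const int rows[4][3] = { {1,2,3}, {0,2,3}, {0,1,3}, {0,1,2} };
    double d = 0.0;
    for (int i = 0; i < 4; ++i)
    {
        double c = det3(m, rows[i][0], rows[i][1], rows[i][2], 1, 2, 3);
        d += ((i & 1) ? -1.0 : 1.0) * m[i][0] * c;
    }
    return d;
}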
template -void ei_compute_inverse_in_size4_case(const MatrixType& _matrix, MatrixType* result) +struct ei_compute_inverse_in_size4_case { - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; + static void run(const MatrixType& matrix, MatrixType& result) + { + // Variables (Streaming SIMD Extensions registers) which will contain cofactors and, later, the + // lines of the inverted matrix. + __m128 minor0, minor1, minor2, minor3; - // we will do row permutations on the matrix. This copy should have negligible cost. - // if not, consider working in-place on the matrix (const-cast it, but then undo the permutations - // to nevertheless honor constness) - typename MatrixType::PlainMatrixType matrix(_matrix); + // Variables which will contain the lines of the reference matrix and, later (after the transposition), + // the columns of the original matrix. + __m128 row0, row1, row2, row3; - // let's extract from the 2 first colums a 2x2 block whose determinant is as big as possible. - int good_row0, good_row1, good_i; - Matrix absdet; + // Temporary variables and the variable that will contain the matrix determinant. + __m128 det, tmp1; - // any 2x2 block with determinant above this threshold will be considered good enough. - // The magic value 1e-1 here comes from experimentation. The bigger it is, the higher the precision, - // the slower the computation. This value 1e-1 gives precision almost as good as the brutal cofactors - // algorithm, both in average and in worst-case precision. - RealScalar d = (matrix.col(0).squaredNorm()+matrix.col(1).squaredNorm()) * RealScalar(1e-1); - #define ei_inv_size4_helper_macro(i,row0,row1) \ - absdet[i] = ei_abs(matrix.coeff(row0,0)*matrix.coeff(row1,1) \ - - matrix.coeff(row0,1)*matrix.coeff(row1,0)); \ - if(absdet[i] > d) { good_row0=row0; good_row1=row1; goto good; } - ei_inv_size4_helper_macro(0,0,1) - ei_inv_size4_helper_macro(1,0,2) - ei_inv_size4_helper_macro(2,0,3) - ei_inv_size4_helper_macro(3,1,2) - ei_inv_size4_helper_macro(4,1,3) - ei_inv_size4_helper_macro(5,2,3) + // Matrix transposition + const float *src = matrix.data(); + tmp1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)src)), (__m64*)(src+ 4)); + row1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+8))), (__m64*)(src+12)); + row0 = _mm_shuffle_ps(tmp1, row1, 0x88); + row1 = _mm_shuffle_ps(row1, tmp1, 0xDD); + tmp1 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+ 2))), (__m64*)(src+ 6)); + row3 = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+10))), (__m64*)(src+14)); + row2 = _mm_shuffle_ps(tmp1, row3, 0x88); + row3 = _mm_shuffle_ps(row3, tmp1, 0xDD); - // no 2x2 block has determinant bigger than the threshold. So just take the one that - // has the biggest determinant - absdet.maxCoeff(&good_i); - good_row0 = good_i <= 2 ? 0 : good_i <= 4 ? 1 : 2; - good_row1 = good_i <= 2 ? good_i+1 : good_i <= 4 ? good_i-1 : 3; - // now good_row0 and good_row1 are correctly set - good: + // Cofactors calculation. Because in the process of cofactor computation some pairs in three- + // element products are repeated, it is not reasonable to load these pairs anew every time. The + // values in the registers with these pairs are formed using shuffle instruction. Cofactors are + // calculated row by row (4 elements are placed in 1 SP FP SIMD floating point register). 
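For readers unfamiliar with the transposition idiom above: the loadh_pi/shuffle sequence is a fused form of the standard SSE 4x4 transpose. The sketch below shows the plainer unpack/move variant (equivalent to the _MM_TRANSPOSE4_PS macro); it produces the same columns, just without folding the transpose into the loads.

#include <xmmintrin.h>

static void transpose4x4_sketch(const float* src, __m128 out[4])
{
    __m128 r0 = _mm_loadu_ps(src);       // m00 m01 m02 m03
    __m128 r1 = _mm_loadu_ps(src + 4);   // m10 m11 m12 m13
    __m128 r2 = _mm_loadu_ps(src + 8);   // m20 m21 m22 m23
    __m128 r3 = _mm_loadu_ps(src + 12);  // m30 m31 m32 m33

    __m128 t0 = _mm_unpacklo_ps(r0, r1); // m00 m10 m01 m11
    __m128 t1 = _mm_unpacklo_ps(r2, r3); // m20 m30 m21 m31
    __m128 t2 = _mm_unpackhi_ps(r0, r1); // m02 m12 m03 m13
    __m128 t3 = _mm_unpackhi_ps(r2, r3); // m22 m32 m23 m33

    out[0] = _mm_movelh_ps(t0, t1);      // m00 m10 m20 m30
    out[1] = _mm_movehl_ps(t1, t0);      // m01 m11 m21 m31
    out[2] = _mm_movelh_ps(t2, t3);      // m02 m12 m22 m32
    out[3] = _mm_movehl_ps(t3, t2);      // m03 m13 m23 m33
}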
- // do row permutations to move this 2x2 block to the top - matrix.row(0).swap(matrix.row(good_row0)); - matrix.row(1).swap(matrix.row(good_row1)); - // now applying our helper function is numerically stable - ei_compute_inverse_in_size4_case_helper(matrix, result); - // Since we did row permutations on the original matrix, we need to do column permutations - // in the reverse order on the inverse - result->col(1).swap(result->col(good_row1)); - result->col(0).swap(result->col(good_row0)); -} + tmp1 = _mm_mul_ps(row2, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_mul_ps(row1, tmp1); + minor1 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); + minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); + minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E); + // ----------------------------------------------- + tmp1 = _mm_mul_ps(row1, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); + minor3 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); + minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); + // ----------------------------------------------- + tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + row2 = _mm_shuffle_ps(row2, row2, 0x4E); + minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); + minor2 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); + minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); + // ----------------------------------------------- + tmp1 = _mm_mul_ps(row0, row1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); + // ----------------------------------------------- + tmp1 = _mm_mul_ps(row0, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); + minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); + // ----------------------------------------------- + tmp1 = _mm_mul_ps(row0, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); + + // Evaluation of determinant and its reciprocal value. In the original Intel document, + // 1/det was evaluated using a fast rcpps command with subsequent approximation using + // the Newton-Raphson algorithm. Here, we go for a IEEE-compliant division instead, + // so as to not compromise precision at all. 
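The trade-off described in the comment above, written out side by side: Intel's original rcpps estimate sharpened by one Newton-Raphson step, versus the IEEE-compliant division the patch adopts. Helper names are hypothetical; both operate on the lowest scalar lane only, like the patched code.

#include <xmmintrin.h>

static __m128 reciprocal_nr_sketch(__m128 d)
{
    __m128 x0 = _mm_rcp_ss(d);                 // ~12-bit estimate
    // One Newton-Raphson step roughly doubles the accurate bits:
    // x1 = x0*(2 - d*x0) = x0 + x0 - d*x0*x0
    return _mm_sub_ss(_mm_add_ss(x0, x0),
                      _mm_mul_ss(d, _mm_mul_ss(x0, x0)));
}

static __m128 reciprocal_ieee_sketch(__m128 d)
{
    return _mm_div_ss(_mm_set_ss(1.0f), d);    // correctly rounded
}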
+ det = _mm_mul_ps(row0, minor0); + det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); + det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); +// tmp1= _mm_rcp_ss(det); +// det= _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))); + det = _mm_div_ss(_mm_set_ss(1.0f), det); // <--- yay, one original line not copied from Intel + det = _mm_shuffle_ps(det, det, 0x00); + // warning, Intel's variable naming is very confusing: now 'det' is 1/det ! + + // Multiplication of cofactors by 1/det. Storing the inverse matrix to the address in pointer src. + minor0 = _mm_mul_ps(det, minor0); + float *dst = result.data(); + _mm_storel_pi((__m64*)(dst), minor0); + _mm_storeh_pi((__m64*)(dst+2), minor0); + minor1 = _mm_mul_ps(det, minor1); + _mm_storel_pi((__m64*)(dst+4), minor1); + _mm_storeh_pi((__m64*)(dst+6), minor1); + minor2 = _mm_mul_ps(det, minor2); + _mm_storel_pi((__m64*)(dst+ 8), minor2); + _mm_storeh_pi((__m64*)(dst+10), minor2); + minor3 = _mm_mul_ps(det, minor3); + _mm_storel_pi((__m64*)(dst+12), minor3); + _mm_storeh_pi((__m64*)(dst+14), minor3); + } +}; +#endif /*********************************************** *** Part 2 : selector and MatrixBase methods *** @@ -216,7 +272,7 @@ struct ei_compute_inverse { static inline void run(const MatrixType& matrix, MatrixType* result) { - ei_compute_inverse_in_size4_case(matrix, result); + ei_compute_inverse_in_size4_case::run(matrix, *result); } }; diff --git a/thirdparty/Eigen/Eigen/src/Sparse/TriangularSolver.h b/thirdparty/Eigen/Eigen/src/Sparse/TriangularSolver.h index 8948ae45e..390d0174b 100644 --- a/thirdparty/Eigen/Eigen/src/Sparse/TriangularSolver.h +++ b/thirdparty/Eigen/Eigen/src/Sparse/TriangularSolver.h @@ -43,8 +43,11 @@ struct ei_solve_triangular_selector { lastVal = it.value(); lastIndex = it.index(); + if(lastIndex == i) + break; tmp -= lastVal * other.coeff(lastIndex,col); } + if (Lhs::Flags & UnitDiagBit) other.coeffRef(i,col) = tmp; else diff --git a/thirdparty/curveplot/include/curveplot.h b/thirdparty/curveplot/include/curveplot.h index a0f5c8c79..37300a3f3 100644 --- a/thirdparty/curveplot/include/curveplot.h +++ b/thirdparty/curveplot/include/curveplot.h @@ -79,6 +79,16 @@ class CurvePlot double subdivisionThreshold, double startTime, double endTime) const; + void renderFaded(const Eigen::Transform3d& modelview, + double nearZ, + double farZ, + const Eigen::Vector3d viewFrustumPlaneNormals[], + double subdivisionThreshold, + double startTime, + double endTime, + const Eigen::Vector4f& color, + double fadeStartTime, + double fadeEndTime) const; unsigned int lastUsed() const { return m_lastUsed; } void setLastUsed(unsigned int lastUsed) { m_lastUsed = lastUsed; } @@ -89,6 +99,8 @@ class CurvePlot bool empty() const { return m_samples.empty(); } + unsigned int sampleCount() const { return m_samples.size(); } + private: std::deque m_samples; diff --git a/thirdparty/curveplot/src/curveplot.cpp b/thirdparty/curveplot/src/curveplot.cpp index 0a0bb12b0..23fa9e3ec 100644 --- a/thirdparty/curveplot/src/curveplot.cpp +++ b/thirdparty/curveplot/src/curveplot.cpp @@ -235,6 +235,25 @@ public: #endif } + inline void vertex(const Vector4d& v, const Vector4f& color) + { +#if USE_VERTEX_BUFFER + data[currentPosition++] = v.cast(); + ++currentStripLength; + if (currentPosition == capacity) + { + flush(); + + data[0] = v.cast(); + currentPosition = 1; + currentStripLength = 1; + } +#else + glColor4fv(color.data()); + glVertex3dv(v.data()); +#endif + } + inline void begin() { #if !USE_VERTEX_BUFFER @@ -423,6 +442,86 @@ 
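Stepping back to the TriangularSolver hunk above: the added break stops the accumulation as soon as the iterator reaches the diagonal entry of row i, which must not be folded into the right-hand-side sum. A CSR-style sketch of that forward substitution, solving L x = b in place; the types and names (SparseRow, lower_solve_sketch) are illustrative, not Eigen's:

#include <vector>

struct SparseRow { std::vector<int> index; std::vector<double> value; };

static void lower_solve_sketch(const std::vector<SparseRow>& L,
                               std::vector<double>& b)
{
    for (std::size_t i = 0; i < L.size(); ++i)
    {
        double tmp  = b[i];
        double diag = 1.0;                  // unit diagonal unless stored
        for (std::size_t k = 0; k < L[i].index.size(); ++k)
        {
            if (L[i].index[k] == int(i))    // reached the diagonal: stop
            {
                diag = L[i].value[k];
                break;
            }
            tmp -= L[i].value[k] * b[L[i].index[k]];
        }
        b[i] = tmp / diag;
    }
}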
public: return restartCurve; } + // Return the GL restart status: true if the last segment of the + // curve was culled and we need to start a new primitive sequence + // with glBegin(). + bool renderCubicFaded(bool restartCurve, + const Matrix4d& coeff, + double t0, double t1, + const Vector4f& color, + double fadeStart, double fadeRate, + double curveBoundingRadius, + int depth) const + { + const double dt = (t1 - t0) * InvSubdivisionFactor; + double segmentBoundingRadius = curveBoundingRadius * InvSubdivisionFactor; + +#if DEBUG_ADAPTIVE_SPLINE + { + int c = depth % 10; + glColor4f(SplineColors[c][0], SplineColors[c][1], SplineColors[c][2], 1.0f); + ++SegmentCounts[depth]; + } +#endif + + Vector4d lastP = coeff * Vector4d(1.0, t0, t0 * t0, t0 * t0 * t0); + double lastOpacity = (t0 - fadeStart) * fadeRate; + lastOpacity = max(0.0, min(1.0, lastOpacity)); // clamp + + for (unsigned int i = 1; i <= SubdivisionFactor; i++) + { + double t = t0 + dt * i; + Vector4d p = coeff * Vector4d(1.0, t, t * t, t * t * t); + double opacity = (t - fadeStart) * fadeRate; + opacity = max(0.0, min(1.0, opacity)); // clamp + + double minDistance = max(-m_viewFrustum.nearZ(), abs(p.z()) - segmentBoundingRadius); + + if (segmentBoundingRadius >= m_subdivisionThreshold * minDistance) + { + if (m_viewFrustum.cullSphere(p, segmentBoundingRadius)) + { + if (!restartCurve) + { + m_vbuf.end(); + restartCurve = true; + } + } + else + { + restartCurve = renderCubicFaded(restartCurve, + coeff, t - dt, t, + color, + fadeStart, fadeRate, + segmentBoundingRadius, + depth + 1); + } + } + else + { +#if DEBUG_ADAPTIVE_SPLINE + { + int c = depth % 10; + glColor4f(SplineColors[c][0], SplineColors[c][1], SplineColors[c][2], i % 2 ? 0.25f : 1.0f); + } +#endif + + if (restartCurve) + { + m_vbuf.begin(); + m_vbuf.vertex(lastP, Vector4f(color.x(), color.y(), color.z(), color.w() * float(lastOpacity))); + restartCurve = false; + } + + m_vbuf.vertex(p, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity))); + } + lastP = p; + lastOpacity = opacity; + } + + return restartCurve; + } + private: HighPrec_VertexBuffer& m_vbuf; HighPrec_Frustum& m_viewFrustum; @@ -554,7 +653,6 @@ CurvePlot::render(const Transform3d& modelview, vbuf.createVertexBuffer(); vbuf.setup(); - clog << "size: " << m_samples.size() << endl; for (unsigned int i = 1; i < m_samples.size(); i++) { // Transform the points into camera space. @@ -805,3 +903,170 @@ CurvePlot::render(const Transform3d& modelview, vbuf.flush(); vbuf.finish(); } + + +/** Draw a curve with a fade effect. The curve is at full opacity at fadeStartTime + * and completely transparent at fadeEndTime. fadeStartTime may be greater than + * fadeEndTime--this just means that the fade direction will be reversed. 
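Concretely, the opacity mapping used by renderCubicFaded above and by the loop below is a clamped linear ramp between the two fade times; because fadeRate = 1/(fadeEndTime - fadeStartTime) changes sign when the times are swapped, the reversal falls out for free. A one-function sketch with a hypothetical name:

#include <algorithm>

static double fade_opacity_sketch(double t, double fadeStart, double fadeEnd)
{
    double rate    = 1.0 / (fadeEnd - fadeStart);  // may be negative
    double opacity = (t - fadeStart) * rate;
    return std::max(0.0, std::min(1.0, opacity));  // clamp, as in the code
}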
+ */ +void +CurvePlot::renderFaded(const Eigen::Transform3d& modelview, + double nearZ, + double farZ, + const Eigen::Vector3d viewFrustumPlaneNormals[], + double subdivisionThreshold, + double startTime, + double endTime, + const Vector4f& color, + double fadeStartTime, + double fadeEndTime) const +{ + // Flag to indicate whether we need to issue a glBegin() + bool restartCurve = true; + + if (m_samples.empty() || endTime <= m_samples.front().t || startTime >= m_samples.back().t) + return; + + // Linear search for the first sample + unsigned int startSample = 0; + while (startSample < m_samples.size() - 1 && startTime < m_samples[startSample].t) + startSample++; + + // Start at the first sample with time <= startTime + if (startSample > 0) + startSample--; + + double fadeDuration = fadeEndTime - fadeStartTime; + double fadeRate = 1.0 / fadeDuration; + + const Vector3d& p0_ = m_samples[startSample].position; + const Vector3d& v0_ = m_samples[startSample].velocity; + Vector4d p0 = modelview * Vector4d(p0_.x(), p0_.y(), p0_.z(), 1.0); + Vector4d v0 = modelview * Vector4d(v0_.x(), v0_.y(), v0_.z(), 0.0); + double opacity0 = (m_samples[startSample].t - fadeStartTime) * fadeRate; + opacity0 = max(0.0, min(1.0, opacity0)); + + HighPrec_Frustum viewFrustum(nearZ, farZ, viewFrustumPlaneNormals); + HighPrec_RenderContext rc(vbuf, viewFrustum, subdivisionThreshold); + + vbuf.createVertexBuffer(); + vbuf.setup(); + + bool firstSegment = true; + bool lastSegment = false; + + for (unsigned int i = startSample + 1; i < m_samples.size() && !lastSegment; i++) + { + // Transform the points into camera space. + const Vector3d& p1_ = m_samples[i].position; + const Vector3d& v1_ = m_samples[i].velocity; + Vector4d p1 = modelview * Vector4d(p1_.x(), p1_.y(), p1_.z(), 1.0); + Vector4d v1 = modelview * Vector4d(v1_.x(), v1_.y(), v1_.z(), 0.0); + double opacity1 = (m_samples[i].t - fadeStartTime) * fadeRate; + opacity1 = max(0.0, min(1.0, opacity1)); + + if (endTime <= m_samples[i].t) + { + lastSegment = true; + } + + // O(t) is an approximating function for this segment of + // the orbit, with 0 <= t <= 1 + // C is the viewer position + // d(t) = |O(t) - C|, the distance from viewer to the + // orbit segment. + + double curveBoundingRadius = m_samples[i].boundingRadius; + + // Estimate the minimum possible distance from the + // curve to the z=0 plane. If the curve is far enough + // away to be approximated as a straight line, we'll just + // render it. Otherwise, it should be a performance win + // to do a sphere-frustum cull test before subdividing and + // rendering segment. + double minDistance = abs(p0.z()) - curveBoundingRadius; + + // Render close segments as splines with adaptive subdivision. The + // subdivisions eliminates kinks between line segments and also + // prevents clipping precision problems that occur when a + // very long line is rendered with a relatively small view + // volume. + if (curveBoundingRadius >= subdivisionThreshold * minDistance || lastSegment || firstSegment) + { + // Skip rendering this section if it lies outside the view + // frustum. 
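// The cull test invoked just below is not part of this diff; the sketch
// here shows the conventional sphere-vs-frustum test it presumably
// performs, assuming a camera at the origin looking down -z (so nearZ
// and farZ are negative, matching the far-plane check later in this
// function) and inward-pointing side-plane normals. All names are
// illustrative.
//
// static bool cull_sphere_sketch(const double c[3], double r,
//                                const double n[4][3],
//                                double nearZ, double farZ)
// {
//     if (c[2] - r > nearZ) return true;   // entirely before the near plane
//     if (c[2] + r < farZ)  return true;   // entirely beyond the far plane
//     for (int i = 0; i < 4; ++i)          // four side planes through the eye
//         if (n[i][0]*c[0] + n[i][1]*c[1] + n[i][2]*c[2] < -r)
//             return true;                 // entirely outside this plane
//     return false;
// }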
+ if (viewFrustum.cullSphere(p0, curveBoundingRadius)) + { + if (!restartCurve) + { + vbuf.end(); + restartCurve = true; + } + } + else + { + double dt = m_samples[i].t - m_samples[i - 1].t; + double t0 = 0.0; + double t1 = 1.0; + + if (firstSegment) + { + t0 = (startTime - m_samples[i - 1].t) / dt; + t0 = std::max(0.0, std::min(1.0, t0)); + firstSegment = false; + } + + if (lastSegment) + { + t1 = (endTime - m_samples[i - 1].t) / dt; + } + + Matrix4d coeff = cubicHermiteCoefficients(p0, p1, v0 * dt, v1 * dt); + restartCurve = rc.renderCubicFaded(restartCurve, coeff, + t0, t1, + color, + (fadeStartTime - m_samples[i - 1].t) / dt, fadeRate * dt, + curveBoundingRadius, 1); + } + } + else + { + // Apparent size of curve is small enough that we can approximate + // it as a line. + + // Simple cull test--just check the far plane. This is required because + // apparent clipping precision limitations can cause a GPU to draw lines + // that lie completely beyond the far plane. + if (p0.z() + curveBoundingRadius < farZ) + { + if (!restartCurve) + { + vbuf.end(); + restartCurve = true; + } + } + else + { + if (restartCurve) + { + vbuf.begin(); + vbuf.vertex(p0, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity0))); + restartCurve = false; + } + vbuf.vertex(p1, Vector4f(color.x(), color.y(), color.z(), color.w() * float(opacity1))); + } + } + + p0 = p1; + v0 = v1; + opacity0 = opacity1; + } + + if (!restartCurve) + { + vbuf.end(); + } + + vbuf.flush(); + vbuf.finish(); +}
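Finally, a hypothetical call site for the new entry point, to show how it composes with the parameters render() already takes. Everything here except renderFaded's signature is illustrative: the function name, the 2e-3 subdivision threshold, and the choice of fade interval.

#include <Eigen/Geometry>
#include "curveplot.h"

void drawFadedOrbit(const CurvePlot& plot,
                    const Eigen::Transform3d& modelview,
                    double nearZ, double farZ,
                    const Eigen::Vector3d frustumNormals[],
                    double t0, double t1)
{
    Eigen::Vector4f color(1.0f, 1.0f, 1.0f, 1.0f);
    // Fade across the second half of the drawn interval; swapping the two
    // fade times reverses the ramp, as documented above.
    plot.renderFaded(modelview, nearZ, farZ, frustumNormals,
                     2.0e-3,            // subdivisionThreshold (arbitrary)
                     t0, t1,            // interval of the curve to draw
                     color,
                     0.5 * (t0 + t1),   // fadeStartTime
                     t1);               // fadeEndTime
}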