Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
122  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
134  SIMD_FORCE_INLINE btVector3 getColumn(int i) const
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
150  SIMD_FORCE_INLINE btVector3& operator[](int i)
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
158  SIMD_FORCE_INLINE const btVector3& operator[](int i) const
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
181  void setFromOpenGLSubMatrix(const btScalar *m)
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
613  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
614  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
615 
616  SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const
617  {
618  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
619  }
620  SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const
621  {
622  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
623  }
624  SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const
625  {
626  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
627  }
628 
629 
639  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
640  {
641  rot.setIdentity();
642  for (int step = maxSteps; step > 0; step--)
643  {
644  // find off-diagonal element [p][q] with largest magnitude
645  int p = 0;
646  int q = 1;
647  int r = 2;
648  btScalar max = btFabs(m_el[0][1]);
649  btScalar v = btFabs(m_el[0][2]);
650  if (v > max)
651  {
652  q = 2;
653  r = 1;
654  max = v;
655  }
656  v = btFabs(m_el[1][2]);
657  if (v > max)
658  {
659  p = 1;
660  q = 2;
661  r = 0;
662  max = v;
663  }
664 
665  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
666  if (max <= t)
667  {
668  if (max <= SIMD_EPSILON * t)
669  {
670  return;
671  }
672  step = 1;
673  }
674 
675  // compute Jacobi rotation J which leads to a zero for element [p][q]
676  btScalar mpq = m_el[p][q];
677  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
678  btScalar theta2 = theta * theta;
679  btScalar cos;
680  btScalar sin;
681  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
682  {
683  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
684  : 1 / (theta - btSqrt(1 + theta2));
685  cos = 1 / btSqrt(1 + t * t);
686  sin = cos * t;
687  }
688  else
689  {
690  // approximation for large theta-value, i.e., a nearly diagonal matrix
691  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
692  cos = 1 - btScalar(0.5) * t * t;
693  sin = cos * t;
694  }
695 
696  // apply rotation to matrix (this = J^T * this * J)
697  m_el[p][q] = m_el[q][p] = 0;
698  m_el[p][p] -= t * mpq;
699  m_el[q][q] += t * mpq;
700  btScalar mrp = m_el[r][p];
701  btScalar mrq = m_el[r][q];
702  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
703  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
704 
705  // apply rotation to rot (rot = rot * J)
706  for (int i = 0; i < 3; i++)
707  {
708  btVector3& row = rot[i];
709  mrp = row[p];
710  mrq = row[q];
711  row[p] = cos * mrp - sin * mrq;
712  row[q] = cos * mrq + sin * mrp;
713  }
714  }
715  }
716 
717 
718 
719 
727  btScalar cofac(int r1, int c1, int r2, int c2) const
728  {
729  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
730  }
731 
732  void serialize(struct btMatrix3x3Data& dataOut) const;
733 
734  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
735 
736  void deSerialize(const struct btMatrix3x3Data& dataIn);
737 
738  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
739 
740  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
741 
742 };
743 
744 
747 {
748 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
749  __m128 rv00, rv01, rv02;
750  __m128 rv10, rv11, rv12;
751  __m128 rv20, rv21, rv22;
752  __m128 mv0, mv1, mv2;
753 
754  rv02 = m_el[0].mVec128;
755  rv12 = m_el[1].mVec128;
756  rv22 = m_el[2].mVec128;
757 
758  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
759  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
760  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
761 
762  // rv0
763  rv00 = bt_splat_ps(rv02, 0);
764  rv01 = bt_splat_ps(rv02, 1);
765  rv02 = bt_splat_ps(rv02, 2);
766 
767  rv00 = _mm_mul_ps(rv00, mv0);
768  rv01 = _mm_mul_ps(rv01, mv1);
769  rv02 = _mm_mul_ps(rv02, mv2);
770 
771  // rv1
772  rv10 = bt_splat_ps(rv12, 0);
773  rv11 = bt_splat_ps(rv12, 1);
774  rv12 = bt_splat_ps(rv12, 2);
775 
776  rv10 = _mm_mul_ps(rv10, mv0);
777  rv11 = _mm_mul_ps(rv11, mv1);
778  rv12 = _mm_mul_ps(rv12, mv2);
779 
780  // rv2
781  rv20 = bt_splat_ps(rv22, 0);
782  rv21 = bt_splat_ps(rv22, 1);
783  rv22 = bt_splat_ps(rv22, 2);
784 
785  rv20 = _mm_mul_ps(rv20, mv0);
786  rv21 = _mm_mul_ps(rv21, mv1);
787  rv22 = _mm_mul_ps(rv22, mv2);
788 
789  rv00 = _mm_add_ps(rv00, rv01);
790  rv10 = _mm_add_ps(rv10, rv11);
791  rv20 = _mm_add_ps(rv20, rv21);
792 
793  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
794  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
795  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
796 
797 #elif defined(BT_USE_NEON)
798 
799  float32x4_t rv0, rv1, rv2;
800  float32x4_t v0, v1, v2;
801  float32x4_t mv0, mv1, mv2;
802 
803  v0 = m_el[0].mVec128;
804  v1 = m_el[1].mVec128;
805  v2 = m_el[2].mVec128;
806 
807  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
808  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
809  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
810 
811  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
812  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
813  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
814 
815  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
816  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
817  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
818 
819  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
820  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
821  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
822 
823  m_el[0].mVec128 = rv0;
824  m_el[1].mVec128 = rv1;
825  m_el[2].mVec128 = rv2;
826 #else
827  setValue(
828  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
829  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
830  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
831 #endif
832  return *this;
833 }
834 
837 {
838 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
839  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
840  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
841  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
842 #else
843  setValue(
844  m_el[0][0]+m.m_el[0][0],
845  m_el[0][1]+m.m_el[0][1],
846  m_el[0][2]+m.m_el[0][2],
847  m_el[1][0]+m.m_el[1][0],
848  m_el[1][1]+m.m_el[1][1],
849  m_el[1][2]+m.m_el[1][2],
850  m_el[2][0]+m.m_el[2][0],
851  m_el[2][1]+m.m_el[2][1],
852  m_el[2][2]+m.m_el[2][2]);
853 #endif
854  return *this;
855 }
856 
858 operator*(const btMatrix3x3& m, const btScalar & k)
859 {
860 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
861  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
862  return btMatrix3x3(
863  _mm_mul_ps(m[0].mVec128, vk),
864  _mm_mul_ps(m[1].mVec128, vk),
865  _mm_mul_ps(m[2].mVec128, vk));
866 #elif defined(BT_USE_NEON)
867  return btMatrix3x3(
868  vmulq_n_f32(m[0].mVec128, k),
869  vmulq_n_f32(m[1].mVec128, k),
870  vmulq_n_f32(m[2].mVec128, k));
871 #else
872  return btMatrix3x3(
873  m[0].x()*k,m[0].y()*k,m[0].z()*k,
874  m[1].x()*k,m[1].y()*k,m[1].z()*k,
875  m[2].x()*k,m[2].y()*k,m[2].z()*k);
876 #endif
877 }
878 
880 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
881 {
882 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
883  return btMatrix3x3(
884  m1[0].mVec128 + m2[0].mVec128,
885  m1[1].mVec128 + m2[1].mVec128,
886  m1[2].mVec128 + m2[2].mVec128);
887 #else
888  return btMatrix3x3(
889  m1[0][0]+m2[0][0],
890  m1[0][1]+m2[0][1],
891  m1[0][2]+m2[0][2],
892 
893  m1[1][0]+m2[1][0],
894  m1[1][1]+m2[1][1],
895  m1[1][2]+m2[1][2],
896 
897  m1[2][0]+m2[2][0],
898  m1[2][1]+m2[2][1],
899  m1[2][2]+m2[2][2]);
900 #endif
901 }
902 
904 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
905 {
906 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
907  return btMatrix3x3(
908  m1[0].mVec128 - m2[0].mVec128,
909  m1[1].mVec128 - m2[1].mVec128,
910  m1[2].mVec128 - m2[2].mVec128);
911 #else
912  return btMatrix3x3(
913  m1[0][0]-m2[0][0],
914  m1[0][1]-m2[0][1],
915  m1[0][2]-m2[0][2],
916 
917  m1[1][0]-m2[1][0],
918  m1[1][1]-m2[1][1],
919  m1[1][2]-m2[1][2],
920 
921  m1[2][0]-m2[2][0],
922  m1[2][1]-m2[2][1],
923  m1[2][2]-m2[2][2]);
924 #endif
925 }
926 
927 
930 {
931 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
932  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
933  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
934  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
935 #else
936  setValue(
937  m_el[0][0]-m.m_el[0][0],
938  m_el[0][1]-m.m_el[0][1],
939  m_el[0][2]-m.m_el[0][2],
940  m_el[1][0]-m.m_el[1][0],
941  m_el[1][1]-m.m_el[1][1],
942  m_el[1][2]-m.m_el[1][2],
943  m_el[2][0]-m.m_el[2][0],
944  m_el[2][1]-m.m_el[2][1],
945  m_el[2][2]-m.m_el[2][2]);
946 #endif
947  return *this;
948 }
949 
950 
953 {
954  return btTriple((*this)[0], (*this)[1], (*this)[2]);
955 }
956 
957 
960 {
961 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
962  return btMatrix3x3(
963  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
964  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
965  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
966 #elif defined(BT_USE_NEON)
967  return btMatrix3x3(
968  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
969  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
970  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
971 #else
972  return btMatrix3x3(
973  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
974  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
975  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
976 #endif
977 }
978 
981 {
982 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
983  __m128 v0 = m_el[0].mVec128;
984  __m128 v1 = m_el[1].mVec128;
985  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
986  __m128 vT;
987 
988  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
989 
990  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
991  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
992 
993  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
994  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
995  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
996 
997 
998  return btMatrix3x3( v0, v1, v2 );
999 #elif defined(BT_USE_NEON)
1000  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1001  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
1002  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1003  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
1004  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1005  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1006  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1007  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1008  return btMatrix3x3( v0, v1, v2 );
1009 #else
1010  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1011  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1012  m_el[0].z(), m_el[1].z(), m_el[2].z());
1013 #endif
1014 }
1015 
1018 {
1019  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1020  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1021  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1022 }
1023 
1026 {
1027  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1028  btScalar det = (*this)[0].dot(co);
1029  btFullAssert(det != btScalar(0.0));
1030  btScalar s = btScalar(1.0) / det;
1031  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1032  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1033  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1034 }
1035 
1038 {
1039 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1040  // zeros w
1041 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1042  __m128 row = m_el[0].mVec128;
1043  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1044  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1045  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1046  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1047  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1048  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1049  row = m_el[1].mVec128;
1050  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1051  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1052  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1053  row = m_el[2].mVec128;
1054  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1055  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1056  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1057  return btMatrix3x3( r0, r1, r2 );
1058 
1059 #elif defined BT_USE_NEON
1060  // zeros w
1061  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1062  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1063  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1064  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1065  float32x4_t row = m_el[0].mVec128;
1066  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1067  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1068  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1069  row = m_el[1].mVec128;
1070  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1071  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1072  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1073  row = m_el[2].mVec128;
1074  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1075  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1076  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1077  return btMatrix3x3( r0, r1, r2 );
1078 #else
1079  return btMatrix3x3(
1080  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1081  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1082  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1083  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1084  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1085  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1086  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1087  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1088  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1089 #endif
1090 }
1091 
{
	// Returns this * m^T: entry (i,j) is row i of *this dotted with row j
	// of m, as the scalar #else branch below makes explicit.
	// NOTE(review): the signature line sits outside this extract —
	// presumably btMatrix3x3::timesTranspose(const btMatrix3x3& m) const;
	// confirm against the full header.
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	// r_i = mx*a_i.x + my*a_i.y + mz*a_i.z; the shuffles (0x00 / 0x55 /
	// 0xaa) splat the x, y and z components of row i across all lanes.
	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3( r0, r1, r2);

#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	// Same accumulation as the SSE path, using lane-broadcast
	// multiply (vmulq_lane) and multiply-accumulate (vmlaq_lane).
	float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3( r0, r1, r2 );

#else
	// Scalar reference: entry (i,j) = m_el[i] . m[j]  (row-row dot products).
	return btMatrix3x3(
		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
#endif
}
1144 
operator*(const btMatrix3x3& m, const btVector3& v)
{
	// Matrix-times-column-vector: component i of the result is m[i] . v,
	// as the scalar branch shows. The SIMD branch delegates to
	// btVector3::dot3, which computes the three dot products at once.
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	return v.dot3(m[0], m[1], m[2]);
#else
	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
#endif
}
1154 
1155 
operator*(const btVector3& v, const btMatrix3x3& m)
{
	// Row-vector-times-matrix: the SIMD branches compute
	// v.x*m[0] + v.y*m[1] + v.z*m[2] (a linear combination of the rows),
	// which equals v dotted with each column of m.
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	const __m128 vv = v.mVec128;

	// Broadcast each component of v across a full register.
	__m128 c0 = bt_splat_ps( vv, 0);
	__m128 c1 = bt_splat_ps( vv, 1);
	__m128 c2 = bt_splat_ps( vv, 2);

	// Mask off the w lane of each row before multiplying so stray w
	// values cannot leak into the sum.
	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	// Zero the w lane of each row (btvFFF0Mask), then scale row i by v_i.
	c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#else
	// Scalar fallback: tdotx/tdoty/tdotz presumably dot v with the
	// x/y/z columns of m — verify against their definitions earlier
	// in this header.
	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
1195 
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
	// Standard matrix product m1 * m2. Both SIMD branches build each
	// result row i as m1[i].x*m2[0] + m1[i].y*m2[1] + m1[i].z*m2[2],
	// i.e. a linear combination of the rows of m2.
#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	// Row 0 of m2 with its w lane masked to zero.
	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	// Splat the x component of each m1 row and scale m2's row 0 by it.
	__m128 c0 = bt_splat_ps( m10, 0);
	__m128 c1 = bt_splat_ps( m11, 0);
	__m128 c2 = bt_splat_ps( m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	// Same for the y components against m2's row 1.
	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps( m10, 1);
	__m128 c1_1 = bt_splat_ps( m11, 1);
	__m128 c2_1 = bt_splat_ps( m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	// Finally the z components against m2's row 2; the m1 row registers
	// are reused as scratch here since their originals are no longer needed.
	m10 = bt_splat_ps( m10, 2);
	m11 = bt_splat_ps( m11, 2);
	m12 = bt_splat_ps( m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);

#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	// Rows of m2 with their w lanes zeroed (btvFFF0Mask).
	mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	// rv_i = m1[i].x * m2[0] ...
	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	// ... + m1[i].y * m2[1] ...
	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	// ... + m1[i].z * m2[2].
	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);

#else
	// Scalar fallback: entry (i,j) is m1's row i dotted with m2's
	// column j (tdotx/tdoty/tdotz are the column dot helpers).
	return btMatrix3x3(
		m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
		m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
		m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
#endif
}
1280 
1281 /*
1282 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1283 return btMatrix3x3(
1284 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1285 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1286 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1287 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1288 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1289 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1290 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1291 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1292 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1293 }
1294 */
1295 
1299 {
1300 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1301 
1302  __m128 c0, c1, c2;
1303 
1304  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1305  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1306  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1307 
1308  c0 = _mm_and_ps(c0, c1);
1309  c0 = _mm_and_ps(c0, c2);
1310 
1311  return (0x7 == _mm_movemask_ps((__m128)c0));
1312 #else
1313  return
1314  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1315  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1316  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1317 #endif
1318 }
1319 
1322 {
1324 };
1325 
1328 {
1330 };
1331 
1332 
1333 
1334 
1336 {
1337  for (int i=0;i<3;i++)
1338  m_el[i].serialize(dataOut.m_el[i]);
1339 }
1340 
1342 {
1343  for (int i=0;i<3;i++)
1344  m_el[i].serializeFloat(dataOut.m_el[i]);
1345 }
1346 
1347 
1349 {
1350  for (int i=0;i<3;i++)
1351  m_el[i].deSerialize(dataIn.m_el[i]);
1352 }
1353 
1355 {
1356  for (int i=0;i<3;i++)
1357  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1358 }
1359 
1361 {
1362  for (int i=0;i<3;i++)
1363  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1364 }
1365 
1366 #endif //BT_MATRIX3x3_H
1367