Fitting.cpp 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
  2. #include "Fitting.h"
  3. #include "Vector.inl"
  4. #include "Plane.inl"
  5. #include "nvcore/Array.inl"
  6. #include "nvcore/Utils.h" // max, swap
  7. #include <float.h> // FLT_MAX
  8. //#include <vector>
  9. #include <string.h>
  10. using namespace nv;
  11. // @@ Move to EigenSolver.h
  12. // @@ We should be able to do something cheaper...
  13. static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
  14. {
  15. const Vector3 row0(matrix[0], matrix[1], matrix[2]);
  16. const Vector3 row1(matrix[1], matrix[3], matrix[4]);
  17. const Vector3 row2(matrix[2], matrix[4], matrix[5]);
  18. float r0 = lengthSquared(row0);
  19. float r1 = lengthSquared(row1);
  20. float r2 = lengthSquared(row2);
  21. if (r0 > r1 && r0 > r2) return row0;
  22. if (r1 > r2) return row1;
  23. return row2;
  24. }
  25. static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
  26. {
  27. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  28. {
  29. return Vector3(0.0f);
  30. }
  31. Vector3 v = estimatePrincipalComponent(matrix);
  32. const int NUM = 8;
  33. for (int i = 0; i < NUM; i++)
  34. {
  35. float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
  36. float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
  37. float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
  38. float norm = max(max(x, y), z);
  39. v = Vector3(x, y, z) / norm;
  40. }
  41. return v;
  42. }
  43. Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
  44. {
  45. Vector3 centroid(0.0f);
  46. for (int i = 0; i < n; i++)
  47. {
  48. centroid += points[i];
  49. }
  50. centroid /= float(n);
  51. return centroid;
  52. }
  53. Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  54. {
  55. Vector3 centroid(0.0f);
  56. float total = 0.0f;
  57. for (int i = 0; i < n; i++)
  58. {
  59. total += weights[i];
  60. centroid += weights[i]*points[i];
  61. }
  62. centroid /= total;
  63. return centroid;
  64. }
  65. Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
  66. {
  67. Vector4 centroid(0.0f);
  68. for (int i = 0; i < n; i++)
  69. {
  70. centroid += points[i];
  71. }
  72. centroid /= float(n);
  73. return centroid;
  74. }
  75. Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
  76. {
  77. Vector4 centroid(0.0f);
  78. float total = 0.0f;
  79. for (int i = 0; i < n; i++)
  80. {
  81. total += weights[i];
  82. centroid += weights[i]*points[i];
  83. }
  84. centroid /= total;
  85. return centroid;
  86. }
  87. Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
  88. {
  89. // compute the centroid
  90. Vector3 centroid = computeCentroid(n, points);
  91. // compute covariance matrix
  92. for (int i = 0; i < 6; i++)
  93. {
  94. covariance[i] = 0.0f;
  95. }
  96. for (int i = 0; i < n; i++)
  97. {
  98. Vector3 v = points[i] - centroid;
  99. covariance[0] += v.x * v.x;
  100. covariance[1] += v.x * v.y;
  101. covariance[2] += v.x * v.z;
  102. covariance[3] += v.y * v.y;
  103. covariance[4] += v.y * v.z;
  104. covariance[5] += v.z * v.z;
  105. }
  106. return centroid;
  107. }
  108. Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
  109. {
  110. // compute the centroid
  111. Vector3 centroid = computeCentroid(n, points, weights, metric);
  112. // compute covariance matrix
  113. for (int i = 0; i < 6; i++)
  114. {
  115. covariance[i] = 0.0f;
  116. }
  117. for (int i = 0; i < n; i++)
  118. {
  119. Vector3 a = (points[i] - centroid) * metric;
  120. Vector3 b = weights[i]*a;
  121. covariance[0] += a.x * b.x;
  122. covariance[1] += a.x * b.y;
  123. covariance[2] += a.x * b.z;
  124. covariance[3] += a.y * b.y;
  125. covariance[4] += a.y * b.z;
  126. covariance[5] += a.z * b.z;
  127. }
  128. return centroid;
  129. }
  130. Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
  131. {
  132. // compute the centroid
  133. Vector4 centroid = computeCentroid(n, points);
  134. // compute covariance matrix
  135. for (int i = 0; i < 10; i++)
  136. {
  137. covariance[i] = 0.0f;
  138. }
  139. for (int i = 0; i < n; i++)
  140. {
  141. Vector4 v = points[i] - centroid;
  142. covariance[0] += v.x * v.x;
  143. covariance[1] += v.x * v.y;
  144. covariance[2] += v.x * v.z;
  145. covariance[3] += v.x * v.w;
  146. covariance[4] += v.y * v.y;
  147. covariance[5] += v.y * v.z;
  148. covariance[6] += v.y * v.w;
  149. covariance[7] += v.z * v.z;
  150. covariance[8] += v.z * v.w;
  151. covariance[9] += v.w * v.w;
  152. }
  153. return centroid;
  154. }
  155. Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
  156. {
  157. // compute the centroid
  158. Vector4 centroid = computeCentroid(n, points, weights, metric);
  159. // compute covariance matrix
  160. for (int i = 0; i < 10; i++)
  161. {
  162. covariance[i] = 0.0f;
  163. }
  164. for (int i = 0; i < n; i++)
  165. {
  166. Vector4 a = (points[i] - centroid) * metric;
  167. Vector4 b = weights[i]*a;
  168. covariance[0] += a.x * b.x;
  169. covariance[1] += a.x * b.y;
  170. covariance[2] += a.x * b.z;
  171. covariance[3] += a.x * b.w;
  172. covariance[4] += a.y * b.y;
  173. covariance[5] += a.y * b.z;
  174. covariance[6] += a.y * b.w;
  175. covariance[7] += a.z * b.z;
  176. covariance[8] += a.z * b.w;
  177. covariance[9] += a.w * b.w;
  178. }
  179. return centroid;
  180. }
  181. Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
  182. {
  183. float matrix[6];
  184. computeCovariance(n, points, matrix);
  185. return firstEigenVector_PowerMethod(matrix);
  186. }
  187. Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  188. {
  189. float matrix[6];
  190. computeCovariance(n, points, weights, metric, matrix);
  191. return firstEigenVector_PowerMethod(matrix);
  192. }
  193. static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
  194. {
  195. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  196. {
  197. return Vector3(0.0f);
  198. }
  199. float eigenValues[3];
  200. Vector3 eigenVectors[3];
  201. if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
  202. {
  203. return Vector3(0.0f);
  204. }
  205. return eigenVectors[0];
  206. }
  207. Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
  208. {
  209. float matrix[6];
  210. computeCovariance(n, points, matrix);
  211. return firstEigenVector_EigenSolver3(matrix);
  212. }
  213. Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  214. {
  215. float matrix[6];
  216. computeCovariance(n, points, weights, metric, matrix);
  217. return firstEigenVector_EigenSolver3(matrix);
  218. }
  219. static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
  220. {
  221. if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0)
  222. {
  223. return Vector4(0.0f);
  224. }
  225. float eigenValues[4];
  226. Vector4 eigenVectors[4];
  227. if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors))
  228. {
  229. return Vector4(0.0f);
  230. }
  231. return eigenVectors[0];
  232. }
  233. Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
  234. {
  235. float matrix[10];
  236. computeCovariance(n, points, matrix);
  237. return firstEigenVector_EigenSolver4(matrix);
  238. }
  239. Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
  240. {
  241. float matrix[10];
  242. computeCovariance(n, points, weights, metric, matrix);
  243. return firstEigenVector_EigenSolver4(matrix);
  244. }
  245. void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
  246. Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
  247. {
  248. // Store the points in an n x n matrix
  249. Array<float> Q; Q.resize(n*n, 0.0f);
  250. for (int i = 0; i < n; ++i)
  251. {
  252. Q[i*n+0] = points[i].x;
  253. Q[i*n+1] = points[i].y;
  254. Q[i*n+2] = points[i].z;
  255. }
  256. // Alloc space for the SVD outputs
  257. Array<float> diag; diag.resize(n, 0.0f);
  258. Array<float> R; R.resize(n*n, 0.0f);
  259. ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
  260. // Get the principal component
  261. return Vector3(R[0], R[1], R[2]);
  262. }
  263. Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
  264. {
  265. // Store the points in an n x n matrix
  266. Array<float> Q; Q.resize(n*n, 0.0f);
  267. for (int i = 0; i < n; ++i)
  268. {
  269. Q[i*n+0] = points[i].x;
  270. Q[i*n+1] = points[i].y;
  271. Q[i*n+2] = points[i].z;
  272. Q[i*n+3] = points[i].w;
  273. }
  274. // Alloc space for the SVD outputs
  275. Array<float> diag; diag.resize(n, 0.0f);
  276. Array<float> R; R.resize(n*n, 0.0f);
  277. ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
  278. // Get the principal component
  279. return Vector4(R[0], R[1], R[2], R[3]);
  280. }
  281. Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
  282. {
  283. // compute the centroid and covariance
  284. float matrix[6];
  285. Vector3 centroid = computeCovariance(n, points, matrix);
  286. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  287. {
  288. // If no plane defined, then return a horizontal plane.
  289. return Plane(Vector3(0, 0, 1), centroid);
  290. }
  291. float eigenValues[3];
  292. Vector3 eigenVectors[3];
  293. if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
  294. // If no plane defined, then return a horizontal plane.
  295. return Plane(Vector3(0, 0, 1), centroid);
  296. }
  297. return Plane(eigenVectors[2], centroid);
  298. }
  299. bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
  300. {
  301. // compute the centroid and covariance
  302. float matrix[6];
  303. computeCovariance(n, points, matrix);
  304. float eigenValues[3];
  305. Vector3 eigenVectors[3];
  306. if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
  307. return false;
  308. }
  309. return eigenValues[2] < epsilon;
  310. }
  311. // Tridiagonal solver from Charles Bloom.
  312. // Householder transforms followed by QL decomposition.
  313. // Seems to be based on the code from Numerical Recipes in C.
  314. static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
  315. static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
  316. bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
  317. {
  318. nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
  319. float subd[3];
  320. float diag[3];
  321. float work[3][3];
  322. work[0][0] = matrix[0];
  323. work[0][1] = work[1][0] = matrix[1];
  324. work[0][2] = work[2][0] = matrix[2];
  325. work[1][1] = matrix[3];
  326. work[1][2] = work[2][1] = matrix[4];
  327. work[2][2] = matrix[5];
  328. EigenSolver3_Tridiagonal(work, diag, subd);
  329. if (!EigenSolver3_QLAlgorithm(work, diag, subd))
  330. {
  331. for (int i = 0; i < 3; i++) {
  332. eigenValues[i] = 0;
  333. eigenVectors[i] = Vector3(0);
  334. }
  335. return false;
  336. }
  337. for (int i = 0; i < 3; i++) {
  338. eigenValues[i] = (float)diag[i];
  339. }
  340. // eigenvectors are the columns; make them the rows :
  341. for (int i=0; i < 3; i++)
  342. {
  343. for (int j = 0; j < 3; j++)
  344. {
  345. eigenVectors[j].component[i] = (float) work[i][j];
  346. }
  347. }
  348. // shuffle to sort by singular value :
  349. if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1])
  350. {
  351. swap(eigenValues[0], eigenValues[2]);
  352. swap(eigenVectors[0], eigenVectors[2]);
  353. }
  354. if (eigenValues[1] > eigenValues[0])
  355. {
  356. swap(eigenValues[0], eigenValues[1]);
  357. swap(eigenVectors[0], eigenVectors[1]);
  358. }
  359. if (eigenValues[2] > eigenValues[1])
  360. {
  361. swap(eigenValues[1], eigenValues[2]);
  362. swap(eigenVectors[1], eigenVectors[2]);
  363. }
  364. nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
  365. nvDebugCheck(eigenValues[1] >= eigenValues[2]);
  366. return true;
  367. }
  368. static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
  369. {
  370. // Householder reduction T = Q^t M Q
  371. // Input:
  372. // mat, symmetric 3x3 matrix M
  373. // Output:
  374. // mat, orthogonal matrix Q
  375. // diag, diagonal entries of T
  376. // subd, subdiagonal entries of T (T is symmetric)
  377. const float epsilon = 1e-08f;
  378. float a = mat[0][0];
  379. float b = mat[0][1];
  380. float c = mat[0][2];
  381. float d = mat[1][1];
  382. float e = mat[1][2];
  383. float f = mat[2][2];
  384. diag[0] = a;
  385. subd[2] = 0.f;
  386. if (fabsf(c) >= epsilon)
  387. {
  388. const float ell = sqrtf(b*b+c*c);
  389. b /= ell;
  390. c /= ell;
  391. const float q = 2*b*e+c*(f-d);
  392. diag[1] = d+c*q;
  393. diag[2] = f-c*q;
  394. subd[0] = ell;
  395. subd[1] = e-b*q;
  396. mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
  397. mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c;
  398. mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b;
  399. }
  400. else
  401. {
  402. diag[1] = d;
  403. diag[2] = f;
  404. subd[0] = b;
  405. subd[1] = e;
  406. mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
  407. mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0;
  408. mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1;
  409. }
  410. }
  411. static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
  412. {
  413. // QL iteration with implicit shifting to reduce matrix from tridiagonal
  414. // to diagonal
  415. const int maxiter = 32;
  416. for (int ell = 0; ell < 3; ell++)
  417. {
  418. int iter;
  419. for (iter = 0; iter < maxiter; iter++)
  420. {
  421. int m;
  422. for (m = ell; m <= 1; m++)
  423. {
  424. float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
  425. if ( fabsf(subd[m]) + dd == dd )
  426. break;
  427. }
  428. if ( m == ell )
  429. break;
  430. float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
  431. float r = sqrtf(g*g+1);
  432. if ( g < 0 )
  433. g = diag[m]-diag[ell]+subd[ell]/(g-r);
  434. else
  435. g = diag[m]-diag[ell]+subd[ell]/(g+r);
  436. float s = 1, c = 1, p = 0;
  437. for (int i = m-1; i >= ell; i--)
  438. {
  439. float f = s*subd[i], b = c*subd[i];
  440. if ( fabsf(f) >= fabsf(g) )
  441. {
  442. c = g/f;
  443. r = sqrtf(c*c+1);
  444. subd[i+1] = f*r;
  445. c *= (s = 1/r);
  446. }
  447. else
  448. {
  449. s = f/g;
  450. r = sqrtf(s*s+1);
  451. subd[i+1] = g*r;
  452. s *= (c = 1/r);
  453. }
  454. g = diag[i+1]-p;
  455. r = (diag[i]-g)*s+2*b*c;
  456. p = s*r;
  457. diag[i+1] = g+p;
  458. g = c*r-b;
  459. for (int k = 0; k < 3; k++)
  460. {
  461. f = mat[k][i+1];
  462. mat[k][i+1] = s*mat[k][i]+c*f;
  463. mat[k][i] = c*mat[k][i]-s*f;
  464. }
  465. }
  466. diag[ell] -= p;
  467. subd[ell] = g;
  468. subd[m] = 0;
  469. }
  470. if ( iter == maxiter )
  471. // should not get here under normal circumstances
  472. return false;
  473. }
  474. return true;
  475. }
  476. // Tridiagonal solver for 4x4 symmetric matrices.
  477. static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
  478. static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
  479. bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
  480. {
  481. nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
  482. float subd[4];
  483. float diag[4];
  484. float work[4][4];
  485. work[0][0] = matrix[0];
  486. work[0][1] = work[1][0] = matrix[1];
  487. work[0][2] = work[2][0] = matrix[2];
  488. work[0][3] = work[3][0] = matrix[3];
  489. work[1][1] = matrix[4];
  490. work[1][2] = work[2][1] = matrix[5];
  491. work[1][3] = work[3][1] = matrix[6];
  492. work[2][2] = matrix[7];
  493. work[2][3] = work[3][2] = matrix[8];
  494. work[3][3] = matrix[9];
  495. EigenSolver4_Tridiagonal(work, diag, subd);
  496. if (!EigenSolver4_QLAlgorithm(work, diag, subd))
  497. {
  498. for (int i = 0; i < 4; i++) {
  499. eigenValues[i] = 0;
  500. eigenVectors[i] = Vector4(0);
  501. }
  502. return false;
  503. }
  504. for (int i = 0; i < 4; i++) {
  505. eigenValues[i] = (float)diag[i];
  506. }
  507. // eigenvectors are the columns; make them the rows
  508. for (int i = 0; i < 4; i++)
  509. {
  510. for (int j = 0; j < 4; j++)
  511. {
  512. eigenVectors[j].component[i] = (float) work[i][j];
  513. }
  514. }
  515. // sort by singular value
  516. for (int i = 0; i < 3; ++i)
  517. {
  518. for (int j = i+1; j < 4; ++j)
  519. {
  520. if (eigenValues[j] > eigenValues[i])
  521. {
  522. swap(eigenValues[i], eigenValues[j]);
  523. swap(eigenVectors[i], eigenVectors[j]);
  524. }
  525. }
  526. }
  527. nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
  528. nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
  529. nvDebugCheck(eigenValues[2] >= eigenValues[2]);
  530. return true;
  531. }
  532. #include "nvmath/Matrix.inl"
  533. inline float signNonzero(float x)
  534. {
  535. return (x >= 0.0f) ? 1.0f : -1.0f;
  536. }
  537. static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
  538. {
  539. // Householder reduction T = Q^t M Q
  540. // Input:
  541. // mat, symmetric 3x3 matrix M
  542. // Output:
  543. // mat, orthogonal matrix Q
  544. // diag, diagonal entries of T
  545. // subd, subdiagonal entries of T (T is symmetric)
  546. static const int n = 4;
  547. // Set epsilon relative to size of elements in matrix
  548. static const float relEpsilon = 1e-6f;
  549. float maxElement = FLT_MAX;
  550. for (int i = 0; i < n; ++i)
  551. for (int j = 0; j < n; ++j)
  552. maxElement = max(maxElement, fabsf(mat[i][j]));
  553. float epsilon = relEpsilon * maxElement;
  554. // Iterative algorithm, works for any size of matrix but might be slower than
  555. // a closed-form solution for symmetric 4x4 matrices. Based on this article:
  556. // http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
  557. Matrix A, Q(identity);
  558. memcpy(&A, mat, sizeof(float)*n*n);
  559. // We proceed from left to right, making the off-tridiagonal entries zero in
  560. // one column of the matrix at a time.
  561. for (int k = 0; k < n - 2; ++k)
  562. {
  563. float sum = 0.0f;
  564. for (int j = k+1; j < n; ++j)
  565. sum += A(j,k)*A(j,k);
  566. float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
  567. float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
  568. // If r is zero, skip this column - already in tridiagonal form
  569. if (fabsf(r) < epsilon)
  570. continue;
  571. float v[n] = {};
  572. v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
  573. for (int j = k+2; j < n; ++j)
  574. v[j] = 0.5f * A(j,k) / r;
  575. Matrix P(identity);
  576. for (int i = 0; i < n; ++i)
  577. for (int j = 0; j < n; ++j)
  578. P(i,j) -= 2.0f * v[i] * v[j];
  579. A = mul(mul(P, A), P);
  580. Q = mul(Q, P);
  581. }
  582. nvDebugCheck(fabsf(A(2,0)) < epsilon);
  583. nvDebugCheck(fabsf(A(0,2)) < epsilon);
  584. nvDebugCheck(fabsf(A(3,0)) < epsilon);
  585. nvDebugCheck(fabsf(A(0,3)) < epsilon);
  586. nvDebugCheck(fabsf(A(3,1)) < epsilon);
  587. nvDebugCheck(fabsf(A(1,3)) < epsilon);
  588. for (int i = 0; i < n; ++i)
  589. diag[i] = A(i,i);
  590. for (int i = 0; i < n - 1; ++i)
  591. subd[i] = A(i+1,i);
  592. subd[n-1] = 0.0f;
  593. memcpy(mat, &Q, sizeof(float)*n*n);
  594. }
  595. static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
  596. {
  597. // QL iteration with implicit shifting to reduce matrix from tridiagonal
  598. // to diagonal
  599. const int maxiter = 32;
  600. for (int ell = 0; ell < 4; ell++)
  601. {
  602. int iter;
  603. for (iter = 0; iter < maxiter; iter++)
  604. {
  605. int m;
  606. for (m = ell; m < 3; m++)
  607. {
  608. float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
  609. if ( fabsf(subd[m]) + dd == dd )
  610. break;
  611. }
  612. if ( m == ell )
  613. break;
  614. float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
  615. float r = sqrtf(g*g+1);
  616. if ( g < 0 )
  617. g = diag[m]-diag[ell]+subd[ell]/(g-r);
  618. else
  619. g = diag[m]-diag[ell]+subd[ell]/(g+r);
  620. float s = 1, c = 1, p = 0;
  621. for (int i = m-1; i >= ell; i--)
  622. {
  623. float f = s*subd[i], b = c*subd[i];
  624. if ( fabsf(f) >= fabsf(g) )
  625. {
  626. c = g/f;
  627. r = sqrtf(c*c+1);
  628. subd[i+1] = f*r;
  629. c *= (s = 1/r);
  630. }
  631. else
  632. {
  633. s = f/g;
  634. r = sqrtf(s*s+1);
  635. subd[i+1] = g*r;
  636. s *= (c = 1/r);
  637. }
  638. g = diag[i+1]-p;
  639. r = (diag[i]-g)*s+2*b*c;
  640. p = s*r;
  641. diag[i+1] = g+p;
  642. g = c*r-b;
  643. for (int k = 0; k < 4; k++)
  644. {
  645. f = mat[k][i+1];
  646. mat[k][i+1] = s*mat[k][i]+c*f;
  647. mat[k][i] = c*mat[k][i]-s*f;
  648. }
  649. }
  650. diag[ell] -= p;
  651. subd[ell] = g;
  652. subd[m] = 0;
  653. }
  654. if ( iter == maxiter )
  655. // should not get here under normal circumstances
  656. return false;
  657. }
  658. return true;
  659. }
  660. int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
  661. {
  662. // Compute principal component.
  663. float matrix[6];
  664. Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
  665. Vector3 principal = firstEigenVector_PowerMethod(matrix);
  666. // Pick initial solution.
  667. int mini, maxi;
  668. mini = maxi = 0;
  669. float mindps, maxdps;
  670. mindps = maxdps = dot(points[0] - centroid, principal);
  671. for (int i = 1; i < n; ++i)
  672. {
  673. float dps = dot(points[i] - centroid, principal);
  674. if (dps < mindps) {
  675. mindps = dps;
  676. mini = i;
  677. }
  678. else {
  679. maxdps = dps;
  680. maxi = i;
  681. }
  682. }
  683. cluster[0] = centroid + mindps * principal;
  684. cluster[1] = centroid + maxdps * principal;
  685. cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
  686. cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
  687. // Now we have to iteratively refine the clusters.
  688. while (true)
  689. {
  690. Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
  691. float total[4] = {0, 0, 0, 0};
  692. for (int i = 0; i < n; ++i)
  693. {
  694. // Find nearest cluster.
  695. int nearest = 0;
  696. float mindist = FLT_MAX;
  697. for (int j = 0; j < 4; j++)
  698. {
  699. float dist = lengthSquared((cluster[j] - points[i]) * metric);
  700. if (dist < mindist)
  701. {
  702. mindist = dist;
  703. nearest = j;
  704. }
  705. }
  706. newCluster[nearest] += weights[i] * points[i];
  707. total[nearest] += weights[i];
  708. }
  709. for (int j = 0; j < 4; j++)
  710. {
  711. if (total[j] != 0)
  712. newCluster[j] /= total[j];
  713. }
  714. if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) &&
  715. equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
  716. {
  717. return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
  718. }
  719. cluster[0] = newCluster[0];
  720. cluster[1] = newCluster[1];
  721. cluster[2] = newCluster[2];
  722. cluster[3] = newCluster[3];
  723. // Sort clusters by weight.
  724. for (int i = 0; i < 4; i++)
  725. {
  726. for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
  727. {
  728. swap( total[j], total[j - 1] );
  729. swap( cluster[j], cluster[j - 1] );
  730. }
  731. }
  732. }
  733. }
  734. // Adaptation of James Arvo's SVD code, as found in ZOH.
  735. inline float Sqr(float x) { return x*x; }
  736. inline float svd_pythag( float a, float b )
  737. {
  738. float at = fabsf(a);
  739. float bt = fabsf(b);
  740. if( at > bt )
  741. return at * sqrtf( 1.0f + Sqr( bt / at ) );
  742. else if( bt > 0.0f )
  743. return bt * sqrtf( 1.0f + Sqr( at / bt ) );
  744. else return 0.0f;
  745. }
  746. inline float SameSign( float a, float b )
  747. {
  748. float t;
  749. if( b >= 0.0f ) t = fabsf( a );
  750. else t = -fabsf( a );
  751. return t;
  752. }
  753. void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
  754. {
  755. static const int MaxIterations = 30;
  756. int i, j, k, l, p, q, iter;
  757. float c, f, h, s, x, y, z;
  758. float norm = 0.0f;
  759. float g = 0.0f;
  760. float scale = 0.0f;
  761. Array<float> temp; temp.resize(cols, 0.0f);
  762. for( i = 0; i < cols; i++ )
  763. {
  764. temp[i] = scale * g;
  765. scale = 0.0f;
  766. g = 0.0f;
  767. s = 0.0f;
  768. l = i + 1;
  769. if( i < rows )
  770. {
  771. for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
  772. if( scale != 0.0f )
  773. {
  774. for( k = i; k < rows; k++ )
  775. {
  776. Q[k*cols+i] /= scale;
  777. s += Sqr( Q[k*cols+i] );
  778. }
  779. f = Q[i*cols+i];
  780. g = -SameSign( sqrtf(s), f );
  781. h = f * g - s;
  782. Q[i*cols+i] = f - g;
  783. if( i != cols - 1 )
  784. {
  785. for( j = l; j < cols; j++ )
  786. {
  787. s = 0.0f;
  788. for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
  789. f = s / h;
  790. for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
  791. }
  792. }
  793. for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
  794. }
  795. }
  796. diag[i] = scale * g;
  797. g = 0.0f;
  798. s = 0.0f;
  799. scale = 0.0f;
  800. if( i < rows && i != cols - 1 )
  801. {
  802. for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
  803. if( scale != 0.0f )
  804. {
  805. for( k = l; k < cols; k++ )
  806. {
  807. Q[i*cols+k] /= scale;
  808. s += Sqr( Q[i*cols+k] );
  809. }
  810. f = Q[i*cols+l];
  811. g = -SameSign( sqrtf(s), f );
  812. h = f * g - s;
  813. Q[i*cols+l] = f - g;
  814. for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
  815. if( i != rows - 1 )
  816. {
  817. for( j = l; j < rows; j++ )
  818. {
  819. s = 0.0f;
  820. for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
  821. for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
  822. }
  823. }
  824. for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
  825. }
  826. }
  827. norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
  828. }
  829. for( i = cols - 1; i >= 0; i-- )
  830. {
  831. if( i < cols - 1 )
  832. {
  833. if( g != 0.0f )
  834. {
  835. for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
  836. for( j = l; j < cols; j++ )
  837. {
  838. s = 0.0f;
  839. for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
  840. for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
  841. }
  842. }
  843. for( j = l; j < cols; j++ )
  844. {
  845. R[i*cols+j] = 0.0f;
  846. R[j*cols+i] = 0.0f;
  847. }
  848. }
  849. R[i*cols+i] = 1.0f;
  850. g = temp[i];
  851. l = i;
  852. }
  853. for( i = cols - 1; i >= 0; i-- )
  854. {
  855. l = i + 1;
  856. g = diag[i];
  857. if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
  858. if( g != 0.0f )
  859. {
  860. g = 1.0f / g;
  861. if( i != cols - 1 )
  862. {
  863. for( j = l; j < cols; j++ )
  864. {
  865. s = 0.0f;
  866. for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
  867. f = ( s / Q[i*cols+i] ) * g;
  868. for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
  869. }
  870. }
  871. for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
  872. }
  873. else
  874. {
  875. for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
  876. }
  877. Q[i*cols+i] += 1.0f;
  878. }
  879. for( k = cols - 1; k >= 0; k-- )
  880. {
  881. for( iter = 1; iter <= MaxIterations; iter++ )
  882. {
  883. int jump = 0;
  884. for( l = k; l >= 0; l-- )
  885. {
  886. q = l - 1;
  887. if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
  888. if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
  889. }
  890. if( !jump )
  891. {
  892. c = 0.0f;
  893. s = 1.0f;
  894. for( i = l; i <= k; i++ )
  895. {
  896. f = s * temp[i];
  897. temp[i] *= c;
  898. if( fabsf( f ) + norm == norm ) break;
  899. g = diag[i];
  900. h = svd_pythag( f, g );
  901. diag[i] = h;
  902. h = 1.0f / h;
  903. c = g * h;
  904. s = -f * h;
  905. for( j = 0; j < rows; j++ )
  906. {
  907. y = Q[j*cols+q];
  908. z = Q[j*cols+i];
  909. Q[j*cols+q] = y * c + z * s;
  910. Q[j*cols+i] = z * c - y * s;
  911. }
  912. }
  913. }
  914. z = diag[k];
  915. if( l == k )
  916. {
  917. if( z < 0.0f )
  918. {
  919. diag[k] = -z;
  920. for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f;
  921. }
  922. break;
  923. }
  924. if( iter >= MaxIterations ) return;
  925. x = diag[l];
  926. q = k - 1;
  927. y = diag[q];
  928. g = temp[q];
  929. h = temp[k];
  930. f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
  931. g = svd_pythag( f, 1.0f );
  932. f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
  933. c = 1.0f;
  934. s = 1.0f;
  935. for( j = l; j <= q; j++ )
  936. {
  937. i = j + 1;
  938. g = temp[i];
  939. y = diag[i];
  940. h = s * g;
  941. g = c * g;
  942. z = svd_pythag( f, h );
  943. temp[j] = z;
  944. c = f / z;
  945. s = h / z;
  946. f = x * c + g * s;
  947. g = g * c - x * s;
  948. h = y * s;
  949. y = y * c;
  950. for( p = 0; p < cols; p++ )
  951. {
  952. x = R[j*cols+p];
  953. z = R[i*cols+p];
  954. R[j*cols+p] = x * c + z * s;
  955. R[i*cols+p] = z * c - x * s;
  956. }
  957. z = svd_pythag( f, h );
  958. diag[j] = z;
  959. if( z != 0.0f )
  960. {
  961. z = 1.0f / z;
  962. c = f * z;
  963. s = h * z;
  964. }
  965. f = c * g + s * y;
  966. x = c * y - s * g;
  967. for( p = 0; p < rows; p++ )
  968. {
  969. y = Q[p*cols+j];
  970. z = Q[p*cols+i];
  971. Q[p*cols+j] = y * c + z * s;
  972. Q[p*cols+i] = z * c - y * s;
  973. }
  974. }
  975. temp[l] = 0.0f;
  976. temp[k] = f;
  977. diag[k] = x;
  978. }
  979. }
  980. // Sort the singular values into descending order.
  981. for( i = 0; i < cols - 1; i++ )
  982. {
  983. float biggest = diag[i]; // Biggest singular value so far.
  984. int bindex = i; // The row/col it occurred in.
  985. for( j = i + 1; j < cols; j++ )
  986. {
  987. if( diag[j] > biggest )
  988. {
  989. biggest = diag[j];
  990. bindex = j;
  991. }
  992. }
  993. if( bindex != i ) // Need to swap rows and columns.
  994. {
  995. // Swap columns in Q.
  996. for (int j = 0; j < rows; ++j)
  997. swap(Q[j*cols+i], Q[j*cols+bindex]);
  998. // Swap rows in R.
  999. for (int j = 0; j < rows; ++j)
  1000. swap(R[i*cols+j], R[bindex*cols+j]);
  1001. // Swap elements in diag.
  1002. swap(diag[i], diag[bindex]);
  1003. }
  1004. }
  1005. }