nvmath.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. // This code is in the public domain -- castanyo@yahoo.es
  2. #pragma once
  3. #ifndef NV_MATH_H
  4. #define NV_MATH_H
  5. #include "nvcore/nvcore.h"
  6. #include "nvcore/Debug.h" // nvDebugCheck
  7. #include "nvcore/Utils.h" // max, clamp
  8. #include <math.h>
  9. #if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
  10. #include <float.h> // finite, isnan
  11. #endif
  12. // -- GODOT start --
  13. //#if NV_CPU_X86 || NV_CPU_X86_64
  14. // //#include <intrin.h>
  15. // #include <xmmintrin.h>
  16. //#endif
  17. // -- GODOT end --
  // Function linkage
  // NVMATH_API / NVMATH_CLASS decorate exported functions and classes.
  // With NVMATH_SHARED set they expand to the export macros while the library
  // itself is being built (NVMATH_EXPORTS defined) and to the import macro
  // otherwise; without NVMATH_SHARED they expand to nothing.
  #if NVMATH_SHARED
  #ifdef NVMATH_EXPORTS
  #define NVMATH_API DLL_EXPORT
  #define NVMATH_CLASS DLL_EXPORT_CLASS
  #else
  #define NVMATH_API DLL_IMPORT
  // NOTE(review): classes use plain DLL_IMPORT here, while the export side uses
  // DLL_EXPORT_CLASS; confirm that the asymmetry is intended.
  #define NVMATH_CLASS DLL_IMPORT
  #endif
  #else // NVMATH_SHARED
  #define NVMATH_API
  #define NVMATH_CLASS
  #endif // NVMATH_SHARED
  // Set some reasonable defaults.
  // NV_USE_ALTIVEC follows NV_CPU_PPC unless the build has already defined it.
  #ifndef NV_USE_ALTIVEC
  # define NV_USE_ALTIVEC NV_CPU_PPC
  //# define NV_USE_ALTIVEC defined(__VEC__)
  #endif
  // Select the SSE level: 0 = none, 1 = SSE, 2 = SSE2.
  // A value already defined by the build is left untouched.
  #ifndef NV_USE_SSE
  # if NV_CPU_X86_64
  // x64 always supports at least SSE2
  # define NV_USE_SSE 2
  # elif NV_CC_MSVC && defined(_M_IX86_FP)
  // Also on x86 with the /arch:SSE flag in MSVC.
  # define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SSE2
  # elif defined(__SSE2__)
  // __SSE2__ must be tested before __SSE__: compilers that define __SSE2__
  // define __SSE__ as well, so the opposite order can never report level 2.
  # define NV_USE_SSE 2
  # elif defined(__SSE__)
  # define NV_USE_SSE 1
  # else
  // Otherwise we assume no SSE.
  # define NV_USE_SSE 0
  # endif
  #endif
  // Altivec and SSE are mutually exclusive: refuse to build when both are enabled.
  // NOTE(review): the previous comment said NV_USE_SIMD is set here, but nothing
  // in this header defines NV_USE_SIMD.
  #if NV_USE_ALTIVEC && NV_USE_SSE
  # error "Cannot enable both altivec and sse!"
  #endif
  56. // -- GODOT start --
  57. #if NV_USE_SSE
  58. //#include <intrin.h>
  59. #include <xmmintrin.h>
  60. #endif
  61. // -- GODOT end --
  // PI as a float (function-style cast); skipped when PI is already defined.
  #ifndef PI
  #define PI float(3.1415926535897932384626433833)
  #endif
  // Default tolerance used by equal() and isZero() in this header.
  #define NV_EPSILON (0.0001f)
  // A larger tolerance; it is not used in this header (presumably meant for
  // comparing normal vectors; confirm against callers).
  #define NV_NORMAL_EPSILON (0.001f)
  67. /*
  68. #define SQ(r) ((r)*(r))
  69. #define SIGN_BITMASK 0x80000000
  70. /// Integer representation of a floating-point value.
  71. #define IR(x) ((uint32 &)(x))
  72. /// Absolute integer representation of a floating-point value
  73. #define AIR(x) (IR(x) & 0x7fffffff)
  74. /// Floating-point representation of an integer value.
  75. #define FR(x) ((float&)(x))
  76. /// Integer-based comparison of a floating point value.
  77. /// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
  78. #define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
  79. */
  80. extern "C" inline double sqrt_assert(const double f)
  81. {
  82. nvDebugCheck(f >= 0.0f);
  83. return sqrt(f);
  84. }
  85. inline float sqrtf_assert(const float f)
  86. {
  87. nvDebugCheck(f >= 0.0f);
  88. return sqrtf(f);
  89. }
  90. extern "C" inline double acos_assert(const double f)
  91. {
  92. nvDebugCheck(f >= -1.0f && f <= 1.0f);
  93. return acos(f);
  94. }
  95. inline float acosf_assert(const float f)
  96. {
  97. nvDebugCheck(f >= -1.0f && f <= 1.0f);
  98. return acosf(f);
  99. }
  100. extern "C" inline double asin_assert(const double f)
  101. {
  102. nvDebugCheck(f >= -1.0f && f <= 1.0f);
  103. return asin(f);
  104. }
  105. inline float asinf_assert(const float f)
  106. {
  107. nvDebugCheck(f >= -1.0f && f <= 1.0f);
  108. return asinf(f);
  109. }
  // Replace default functions with asserting ones.
  // These are plain macro substitutions, so every use of sqrt/sqrtf/acos/acosf/
  // asin/asinf that appears after this point is rewritten to the matching
  // *_assert wrapper defined above. The wrappers themselves are defined before
  // the macros and therefore still call the real library functions.
  #if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700)) // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
  #define sqrt sqrt_assert
  #define sqrtf sqrtf_assert
  #define acos acos_assert
  #define acosf acosf_assert
  #define asin asin_assert
  #define asinf asinf_assert
  #endif
  119. #if NV_CC_MSVC
  120. NV_FORCEINLINE float log2f(float x)
  121. {
  122. nvCheck(x >= 0);
  123. return logf(x) / logf(2.0f);
  124. }
  125. NV_FORCEINLINE float exp2f(float x)
  126. {
  127. return powf(2.0f, x);
  128. }
  129. #endif
  130. namespace nv
  131. {
  132. inline float toRadian(float degree) { return degree * (PI / 180.0f); }
  133. inline float toDegree(float radian) { return radian * (180.0f / PI); }
  134. // Robust floating point comparisons:
  135. // http://realtimecollisiondetection.net/blog/?p=89
  136. inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
  137. {
  138. //return fabs(f0-f1) <= epsilon;
  139. return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
  140. }
  141. inline bool isZero(const float f, const float epsilon = NV_EPSILON)
  142. {
  143. return fabs(f) <= epsilon;
  144. }
  145. inline bool isFinite(const float f)
  146. {
  147. #if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
  148. return _finite(f) != 0;
  149. #elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
  150. return isfinite(f);
  151. #else
  152. # error "isFinite not supported"
  153. #endif
  154. //return std::isfinite (f);
  155. //return finite (f);
  156. }
  157. inline bool isNan(const float f)
  158. {
  159. #if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
  160. return _isnan(f) != 0;
  161. #elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
  162. return isnan(f);
  163. #else
  164. # error "isNan not supported"
  165. #endif
  166. }
  167. inline uint log2(uint32 i)
  168. {
  169. uint32 value = 0;
  170. while( i >>= 1 ) value++;
  171. return value;
  172. }
  173. inline uint log2(uint64 i)
  174. {
  175. uint64 value = 0;
  176. while (i >>= 1) value++;
  177. return U32(value);
  178. }
  // Linear interpolation: f0 weighted by (1 - t) plus f1 weighted by t.
  inline float lerp(float f0, float f1, float t)
  {
      return f0 * (1.0f - t) + f1 * t;
  }
  // Returns f multiplied by itself.
  inline float square(float f)
  {
      const float squared = f * f;
      return squared;
  }
  // Returns i multiplied by itself.
  inline int square(int i)
  {
      const int squared = i * i;
      return squared;
  }
  // Returns f raised to the third power.
  inline float cube(float f)
  {
      const float cubed = f * f * f;
      return cubed;
  }
  // Returns i raised to the third power.
  inline int cube(int i)
  {
      const int cubed = i * i * i;
      return cubed;
  }
  // Fractional part of f measured from floor(f), so the result is not negative
  // for negative inputs (e.g. -0.25 gives 0.75).
  inline float frac(float f)
  {
      const float fraction = f - floor(f);
      return fraction;
  }
  // Rounds to the nearest integer by taking the floor of f + 0.5, so halves
  // round towards positive infinity.
  inline float floatRound(float f)
  {
      const float shifted = f + 0.5f;
      return floorf(shifted);
  }
  196. // Eliminates negative zeros from a float array.
  197. inline void floatCleanup(float * fp, int n)
  198. {
  199. for (int i = 0; i < n; i++) {
  200. //nvDebugCheck(isFinite(fp[i]));
  201. union { float f; uint32 i; } x = { fp[i] };
  202. if (x.i == 0x80000000) fp[i] = 0.0f;
  203. }
  204. }
  205. inline float saturate(float f) {
  206. return clamp(f, 0.0f, 1.0f);
  207. }
  208. inline float linearstep(float edge0, float edge1, float x) {
  209. // Scale, bias and saturate x to 0..1 range
  210. return saturate((x - edge0) / (edge1 - edge0));
  211. }
  212. inline float smoothstep(float edge0, float edge1, float x) {
  213. x = linearstep(edge0, edge1, x);
  214. // Evaluate polynomial
  215. return x*x*(3 - 2*x);
  216. }
  // Returns 1 for positive a, -1 for negative a and 0 otherwise.
  inline int sign(float a)
  {
      if (a > 0) {
          return 1;
      }
      if (a < 0) {
          return -1;
      }
      return 0;
  }
  // Overlay of a float with its raw bits and with three bit-fields of widths
  // 1, 8 and 23 (sign, biased exponent, mantissa).
  // NOTE(review): relies on float and unsigned int having the same size and on
  // the compiler's bit-field allocation order matching NV_BIG_ENDIAN; confirm
  // for each target.
  union Float754 {
      unsigned int raw;    // All bits viewed as one unsigned integer.
      float value;         // The same storage viewed as a float.
      struct {
  #if NV_BIG_ENDIAN
          // Big-endian builds declare the fields from sign down to mantissa.
          unsigned int negative:1;
          unsigned int biasedexponent:8;
          unsigned int mantissa:23;
  #else
          // Otherwise the declaration order is reversed, mantissa first.
          unsigned int mantissa:23;
          unsigned int biasedexponent:8;
          unsigned int negative:1;
  #endif
      } field;
  };
  239. // Return the exponent of x ~ Floor(Log2(x))
  240. inline int floatExponent(float x)
  241. {
  242. Float754 f;
  243. f.value = x;
  244. return (f.field.biasedexponent - 127);
  245. }
  // FloatRGB9E5
  // One 32-bit word split into three 9-bit fields (xm, ym, zm) and one 5-bit
  // field (e): 9 + 9 + 9 + 5 = 32 bits. Presumably xm/ym/zm are the per-channel
  // mantissas and e the exponent they share, as the RGB9E5 name suggests;
  // confirm against the code that packs and unpacks it.
  union Float3SE {
      uint32 v;    // The whole packed word.
      struct {
  #if NV_BIG_ENDIAN
          // Big-endian builds declare the fields in the opposite order.
          uint32 e : 5;
          uint32 zm : 9;
          uint32 ym : 9;
          uint32 xm : 9;
  #else
          uint32 xm : 9;
          uint32 ym : 9;
          uint32 zm : 9;
          uint32 e : 5;
  #endif
      };
  };
  // FloatR11G11B10
  // One 32-bit word holding three small values, each split into an "m" and an
  // "e" field: x and y use 6 + 5 = 11 bits, z uses 5 + 5 = 10 bits.
  // Presumably m is the mantissa and e the exponent of each channel; confirm
  // against the code that packs and unpacks it.
  union Float3PK {
      uint32 v;    // The whole packed word.
      struct {
  #if NV_BIG_ENDIAN
          // Big-endian builds declare the fields in the opposite order.
          uint32 ze : 5;
          uint32 zm : 5;
          uint32 ye : 5;
          uint32 ym : 6;
          uint32 xe : 5;
          uint32 xm : 6;
  #else
          uint32 xm : 6;
          uint32 xe : 5;
          uint32 ym : 6;
          uint32 ye : 5;
          uint32 zm : 5;
          uint32 ze : 5;
  #endif
      };
  };
  284. } // nv
  285. #endif // NV_MATH_H