yzziizzy
/
c3dlas


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697
							

#include <stdio.h>
#include <string.h>
#include <float.h>
#include <math.h>
#include <limits.h>
#include <float.h>

#include <x86intrin.h>

#include "c3dlas.h"

#ifdef C3DLAS_USE_BUILTINS

	#define abs_double __builtin_fabs
	#define abs_float  __builtin_fabsf
#else
	#define abs_double fabs
	#define abs_float  fabsf
#endif


#ifdef C3DLAS_NO_TGMATH
	// requires GCC probably
	
		/*

	#define FD_CHOOSE_1(a, b, fn_f, fn_d)\

		__builtin_choose_expr( \

			__builtin_types_compatible_p(__typeof__(a), double), \

				fn_d(a), \

				fn_f(a))

		
	#define FD_CHOOSE_2(a, b, fn_f, fn_d)\

		__builtin_choose_expr( \

			__builtin_types_compatible_p(__typeof__(a), double) || __builtin_types_compatible_p(__typeof__(b), double), \

				fn_d(a, b), \

				fn_f(a, b))

	
	#define fmax(a,b) FD_CHOOSE_2(a, b, fmaxf, fmax)

	#define fmin(a,b) FD_CHOOSE_2(a, b, fminf, fmin)

	#define fabs(a)   FD_CHOOSE_1(a, fabsf, fabs)

	#define sqrt(a)   FD_CHOOSE_1(a, sqrtf, sqrt)

		*/
#else
	#include <tgmath.h>
#endif


#ifndef _GNU_SOURCE
static inline void sincosf(float x, float* s, float* c) {
	*s = sinf(x);
	*c = cosf(x);
}
#endif


// utilities

// reverses the argument
uint32_t bitReverse32(uint32_t x) {
	x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
	x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
	x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
	x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
	return ((x >> 16) | (x << 16));
}


// reverses the least significant (len) bits, zeroing the top
uint32_t reverseBits(uint32_t n, int len) {
	uint32_t rn = bitReverse32(n);
	
	return rn >> (32 - len);
}


// random numbers

// prepare the pcg for use
void pcg_init(PCG* pcg, uint64_t seed) {
	pcg->stream = seed;
	pcg->state = (seed * 6364136223846793005ULL) + (seed | 1);
}


// returns a random number in (-1, 1) uninclusive
// Thanks to Kaslai (https://github.com/Aslai) for fixing a nasty bug in the previous version
float pcg_f(uint64_t* state, uint64_t stream) {
	union {
		uint32_t fu;
		float ff;
	} u;
	
	
	uint64_t last = *state;
	*state = (last * 6364136223846793005ULL) + (stream | 1);
	uint32_t xs = ((last >> 18) ^ last) >> 27;
	uint32_t rot = last >> 59;
	uint32_t fin = (xs >> rot) | (xs << ((-rot) & 31));
	
	uint32_t exp = (fin & 0x3F800000);
	exp = (0x7F + 33 - __builtin_clzl(exp)) << 23;
	
	u.fu = ((fin) & 0x807fffff) | exp;
	return u.ff;
}


// returns a random number in [0, UINT32_MAX] inclusive
uint32_t pcg_u32(uint64_t* state, uint64_t stream) {
	
	uint64_t last = *state;
	*state = (last * 6364136223846793005ULL) + (stream | 1);
	uint32_t xs = ((last >> 18) ^ last) >> 27;
	uint32_t rot = last >> 59;
	uint32_t fin = (xs >> rot) | (xs << ((-rot) & 31));
	
	return fin;
}


// BUG: totally untested
// SIMD and C versions do not return the same values. 
void pcg_f8(uint64_t* state, uint64_t stream, float* out) {
	
#if defined(C3DLAS_USE_SIMD)
	__m256i s1, s2, xs1, xs2, xs, r, nra, q, f;
	
	s1 = _mm256_add_epi64(_mm256_set1_epi64x(*state), _mm256_set_epi64x(1,2,3,4));
	s2 = _mm256_add_epi64(_mm256_set1_epi64x(*state), _mm256_set_epi64x(5,6,7,8));
	
	// cycle the state
	*state = (*state * 6364136223846793005ULL) + (stream | 1);
	
	xs1 = _mm256_srli_epi64(_mm256_xor_si256(_mm256_srli_epi64(s1, 18), s1), 27);
	xs2 = _mm256_srli_epi64(_mm256_xor_si256(_mm256_srli_epi64(s2, 18), s2), 27);
	
	xs = _mm256_unpacklo_epi32(xs1, xs2);
	
	r = _mm256_srai_epi32(xs, 59);
	nra = _mm256_and_si256(_mm256_sign_epi32(r, _mm256_set1_epi32(-1)), _mm256_set1_epi32(31));
	
	q = _mm256_or_si256(_mm256_srav_epi32(xs, r), _mm256_sllv_epi32(xs, nra)); 
	
	// q is full of random 32bit integers now
	
	// convert to (-1, 1) floats by jamming in some exponent info
	f = _mm256_or_si256(_mm256_and_si256(q, _mm256_set1_epi32(0x807fffff)), _mm256_set1_epi32(0x3f000000));
	 
	_mm256_storeu_si256((__m256i*)out, f); 
	
#else
	out[0] = pcg_f(state, stream);
	out[1] = pcg_f(state, stream);
	out[2] = pcg_f(state, stream);
	out[3] = pcg_f(state, stream);
	out[4] = pcg_f(state, stream);
	out[5] = pcg_f(state, stream);
	out[6] = pcg_f(state, stream);
	out[7] = pcg_f(state, stream);
#endif
}


float frandPCG(float low, float high, PCG* pcg) {
	return low + ((high - low) * (pcg_f(&pcg->state, pcg->stream) * 0.5 + 0.5));
}


uint32_t urandPCG(uint32_t low, uint32_t high, PCG* pcg) {
	return pcg_u32(&pcg->state, pcg->stream) % (high - low) + low;
}


#define FN(sz, suf, ty, ft, sufft, pref, ...) \

\

int vEqExact##suf(const Vector##suf a, const Vector##suf b) { \

	return vEqExact##suf##p(&a, &b); \

} \

int vEqExact##suf##p(const Vector##suf const * a, const Vector##suf const * b) { \

	int tmp = 0; \

	for(int i = 0; i < sz; i++) \

		tmp += ((ty*)a)[i] == ((ty*)b)[i]; \

	return tmp == sz; \

} \

\

int vEq##suf(const Vector##suf a, const Vector##suf b) { \

	return vEqEp##suf(a, b, pref##_CMP_EPSILON); \

} \

int vEq##suf##p(const Vector##suf* a, const Vector##suf* b) { \

	return vEqEp##suf(*a, *b, pref##_CMP_EPSILON); \

} \

\

int vEqEp##suf(const Vector##suf a, const Vector##suf b, ft epsilon) { \

	return vEqEp##suf##p(&a, &b, epsilon); \

} \

int vEqEp##suf##p(const Vector##suf* a, const Vector##suf* b, ft epsilon) { \

	return vDistSq##suf(*a, *b) <= epsilon * epsilon; \

} \

\

ft vDistSq##suf(const Vector##suf a, const Vector##suf b) { \

	return vDistSq##suf##p(&a, &b); \

} \

ft vDistSq##suf##p(const Vector##suf* a, const Vector##suf* b) { \

	ft tmp = 0; \

	for(int i = 0; i < sz; i++) { \

		ft q = ((ty*)a)[i] - ((ty*)b)[i]; \

		tmp += q * q; \

	} \

	return tmp;\

} \

ft vDist##suf(const Vector##suf a, const Vector##suf b) { \

	return sqrt(vDistSq##suf##p(&a, &b)); \

} \

ft vDist##suf##p(const Vector##suf* a, const Vector##suf* b) { \

	return sqrt(vDistSq##suf##p(a, b)); \

} \

\

Vector##suf vAdd##suf(const Vector##suf a, const Vector##suf b) { \

	Vector##suf out; \

	vAdd##suf##p(&a, &b, &out); \

	return out; \

} \

void vAdd##suf##p(const Vector##suf* a, const Vector##suf* b, Vector##suf* out) { \

	for(int i = 0; i < sz; i++) { \

		((ty*)out)[i] = ((ty*)a)[i] + ((ty*)b)[i]; \

	} \

} \

\

Vector##suf vSub##suf(const Vector##suf a, const Vector##suf b) { \

	Vector##suf out; \

	vSub##suf##p(&a, &b, &out); \

	return out; \

} \

void vSub##suf##p(const Vector##suf const * a, const Vector##suf const * b, Vector##suf* out) { \

	for(int i = 0; i < sz; i++) { \

		((ty*)out)[i] = ((ty*)a)[i] - ((ty*)b)[i]; \

	} \

} \

\

Vector##suf vMul##suf(const Vector##suf a, const Vector##suf b) { \

	Vector##suf out; \

	vMul##suf##p(&a, &b, &out); \

	return out; \

} \

void vMul##suf##p(const Vector##suf const * a, const Vector##suf const * b, Vector##suf* out) { \

	for(int i = 0; i < sz; i++) { \

		((ty*)out)[i] = ((ty*)a)[i] * ((ty*)b)[i]; \

	} \

} \

\

Vector##suf vDiv##suf(const Vector##suf top, const Vector##suf bot) { \

	Vector##suf out; \

	vDiv##suf##p(&top, &bot, &out); \

	return out; \

} \

void vDiv##suf##p(const Vector##suf const * top, const Vector##suf const * bot, Vector##suf* out) { \

	for(int i = 0; i < sz; i++) { \

		((ty*)out)[i] = ((ty*)top)[i] / ((ty*)bot)[i]; \

	} \

} \

\

ft vDot##suf(const Vector##suf a, const Vector##suf b) { \

	return vDot##suf##p(&a, &b); \

} \

ft vDot##suf##p(const Vector##suf* a, const Vector##suf* b) { \

	ft tmp = 0; \

	for(int i = 0; i < sz; i++) { \

		tmp += ((ty*)a)[i] * ((ty*)b)[i]; \

	} \

	return tmp;\

} \

\

Vector##sufft vScale##suf(const Vector##suf v, ft scalar) { \

	Vector##sufft out; \

	vScale##suf##p(&v, scalar, &out); \

	return out; \

} \

void vScale##suf##p(const Vector##suf* v, ft scalar, Vector##sufft* out) { \

	for(int i = 0; i < sz; i++) \

		((ft*)out)[i] = (ft)((ty*)v)[i] * scalar; \

} \

\

\

Vector##sufft vAvg##suf(const Vector##suf a, const Vector##suf b) { \

	Vector##sufft out; \

	vAvg##suf##p(&a, &b, &out); \

	return out; \

} \

void vAvg##suf##p(const Vector##suf* a, const Vector##suf* b, Vector##sufft* out) { \

	for(int i = 0; i < sz; i++) { \

		((ty*)out)[i] = (((ty*)a)[i] + ((ty*)b)[i]) / (ft)2.0; \

	} \

} \

\

Vector##suf vNeg##suf(const Vector##suf v) { \

	Vector##suf out; \

	vNeg##suf##p(&v, &out); \

	return out; \

} \

void vNeg##suf##p(const Vector##suf* v, Vector##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((ty*)out)[i] = -((ty*)v)[i]; \

} \

\

Vector##sufft vLerp##suf(const Vector##suf a, const Vector##suf b, ft t) { \

	Vector##sufft out; \

	vLerp##suf##p(&a, &b, t, &out); \

	return out; \

} \

void vLerp##suf##p(const Vector##suf* a, const Vector##suf* b, ft t, Vector##sufft* out) { \

	for(int i = 0; i < sz; i++) \

		((ft*)out)[i] = (ft)((ty*)a)[i] + ((ft)(((ty*)b)[i] - ((ty*)a)[i]) * t) ; \

} \

\

Vector##sufft vInv##suf(const Vector##suf v) { \

	Vector##sufft out; \

	vInv##suf##p(&v, &out); \

	return out; \

} \

void vInv##suf##p(const Vector##suf* v, Vector##sufft* out) { \

	for(int i = 0; i < sz; i++) \

		((ft*)out)[i] = (((ty*)v)[i] == 0) ? pref##_MAX : ((ft)1.0 /  (ft)((ty*)v)[i]); \

} \

\

ft vLen##suf(const Vector##suf v) { \

	return vLen##suf##p(&v); \

} \

ft vLen##suf##p(const Vector##suf* v) { \

	ft tmp = 0.0; \

	for(int i = 0; i < sz; i++) \

		tmp += (ft)((ty*)v)[i] * (ft)((ty*)v)[i]; \

	return sqrt(tmp); \

} \

\

ft vLenSq##suf(const Vector##suf v) { \

	return vLenSq##suf##p(&v); \

} \

ft vLenSq##suf##p(const Vector##suf* v) { \

	return vDot##suf##p(v, v); \

} \

\

ft vMag##suf(const Vector##suf v) { \

	return vLen##suf##p(&v); \

} \

ft vMag##suf##p(const Vector##suf* v) { \

	return vLen##suf##p(v); \

} \

\

ft vInvLen##suf(const Vector##suf v) { \

	ft tmp = vLen##suf(v); \

	return tmp == 0 ? pref##_MAX : (ft)1.0 / tmp; \

} \

ft vInvLen##suf##p(const Vector##suf* v) { \

	return vInvLen##suf(*v); \

} \

\

Vector##sufft vNorm##suf(const Vector##suf v) { \

	Vector##sufft out; \

	vNorm##suf##p(&v, &out); \

	return out; \

} \

void vNorm##suf##p(const Vector##suf* v, Vector##sufft* out) { \

	ft n = vLenSq##suf(*v); \

	\

	if(n >= (ft)1.0f - pref##_CMP_EPSILON && n <= (ft)1.0 + pref##_CMP_EPSILON) { \

		for(int i = 0; i < sz; i++) \

			((ft*)out)[i] = (ft)((ty*)v)[i]; \

		return;		 \

	} \

	else if(n == 0.0) { \

		for(int i = 0; i < sz; i++) \

			((ft*)out)[i] = 0; \

		return; \

	} \

	 \

	n = (ft)1.0 / sqrt(n); \

	for(int i = 0; i < sz; i++) \

		((ft*)out)[i] = (ft)((ty*)v)[i] * n; \

}  \

\

Vector##sufft vUnit##suf(const Vector##suf v) { \

	return vNorm##suf(v); \

} \

void vUnit##suf##p(const Vector##suf* v, Vector##sufft* out) { \

	return vNorm##suf##p(v, out); \

} \


	C3DLAS_VECTOR_LIST(FN)

#undef FN


// swap two vectors


void vSwap2ip(Vector2i* a, Vector2i* b) {

	Vector2i t;

	t = *b;

	*b = *a;

	*a = t;

}


void vSwap2p(Vector2* a, Vector2* b) {

	Vector2 t;

	t = *b;

	*b = *a;

	*a = t;

}


void vSwap3p(Vector3* a, Vector3* b) {

	Vector3 t;

	t = *b;

	*b = *a;

	*a = t;

}


void vSwap4p(Vector4* a, Vector4* b) {

	Vector4 t;

	t = *b;

	*b = *a;

	*a = t;

}


// Cross product: out = a x b

// Cross products *proper* only exist in 3 and 7 dimensions

Vector3 vCross3(Vector3 a, Vector3 b) {

	Vector3 out;

	vCross3p(&a, &b, &out);

	return out;

}


void vCross3p(Vector3* a, Vector3* b, Vector3* out) {

	out->x = (a->y * b->z) - (a->z * b->y);
	out->y = (a->z * b->x) - (a->x * b->z);
	out->z = (a->x * b->y) - (a->y * b->x);
}


// ... however, if you apply it to two dimensions it yields
//  the sine of the angle between the two vectors, and the sign
//  of the value determines which side of a that b is on. If
//  the value is zero, the vectors are parallel.

float vCross2(Vector2 a, Vector2 b) {
	return (a.x * b.y) - (a.y * b.x);
}

float vCross2p(Vector2* a, Vector2* b) {
	return (a->x * b->y) - (a->y * b->x);
}


// Scalar triple product: a . (b x c)
float vScalarTriple3(Vector3 a, Vector3 b, Vector3 c) {
	return vScalarTriple3p(&a, &b, &c);
}

float vScalarTriple3p(Vector3* a, Vector3* b, Vector3* c) {
	return (float)((a->x * b->y * c->z) - (a->x * b->z * c->y) - (a->y * b->x * c->z)
				 + (a->z * b->x * c->y) + (a->y * b->z * c->x) - (a->z * b->y * c->x));
}


// Vector Inverse. Returns FLT_MAX on div/0


// Vector magnitude (length)


// Squared distance from one point to another


// Distance from one point to another


// Vector normalize (scale to unit length)


// vMin(a, b)  Returns the minimum values of each component
// vMin(a, b)  Returns the maximum values of each component

#define FN(sz, suf, t, maxval) \

void vMin##sz##suf##p(const Vector##sz##suf* a, const Vector##sz##suf* b, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = fmin(((t*)a)[i], ((t*)b)[i]); \

} \

void vMax##sz##suf##p(const Vector##sz##suf* a, const Vector##sz##suf* b, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = fmax(((t*)a)[i], ((t*)b)[i]); \

} \

Vector##sz##suf vMin##sz##suf(Vector##sz##suf a, Vector##sz##suf b) { \

	Vector##sz##suf out; \

	vMin##sz##suf##p(&a, &b, &out); \

	return out; \

} \

Vector##sz##suf vMax##sz##suf(Vector##sz##suf a, Vector##sz##suf b) { \

	Vector##sz##suf out; \

	vMax##sz##suf##p(&a, &b, &out); \

	return out; \

} \

\

int vMinComp##sz##suf##p(const Vector##sz##suf* a) { \

	t best = ((t*)a)[0]; \

	int best_ind = 0; \

	for(int i = 1; i < sz; i++) { \

		if(((t*)a)[i] < best) { \

			best = ((t*)a)[i]; \

			best_ind = i; \

		} \

	} \

	return best_ind; \

} \

\

int vMaxComp##sz##suf##p(const Vector##sz##suf* a) { \

	t best = ((t*)a)[0]; \

	int best_ind = 0; \

	for(int i = 1; i < sz; i++) { \

		if(((t*)a)[i] > best) { \

			best = ((t*)a)[i]; \

			best_ind = i; \

		} \

	} \

	return best_ind; \

} \

\

int vMinComp##sz##suf(const Vector##sz##suf a) { \

	return vMinComp##sz##suf##p(&a); \

} \

\

int vMaxComp##sz##suf(const Vector##sz##suf a) { \

	return vMaxComp##sz##suf##p(&a); \

} \

\

Vector##sz##suf vClamp##sz##suf(Vector##sz##suf in, Vector##sz##suf min, Vector##sz##suf max) { \

	Vector##sz##suf out; \

	for(int i = 0; i < sz; i++) \

		((t*)&out)[i] = fmax(((t*)&min)[i], fmin(((t*)&in)[i], ((t*)&max)[i])); \

	return out; \

} \

Vector##sz##suf vAbs##sz##suf(const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vAbs##sz##suf##p(&v, &out); \

	return out; \

} \

void vAbs##sz##suf##p(const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = abs_##t( ((t*)v)[i] ); \

} \

Vector##sz##suf vSign##sz##suf(const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vSign##sz##suf##p(&v, &out); \

	return out; \

} \

void vSign##sz##suf##p(const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = copysign((t)1.0, ((t*)v)[i] ); \

} \

Vector##sz##suf vStep##sz##suf(const Vector##sz##suf edge, const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vStep##sz##suf##p(&edge, &v, &out); \

	return out; \

} \

void vStep##sz##suf##p(const Vector##sz##suf* edge, const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = ((t*)v)[i] < ((t*)edge)[i] ? 0.0 : 1.0; \

} \


FN(2,  , float,  FLT_MAX)

FN(3,  , float,  FLT_MAX)

FN(4,  , float,  FLT_MAX)

FN(2, d, double, DBL_MAX)

FN(3, d, double, DBL_MAX)

FN(4, d, double, DBL_MAX)

#undef FN


#define FN(sz, suf, t, maxval) \

void vMin##sz##suf##p(const Vector##sz##suf* a, const Vector##sz##suf* b, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = MIN(((t*)a)[i], ((t*)b)[i]); \

} \

void vMax##sz##suf##p(const Vector##sz##suf* a, const Vector##sz##suf* b, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = MAX(((t*)a)[i], ((t*)b)[i]); \

} \

Vector##sz##suf vMin##sz##suf(Vector##sz##suf a, Vector##sz##suf b) { \

	Vector##sz##suf out; \

	vMin##sz##suf##p(&a, &b, &out); \

	return out; \

} \

Vector##sz##suf vMax##sz##suf(Vector##sz##suf a, Vector##sz##suf b) { \

	Vector##sz##suf out; \

	vMax##sz##suf##p(&a, &b, &out); \

	return out; \

} \

Vector##sz##suf vClamp##sz##suf(Vector##sz##suf in, Vector##sz##suf min, Vector##sz##suf max) { \

	Vector##sz##suf out; \

	for(int i = 0; i < sz; i++) \

		((t*)&out)[i] = MAX(((t*)&min)[i], MIN(((t*)&in)[i], ((t*)&max)[i])); \

	return out; \

} \

Vector##sz##suf vAbs##sz##suf(const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vAbs##sz##suf##p(&v, &out); \

	return out; \

} \

void vAbs##sz##suf##p(const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = labs( ((t*)v)[i] ); \

} \

Vector##sz##suf vSign##sz##suf(const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vSign##sz##suf##p(&v, &out); \

	return out; \

} \

void vSign##sz##suf##p(const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = ((t*)v)[i] < 0 ? -1 : 1; \

} \

Vector##sz##suf vStep##sz##suf(const Vector##sz##suf edge, const Vector##sz##suf v) { \

	Vector##sz##suf out; \

	vStep##sz##suf##p(&edge, &v, &out); \

	return out; \

} \

void vStep##sz##suf##p(const Vector##sz##suf* edge, const Vector##sz##suf* v, Vector##sz##suf* out) { \

	for(int i = 0; i < sz; i++) \

		((t*)out)[i] = ((t*)v)[i] < ((t*)edge)[i] ? 0.0 : 1.0; \

} \


FN(2, i, int,  DBL_MAX)

FN(3, i, int,  DBL_MAX)

FN(4, i, int,  DBL_MAX)

FN(2, l, long, DBL_MAX)

FN(3, l, long, DBL_MAX)

FN(4, l, long, DBL_MAX)

#undef FN


// Returns an arbitrary unit vector perpendicular to the input

// The input vector does not need to be normalized

void vPerp3p(Vector3* n, Vector3* out) {

	*out = vPerp3(*n);

}


Vector3 vPerp3(Vector3 n) {

	float f, d;

	
	float absx = fabs(n.x);

	float absy = fabs(n.y);

	
	if(absx < absy) {

		if(n.x < n.z) goto MIN_X;

		goto MIN_Z;

	}

	if(absy < fabs(n.z)) goto MIN_Y;

	goto MIN_Z;

	
MIN_X: 

	d = 1.0f / sqrtf(n.z * n.z + n.y * n.y); 

	f = n.z;

	n.z = n.y * d;

	n.y = -f * d;

	n.x = 0;

	return n;

	
MIN_Y: 

	d = 1.0f / sqrtf(n.z * n.z + n.x * n.x); 

	f = n.x;

	n.x = n.z * d;

	n.z = -f * d;

	n.y = 0;

	return n;

	
MIN_Z: 

	d = 1.0f / sqrtf(n.x * n.x + n.y * n.y); 

	f = n.y;

	n.y = n.x * d;

	n.x = -f * d;

	n.z = 0;

	return n;

}


// Returns an arbitrary unit vector perpendicular to the input

// The input vector does not need to be normalized

void vPerp2p(Vector2* n, Vector2* out) {

	*out = vPerp2(*n);

}


Vector2 vPerp2(Vector2 n) {

	return vNorm2((Vector2){.x = -n.y, .y = n.x});

}


// Coordinate system conversions


// Does not check for degenerate vectors

// Cartesian to Spherical

Vector3 vC2S3(Vector3 cart) {

	Vector3 sp;

	sp.rho = vMag3(cart);

	sp.theta = atan2f(cart.x, cart.y);

	sp.phi = acosf(cart.z / sp.rho);

	
	return sp;

}


// Spherical to Cartesian

Vector3 vS2C3(Vector3 s) {

	float st, ct, sp, cp;

	
	// as of July 2022, gcc trunk is smart enough to automatically optimize to sincos, but clang isn't. 

	sincosf(s.phi, &sp, &cp);

	sincosf(s.theta, &st, &ct);


	return (Vector3){

		.x = s.rho * sp * ct,

		.y = s.rho * sp * st,

		.z = s.rho * cp

	};

}


Vector3 closestPointToRay3(Vector3 p, Ray3 r) {

	Vector3 po = vSub3(p, r.o); // vector from the starting point to p

	
	float t = vDot3(po, r.d); // project the pa onto the ray direction

	fclamp(t, 0.0, 1.0); // clamp t to between the endpoints of the line segment


	return vSub3(po, vScale3(r.d, t));

}


// completely untested.

// can probably be optimized

// This function is poorly named. It is designed to check if a bounding sphere intersects a cone surrounding a viewing frustum.

int distanceSphereToCone(Vector3 spc, float spr, Vector3 c1, Vector3 c2, float cr1, float cr2) {

	
	Vector3 cnorm = vNorm(vSub(c2, c1)); // normal pointing down the center of the cone

	
	Vector3 sp_c1 = vSub(spc, c1); // vector pointing from c1 to the sphere center

	Vector3 up = vCross3(spc, cnorm); // vector perpendicular to the plane containing the cone's centerline and the sphere center.

	Vector3 perp_toward_sp = vNorm(vCross3(cnorm, up)); // vector perpendicular to the cone's centerline within the plane, towards the sphere

	Vector3 outer_c1 = vAdd(c1, vScale(perp_toward_sp, cr1)); // point in the plane on the outer edge of the cone

	Vector3 outer_c2 = vAdd(c2, vScale(perp_toward_sp, cr2)); // point in the plane on the outer edge of the cone

	Vector3 closest = closestPointToRay3(spc, (Ray3){.o = outer_c1, .d = vNorm(vSub(outer_c2, outer_c1))}); // point on the cone closest to the sphere

	
	// this part is probably wrong

	if(vDot(perp_toward_sp, vSub(spc, closest)) < 0) return 1; // is the sphere center inside the cone?


	return (vDist(closest, spc) - spr) <= 0;

}


float distTPointRay3(Vector3 p, Ray3 r, float* T) {

	Vector3 pa = vSub3(p, r.o);

	Vector3 ba = vNeg3(r.d);// vSub3(ls.end, ls.start);

	
	float t = vDot3(pa, ba) / vDot3(ba, ba);

	if(T) *T = t;

	return vLen3(vSub3(pa, vScale3(ba, t)));

}


float dist2TPointRay3(Vector3 p, Ray3 r, float* T) {

	Vector3 pa = vSub3(p, r.o);

	Vector3 ba = vNeg3(r.d);// vSub3(ls.end, ls.start);

	
	float t = vDot3(pa, ba) / vDot3(ba, ba);

	if(T) *T = t;

	return vLenSq3(vSub3(pa, vScale3(ba, t)));

}


int vInsidePolygon(Vector2 p, Polygon* poly) {

	int inside = 0;

	int cnt = poly->pointCount;
	
	if(poly->maxRadiusSq < vDot2(poly->centroid, p)) return 0;
	
	for(int i = 0; i < cnt; i++) {
		Vector2 a = poly->points[i];
		Vector2 b = poly->points[(i + 1) % cnt];
		
		if(a.y == b.y) continue; // horizontal edges are ignored
		
		// we're testing a ray going to the right
		if(a.x < p.x && b.x < p.x) continue; // segment is entirely left of the point
		
		if(a.y >= p.y && b.y >= p.y) continue; // segment entirely above the point
		if(a.y < p.y && b.y < p.y) continue; // segment entirely below the point
		// segment is in the same vertical band as the point
		
		float sx = a.x + (b.x - a.x) * ((p.y - a.y) / (b.y - a.y));
		if(p.x > sx) continue;
		
		inside = !inside;
	}

	return inside;
}


// Muchas gracias, Inigo.  
// https://iquilezles.org/articles/distfunctions2d/
float vDistPolygon(Vector2 p, Polygon* poly) {

	float d = vDot2(vSub2(p, poly->points[0]), vSub2(p, poly->points[0]));
	float s = 1.0;

	for(int i = 0, j = poly->pointCount - 1; i < poly->pointCount; j = i, i++) {
		Vector2 A = poly->points[i];
		Vector2 B = poly->points[j];

		Vector2 e = vSub2(B, A);
		Vector2 w = vSub2(p, A);
		Vector2 b = vSub2(w, vScale2(e, fclamp(vDot2(w, e) / vDot2(e, e), 0.0, 1.0)));

		d = fminf(d, vDot2(b, b));
        
		int c1 = p.y >= A.y;
		int c2 = p.y < B.y;
		int c3 = e.x * w.y > e.y * w.x;
		if((c1 && c2 && c3) || (!c1 && !c2 && !c3)) s *= -1.0;  
	}

	return s * sqrtf(d);
}

// ----


void polyCalcCentroid(Polygon* poly) {
	int cnt = poly->pointCount;
	Vector2 centroid = {0,0};
	
	for(int i = 0; i < cnt; i++) {
		Vector2 a = poly->points[i];
		centroid = vAdd2(centroid, a);
	}
	
	poly->centroid = vScale2(centroid, 1.0 / poly->pointCount);

}


void polyCalcRadiusSq(Polygon* poly) {
	int cnt = poly->pointCount;
	float d = 0;
	
	for(int i = 0; i < cnt; i++) {
		Vector2 a = poly->points[i];
		d = fmaxf(d, vDot2(poly->centroid, a));
	}
	
	poly->maxRadiusSq = d;
}


// feeding a zero vector into this will cause div/0 and you will deserve it
void  vProject3p(Vector3* what, Vector3* onto, Vector3* out) { // slower; onto may not be normalized
	float wdo = vDot3p(what, onto);
	float odo = vDot3p(onto, onto);
	vScale3p(onto, wdo / odo, out);
}

void  vProjectNorm3p(Vector3* what, Vector3* onto, Vector3* out) { // faster; onto must be normalized
	float wdo = vDot3p(what, onto);
	vScale3p(onto, wdo, out);
}


void vRandomPCG2p(Vector2* end1, Vector2* end2, PCG* pcg, Vector2* out) {
	out->x = frandPCG(fminf(end1->x, end2->x), fmaxf(end1->x, end2->x), pcg);
	out->y = frandPCG(fminf(end1->y, end2->y), fmaxf(end1->y, end2->y), pcg);
}

Vector2 vRandomPCG2(Vector2 end1, Vector2 end2, PCG* pcg) {
	Vector2 o;
	vRandomPCG2p(&end1, &end2, pcg, &o);
	return o;
}

void vRandomNormPCG2p(PCG* pcg, Vector2* out) {
	float th = frandPCG(0, 2.0 * F_PI, pcg);
	float sth, cth;
	
	sincosf(th, &sth, &cth);
	out->x = cth;
	out->y = sth;
}

Vector2 vRandomNormPCG2(PCG* pcg) {
	Vector2 o;
	vRandomNormPCG2p(pcg, &o);
	return o;
}


void vRandomPCG3p(Vector3* end1, Vector3* end2, PCG* pcg, Vector3* out) {
	out->x = frandPCG(fminf(end1->x, end2->x), fmaxf(end1->x, end2->x), pcg);
	out->y = frandPCG(fminf(end1->y, end2->y), fmaxf(end1->y, end2->y), pcg);
	out->z = frandPCG(fminf(end1->z, end2->z), fmaxf(end1->z, end2->z), pcg);
}

Vector3 vRandomPCG3(Vector3 end1, Vector3 end2, PCG* pcg) {
	Vector3 o;
	vRandomPCG3p(&end1, &end2, pcg, &o);
	return o;
}

// This algorithm is uniformly distributed over the surface of a sphere. There is no clustering at the poles.
void vRandomNormPCG3p(PCG* pcg, Vector3* out) {
	float u = frandPCG(-1.f, 1.f, pcg);
	float th = frandPCG(0.f, 2.f * F_PI, pcg);
	float q = sqrtf(1.f - u * u);
	float sth, cth;
	
	sincosf(th, &sth, &cth);
	out->x = u * cth;
	out->y = u * sth;
	out->z = u;
}

Vector3 vRandomNormPCG3(PCG* pcg) {
	Vector3 o;
	vRandomNormPCG3p(pcg, &o);
	return o;
}

void vRandom3p(Vector3* end1, Vector3* end2, Vector3* out) {
	out->x = frand(fminf(end1->x, end2->x), fmaxf(end1->x, end2->x));
	out->y = frand(fminf(end1->y, end2->y), fmaxf(end1->y, end2->y));
	out->z = frand(fminf(end1->z, end2->z), fmaxf(end1->z, end2->z));
}

Vector3 vRandom3(Vector3 end1, Vector3 end2) {
	return (Vector3){
		.x = frand(fminf(end1.x, end2.x), fmaxf(end1.x, end2.x)),
		.y = frand(fminf(end1.y, end2.y), fmaxf(end1.y, end2.y)),
		.z = frand(fminf(end1.z, end2.z), fmaxf(end1.z, end2.z))
	};
}

// Uniformly distributed around the unit sphere; ie, no clustering at the poles.
Vector3 vRandomNorm3() {
	Vector3 out;
	vRandomNorm3p(&out);
	return out;
}

void vRandomNorm3p(Vector3* out) {
	float u = frand(-1.0, 1.0);
	float th = frand(0, 2.0 * F_PI);
	float q = sqrtf(1.0 - u * u);
	float sth, cth;
	
	sincosf(th, &sth, &cth);
	out->x = u * cth;
	out->y = u * sth;
	out->z = u;
}


Vector4i vFloor4(const Vector4 v) {
	return (Vector4i){.x = floorf(v.x), .y = floorf(v.y), .z = floorf(v.z), .w = floorf(v.w)};
}

Vector4i vCeil4(const Vector4 v) {
	return (Vector4i){.x = ceilf(v.x), .y = ceilf(v.y), .z = ceilf(v.z), .w = ceilf(v.w)};
}

Vector3i vFloor3(const Vector3 v) {
	return (Vector3i){.x = floorf(v.x), .y = floorf(v.y), .z = floorf(v.z)};
}

Vector3i vCeil3(const Vector3 v) {
	return (Vector3i){.x = ceilf(v.x), .y = ceilf(v.y), .z = ceilf(v.z)};
}

Vector2i vFloor2(const Vector2 v) {
	return (Vector2i){.x = floorf(v.x), .y = floorf(v.y)};
}

Vector2i vCeil2(const Vector2 v) {
	return (Vector2i){.x = ceilf(v.x), .y = ceilf(v.y)};
}


Vector4l vFloor4d(const Vector4d v) {
	return (Vector4l){.x = floor(v.x), .y = floor(v.y), .z = floor(v.z), .w = floor(v.w)};
}

Vector4l vCeil4d(const Vector4d v) {
	return (Vector4l){.x = ceil(v.x), .y = ceil(v.y), .z = ceil(v.z), .w = ceil(v.w)};
}

Vector3l vFloor3d(const Vector3d v) {
	return (Vector3l){.x = floor(v.x), .y = floor(v.y), .z = floor(v.z)};
}

Vector3l vCeil3d(const Vector3d v) {
	return (Vector3l){.x = ceil(v.x), .y = ceil(v.y), .z = ceil(v.z)};
}

Vector2l vFloor2d(const Vector2d v) {
	return (Vector2l){.x = floor(v.x), .y = floor(v.y)};
}

Vector2l vCeil2d(const Vector2d v) {
	return (Vector2l){.x = ceil(v.x), .y = ceil(v.y)};
}


Vector2 vModPositive2(Vector2 v, Vector2 m) {
	return (Vector2){
		.x = fmodf(fmodf(v.x, m.x) + m.x, m.x),
		.y = fmodf(fmodf(v.y, m.y) + m.y, m.y),
	};
}

Vector3 vModPositive3(Vector3 v, Vector3 m) {
	return (Vector3){
		.x = fmodf(fmodf(v.x, m.x) + m.x, m.x),
		.y = fmodf(fmodf(v.y, m.y) + m.y, m.y),
		.z = fmodf(fmodf(v.z, m.z) + m.z, m.z),
	};
}

Vector4 vModPositive4(Vector4 v, Vector4 m) {
	return (Vector4){
		.x = fmodf(fmodf(v.x, m.x) + m.x, m.x),
		.y = fmodf(fmodf(v.y, m.y) + m.y, m.y),
		.z = fmodf(fmodf(v.z, m.z) + m.z, m.z),
		.w = fmodf(fmodf(v.w, m.w) + m.w, m.w),
	};
}


// reflects the distance from v to pivot across pivot.
// out, pivot, and v will all be in the same plane, with pivot half way between v and out
void vReflectAcross3p(Vector3* v, Vector3* pivot, Vector3* out) {
	Vector3 v2 = vScale3(*v, -1);
	float d = vDot3(v2, *pivot) * 2.0;
	*out = vSub3(v2, vScale3(*pivot, d));
}

Vector3 vReflectAcross3(Vector3 v, Vector3 pivot) {
	Vector3 o;
	vReflectAcross3p(&v, &pivot, &o);
	return o;
}

// calculate a unit vector normal to a triangle's face.
void  vTriFaceNormal3p(Vector3* a, Vector3* b, Vector3* c, Vector3* out) {
	Vector3 b_a, c_a;
	
	vSub3p(b, a, &b_a);
	vSub3p(c, a, &c_a);
	vCross3p(&b_a, &c_a, out);
	vNorm3p(out, out);
}

// calculate a unit vector normal to a triangle's face.
Vector3 vTriFaceNormal3(Vector3 a, Vector3 b, Vector3 c) {
	Vector3 b_a, c_a, out;
	
	b_a = vSub3(b, a);
	c_a = vSub3(c, a);
	return vNorm3(vCross3(b_a, c_a));
}

// calculate a unit vector normal to a triangle's face.
Vector3 vTriFaceNormalArea3(Vector3 a, Vector3 b, Vector3 c, float* area) {
	Vector3 b_a, c_a, out;
	
	b_a = vSub3(b, a);
	c_a = vSub3(c, a);
	
	Vector3 n = vCross3(b_a, c_a);
	
	if(area) *area = vLen(n) * .5f;
	
	return vNorm3(n);
}

// calculate a unit vector normal to a triangle's face.
void  vpTriFaceNormal3p(Vector3* tri, Vector3* out) {
	vTriFaceNormal3p(tri+0, tri+1, tri+2, out);
}


void vProjectOntoPlane3p(Vector3* v, Plane* p, Vector3* out) {
	Vector3 v_ortho;
	
	// get the component of v that's perpendicular to the plane
	vProjectNorm3p(v, &p->n, &v_ortho);

	// subtract it from v
	vSub3p(v, &v_ortho, out);
}

void vProjectOntoPlaneNormalized3p(Vector3* v, Plane* p, Vector3* out) {
	vProjectOntoPlane3p(v, p, out);
	vNorm3p(out, out);
}


void planeFromPointNormal(Vector3* p, Vector3* norm, Plane* out) {
	out->n = *norm;
	out->d = vDot3p(norm, p);
}

// calculates a plane from a triangle
void planeFromTriangle3p(Vector3* v1, Vector3* v2, Vector3* v3, Plane* out) {
	vTriFaceNormal3p(v1, v2, v3, &out->n);
	out->d = vDot3p(&out->n, v1);
}

// copy a plane
void planeCopy3p(Plane* in, Plane* out) {
	out->n = in->n;
	out->d = in->d;
}

// reverses the plane's direction
void planeInverse3p(Plane* in, Plane* out) {
	vInv3p(&in->n, &out->n);
	out->d = -in->d;
}

// classifies a point by which side of the plane it's on, default espilon
int planeClassifyPoint3p(Plane* p, Vector3* pt) {
	return planeClassifyPointEps3p(p, pt, FLT_CMP_EPSILON);
}

// classifies a point by which side of the plane it's on, custom espilon
int planeClassifyPointEps3p(Plane* p, Vector3* pt, float epsilon) {
	float dist = vDot3p(&p->n, pt);
	if(fabs(dist - p->d) < epsilon)
		return C3DLAS_COPLANAR;
	else if (dist < p->d)
		return C3DLAS_BACK;
	else
		return C3DLAS_FRONT;
}


// https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm
// returns _INTERSECT or _DISJOINT
int rayTriangleIntersect(

	Vector3* a, Vector3* b, Vector3* c, // triangle

	Vector3* ray_origin, Vector3* ray_dir, // ray

	float* u, float* v, float* t // barycentric out coords, t of intersection point along ray 

) {

	Vector3 ab = vSub3(*b, *a);
	Vector3 ac = vSub3(*c, *a);
	
	Vector3 n = vCross3(ab, ac);
	
	float det = -vDot3(*ray_dir, n);
	if(fabsf(det) <= FLT_CMP_EPSILON) {
		return C3DLAS_DISJOINT; // the ray is parallel to the triangle
	}
	
	float idet = 1.0f / det;
	
	Vector3 ao = vSub3(*ray_origin, *a);
	Vector3 dao = vCross3(ao, *ray_dir);
	
	*u = vDot3(ac, dao) * idet;
	if(*u < 0.f) return C3DLAS_DISJOINT; // barycentric coord is outside the triangle
	
	*v = -vDot3(ab, dao) * idet;
	if(*v < 0.f || *u + *v > 1.f) return C3DLAS_DISJOINT; // barycentric coord is outside the triangle
	
	
	*t = vDot3(ao, n) * idet;
//	if(*t < 0.0f) return C3DLAS_DISJOINT; // the ray intersects the triangle behind the origin
	
	return C3DLAS_INTERSECT;
}


// returns _INTERSECT or _DISJOINT
Vector3 triangleClosestPoint_Reference(

	Vector3* a, Vector3* b, Vector3* c, // triangle

	Vector3* p, // test point

	float* out_u, float* out_v // barycentric out coords of closest point 

) {

	Vector3 ab = vSub3(*b, *a);
	Vector3 ac = vSub3(*c, *a);
	
	Vector3 n = vCross3(ab, ac);
	Vector3 ray_dir = vNeg3(vNorm(n));
	
	float idet = 1.0f / -vDot3(ray_dir, n);
	
	Vector3 ao = vSub3(*p, *a);
	Vector3 dao = vCross3(ao, ray_dir);
	
//	printf("idet = %f, n = %f,%f,%f\n", idet, n.x, n.y, n.z);
	
	float u = vDot3(ac, dao) * idet;
	float v = -vDot3(ab, dao) * idet;
//	printf("u,v = %f, %f\n", u, v);
	if(u >= 0 && v >= 0.f && u + v <= 1.f) {
		float nt = vDot3(ao, n);
		Vector3 planep = vAdd3(*p, vScale3(vNeg3(n), nt));
		return planep; // the ray intersects the triangle
	}
	
	float t_ab, t_bc, t_ca;
	
	// collect all the possible locations
	float dist[6];
	
	dist[0] = vDistTPointLine3(*p, (Line3){*a, *b}, &t_ab);
	dist[1] = vDistTPointLine3(*p, (Line3){*b, *c}, &t_bc);
	dist[2] = vDistTPointLine3(*p, (Line3){*c, *a}, &t_ca);

	dist[3] = vDist(*a, *p);
	dist[4] = vDist(*b, *p);
	dist[5] = vDist(*c, *p);

	// find the smallest distance
	float min = dist[0];
	int mini = 0;
	
	for(int i = 1; i < 6; i++) {
		if(dist[i] < min) {
			min = dist[i];
			mini = i;
		}
	}
	
	switch(mini) {
		case 0: return vLerp(*a, *b, t_ab);
		case 1: return vLerp(*b, *c, t_bc);
		case 2: return vLerp(*c, *a, t_ca);
		case 3: return *a;
		case 4: return *b;
		case 5: return *c;
	}
	
	return (Vector3){0,0,0}; // HACK just return something
}


/*

// https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm

// returns _INTERSECT or _DISJOINT

vec3 triangleClosestPoint(

	Vector3* a, Vector3* b, Vector3* c, // triangle

	Vector3* p, // test point

	float* out_u, float* out_v // barycentric out coords of closest point 

) {


	Vector3 ab = vSub3(*b, *a); 

	Vector3 ac = vSub3(*c, *a);

	
	Vector3 n = vCross3(ab, ac); // triangle plane normal

	
	Vector3 ap = vSub3(*p, *a);

	Vector3 dap = vCross3(ap, vNeg(n)); // p projected onto the triangle's plane, relative to a

	
//	p = w*a + u*b + v*c;

	
	float u = vDot3(ac, dap); // inversely proportional to distance from _b_, aka "beta"

	// u < 0 means outside the triangle past the a/c edge

	// u > 1 means outside the triangle past b

	// u == 0 means somewhere on the a/c edge


//	if(*u < 0.f) return C3DLAS_DISJOINT; // barycentric coord is outside the triangle

	
	float v = -vDot3(ab, dap); // inversely proportional to distance from _c_, aka "gamma"

	// v < 0 means outside the triangle past the a/b edge

	// v > 1 means outside the triangle past c

	// v == 0 means somewhere on the a/b edge


//	if(*u < 0.f || *u + *v < 1.0f) return C3DLAS_DISJOINT; // barycentric coord is outside the triangle

	
	float w = 1.0f - u - v; // inversely proportional to distance from _a_, aka "alpha"

	// w < 0 means outside the triangle past the b/c edge

	// w > 1 means outside the triangle past a

	// w == 0 means somewhere on the b/c edge

	
	if(u > 0 && v > 0 && w > 0) // point inside triangle

		float t = vDot3(ap, n);

		vec3 closest = vAdd3(*p, vScale3(vNeg(n), t)); 

		return closest;

	}

	
	float new_u = 0, new_v = 0, new_w = 0;

	
	if(w < 0) {

		float t = fclamp(0f, 1f, vDot3() / vDot3());

		new_v = 1.f - t;

		new_w = t;

	}

	else if(v < 0) {

		float t = fclamp(0f, 1f, vDot3() / vDot3());

		new_u = t;

		new_w = 1.f - t;

	}

	else if(u < 0) {

		float t = fclamp(0f, 1f, vDot3() / vDot3());

		new_u = 1.f - t;

		new_v = t;

	}

	
//	if(*t < 0.0f) return C3DLAS_DISJOINT; // the ray intersects the triangle behind the origin

	
	return C3DLAS_INTERSECT;

}


*/


// C3DLAS_COPLANAR, _INTERSECT, or _DISJOINT
int triPlaneTestIntersect3p(Vector3* pTri, Plane* pl) {
	Vector3 a, b, c;
	float da, db, dc;
	
	// get distance of each vertex from the plane
	// bail early if any of them are coplanar
	a = pTri[0];
	da = vDot3p(&a, &pl->n) - pl->d;
	if(fabs(da) < FLT_CMP_EPSILON) {
		return C3DLAS_COPLANAR;
	}
	
	b = pTri[1];
	db = vDot3p(&b, &pl->n) - pl->d;
	if(fabs(db) < FLT_CMP_EPSILON) {
		return C3DLAS_COPLANAR;
	}
	
	c = pTri[2];
	dc = vDot3p(&c, &pl->n) - pl->d;
	if(fabs(dc) < FLT_CMP_EPSILON) {
		return C3DLAS_COPLANAR;
	}
	
	// the triangle intersects if the sign of all the distances does not match,
	// ie, on vertex is on the opposite side of the plane from the others
	return (signbit(da) == signbit(db) && signbit(db) == signbit(dc)) ? C3DLAS_DISJOINT : C3DLAS_INTERSECT;
}


// C3DLAS_COPLANAR, _INTERSECT, or _DISJOINT
int triPlaneClip3p(

	Vector3* pTri, 

	Plane* pl, 

	Vector3* aboveOut, 

	Vector3* belowOut, 

	int* aboveCnt,

	int* belowCnt

) {
	
	Vector3 v0, v1, v2;
	float vp_d0, vp_d1, vp_d2;
	
	v0 = pTri[0];
	v1 = pTri[1];
	v2 = pTri[2];

	// get distance of each vertex from the plane
	vp_d0 = vDot3p(&v0, &pl->n) - pl->d;
	vp_d1 = vDot3p(&v1, &pl->n) - pl->d;
	vp_d2 = vDot3p(&v2, &pl->n) - pl->d;
	
	
	// bail early if just one is coplanar
	// split in half with single-edge intersections
	if(fabs(vp_d0) < FLT_CMP_EPSILON) {
		if( // single edge intersection
			signbit(vp_d1) != signbit(vp_d2) && 
			fabs(vp_d1) > FLT_CMP_EPSILON &&
			fabs(vp_d2) > FLT_CMP_EPSILON
		) {
			// get intersection point
			Vector3 c;
			planeLineFindIntersectFast3p(pl, &v1, &v2, &c);
			
			if(vp_d1 > 0) { // v1 is above the plane
				aboveOut[0] = c; // correct winding
				aboveOut[1] = v0;
				aboveOut[2] = v1;
				belowOut[0] = c;
				belowOut[1] = v2;
				belowOut[2] = v0;
			}
			else {
				belowOut[0] = c; // correct winding
				belowOut[1] = v0;
				belowOut[2] = v1;
				aboveOut[0] = c;
				aboveOut[1] = v2;
				aboveOut[2] = v0;
			}
			
			*aboveCnt = 1;
			*belowCnt = 1;
			
			return C3DLAS_INTERSECT;
		}
		
		return C3DLAS_COPLANAR; // one vertex is on the plane, the others all above or below
	}
	
	if(fabs(vp_d1) < FLT_CMP_EPSILON) {
		if( // single edge intersection
			signbit(vp_d0) != signbit(vp_d2) && 
			fabs(vp_d0) > FLT_CMP_EPSILON &&
			fabs(vp_d2) > FLT_CMP_EPSILON
		) {
			// get intersection point
			Vector3 c;
			planeLineFindIntersectFast3p(pl, &v0, &v2, &c);
			
			if(vp_d0 > 0) { // v0 is above the plane
				aboveOut[0] = c; // correct winding
				aboveOut[1] = v0;
				aboveOut[2] = v1;
				belowOut[0] = c;
				belowOut[1] = v1;
				belowOut[2] = v2;
			}
			else {
				belowOut[0] = c; // correct winding
				belowOut[1] = v0;
				belowOut[2] = v1;
				aboveOut[0] = c;
				aboveOut[1] = v1;
				aboveOut[2] = v2;
			}
			
			*aboveCnt = 1;
			*belowCnt = 1;
			
			return C3DLAS_INTERSECT;
		}
		
		return C3DLAS_COPLANAR; // one vertex is on the plane, the others all above or below
	}
	
	if(fabs(vp_d2) < FLT_CMP_EPSILON) {
		if( // single edge intersection
			signbit(vp_d0) != signbit(vp_d1) && 
			fabs(vp_d0) > FLT_CMP_EPSILON &&
			fabs(vp_d1) > FLT_CMP_EPSILON
		) {
			// get intersection point
			Vector3 c;
			planeLineFindIntersectFast3p(pl, &v0, &v1, &c);
			
			if(vp_d0 > 0) { // v0 is above the plane
				aboveOut[0] = c; // correct winding
				aboveOut[1] = v2;
				aboveOut[2] = v0;
				belowOut[0] = c;
				belowOut[1] = v1;
				belowOut[2] = v2;
			}
			else {
				belowOut[0] = c; // correct winding
				belowOut[1] = v2;
				belowOut[2] = v0;
				aboveOut[0] = c;
				aboveOut[1] = v1;
				aboveOut[2] = v2;
			}
			
			*aboveCnt = 1;
			*belowCnt = 1;
			
			return C3DLAS_INTERSECT;
		}
		
		return C3DLAS_COPLANAR; // one vertex is on the plane, the others all above or below
	}
	
	
	// the triangle intersects if the sign of all the distances does not match,
	// ie, on vertex is on the opposite side of the plane from the others
	// bail if disjoint
	if(signbit(vp_d0) == signbit(vp_d1) && signbit(vp_d1) == signbit(vp_d2)) {
		return C3DLAS_DISJOINT;
	}
	
	
	// split on which edges intersect the plane
	if(signbit(vp_d0) == signbit(vp_d1)) {
		// vertex 2 is isolated; edges 0,2 and 1,2 intersect
		
		Vector3 c0, c1;
		planeLineFindIntersectFast3p(pl, &v0, &v2, &c0);
		planeLineFindIntersectFast3p(pl, &v1, &v2, &c1);
		
		if(vp_d2 > 0) { // v2 is above the plane
			aboveOut[0] = v2; // correct winding
			aboveOut[1] = c0;
			aboveOut[2] = c1;
			belowOut[0] = c1;
			belowOut[1] = v0;
			belowOut[2] = v1;
			belowOut[3] = c1;
			belowOut[4] = c0;
			belowOut[5] = v1;
			
			*aboveCnt = 1;
			*belowCnt = 2;
		}
		else {
			belowOut[0] = v2; // correct winding
			belowOut[1] = c0;
			belowOut[2] = c1;
			aboveOut[0] = c1;
			aboveOut[1] = v0;
			aboveOut[2] = v1;
			aboveOut[3] = c1;
			aboveOut[4] = c0;
			aboveOut[5] = v1;
			
			*aboveCnt = 2;
			*belowCnt = 1;
		}
		
	}
	else if(signbit(vp_d1) == signbit(vp_d2)) {
		// vertex 0 is isolated; edges 1,0 and 2,0 intersect
		
		Vector3 c0, c1;
		planeLineFindIntersectFast3p(pl, &v1, &v0, &c0);
		planeLineFindIntersectFast3p(pl, &v2, &v0, &c1);
		
		if(vp_d0 > 0) { // v0 is above the plane
			aboveOut[0] = v0; // correct winding
			aboveOut[1] = c0;
			aboveOut[2] = c1;
			belowOut[0] = c1;
			belowOut[1] = v1;
			belowOut[2] = v2;
			belowOut[3] = c1;
			belowOut[4] = c0;
			belowOut[5] = v1;
			
			*aboveCnt = 1;
			*belowCnt = 2;
		}
		else {
			belowOut[0] = v0; // correct winding
			belowOut[1] = c0;
			belowOut[2] = c1;
			aboveOut[0] = c1;
			aboveOut[1] = v1;
			aboveOut[2] = v2;
			aboveOut[3] = c1;
			aboveOut[4] = c0;
			aboveOut[5] = v1;
			
			*aboveCnt = 2;
			*belowCnt = 1;
		}
		
	}
	else {
		// vertex 1 is isolated; edges 0,1 and 2,1 intersect
		
		Vector3 c0, c1;
		planeLineFindIntersectFast3p(pl, &v0, &v1, &c0);
		planeLineFindIntersectFast3p(pl, &v2, &v1, &c1);
		
		if(vp_d1 > 0) { // v1 is above the plane
			aboveOut[0] = v1; // correct winding
			aboveOut[1] = c1;
			aboveOut[2] = c0;
			belowOut[0] = c1;
			belowOut[1] = v2;
			belowOut[2] = v0;
			belowOut[3] = c0;
			belowOut[4] = c1;
			belowOut[5] = v0;
			
			*aboveCnt = 1;
			*belowCnt = 2;
		}
		else {
			belowOut[0] = v1; // correct winding
			belowOut[1] = c1;
			belowOut[2] = c0;
			aboveOut[0] = c1;
			aboveOut[1] = v2;
			aboveOut[2] = v0;
			aboveOut[3] = c0;
			aboveOut[4] = c1;
			aboveOut[5] = v0;
			
			*aboveCnt = 2;
			*belowCnt = 1;
		}
	}
	
	
	return C3DLAS_INTERSECT;
}

// http://geomalgorithms.com/a07-_distance.html
// _PARALLEL with no output on parallel lines
// _INTERSECT with one point of output on intersection
// _DISJOINT with two outputs otherwise
int shortestLineFromRayToRay3p(Ray3* r1, Ray3* r2, Vector3* pOut) {
	
	Vector3 u, v, w, ps, pt;
	float a, b, c, d, e, s, t;
	
	u = r1->d;
	v = r2->d;
	vSub3p(&r1->o, &r2->o, &w);
	
	a = vDot3p(&u, &u); 
	b = vDot3p(&u, &v); 
	c = vDot3p(&v, &v); 
	d = vDot3p(&u, &w); 
	e = vDot3p(&v, &w); 
	
	float ac_bb = a * c - b * b;
	if(fabs(ac_bb) < FLT_CMP_EPSILON) {
		return C3DLAS_PARALLEL;
	}
	
	s = (b * e - c * d) / ac_bb;
	t = (a * e - b * d) / ac_bb;
	
	vScale3p(&u, s, &ps);
	vScale3p(&v, t, &pt);
	vAdd3p(&r1->o, &ps, &ps);
	vAdd3p(&r2->o, &pt, &pt);
	
	pOut[0] = ps;
	pOut[1] = pt;
	
	if(vDistSq3p(&ps, &pt) < FLT_CMP_EPSILON_SQ) {
		return C3DLAS_INTERSECT;
	}
	
	return C3DLAS_DISJOINT;
}


void frustumFromMatrix(Matrix* m, Frustum* out) {
	
	Matrix inv;
	
	mInverse(m, &inv);
	
	// first the points
	// these MUST be in this order
	// near
	vMatrixMulf3p(-1,-1,-1, &inv, &out->points[0]);
	vMatrixMulf3p(-1, 1,-1, &inv, &out->points[1]);
	vMatrixMulf3p( 1,-1,-1, &inv, &out->points[2]);
	vMatrixMulf3p( 1, 1,-1, &inv, &out->points[3]);
	// far
	vMatrixMulf3p(-1,-1, 1, &inv, &out->points[4]);
	vMatrixMulf3p(-1, 1, 1, &inv, &out->points[5]);
	vMatrixMulf3p( 1,-1, 1, &inv, &out->points[6]);
	vMatrixMulf3p( 1, 1, 1, &inv, &out->points[7]);
	
	// now the planes
	// near and far
	planeFromTriangle3p(&out->points[0], &out->points[1], &out->points[2], &out->planes[0]);
	planeFromTriangle3p(&out->points[4], &out->points[5], &out->points[6], &out->planes[1]);
	// sides
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[1], &out->planes[2]);
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[2], &out->planes[3]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[1], &out->planes[4]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[2], &out->planes[5]);
}

void frustumFromMatrixVK(Matrix* m, Frustum* out) {
	
	Matrix inv;
	
	mInverse(m, &inv);
	
	// first the points
	// these MUST be in this order
	// near
	vMatrixMulf3p(-1,-1, 0, &inv, &out->points[0]);
	vMatrixMulf3p(-1, 1, 0, &inv, &out->points[1]);
	vMatrixMulf3p( 1,-1, 0, &inv, &out->points[2]);
	vMatrixMulf3p( 1, 1, 0, &inv, &out->points[3]);
	// far
	vMatrixMulf3p(-1,-1, 1, &inv, &out->points[4]);
	vMatrixMulf3p(-1, 1, 1, &inv, &out->points[5]);
	vMatrixMulf3p( 1,-1, 1, &inv, &out->points[6]);
	vMatrixMulf3p( 1, 1, 1, &inv, &out->points[7]);
	
	// now the planes
	// near and far
	planeFromTriangle3p(&out->points[0], &out->points[1], &out->points[2], &out->planes[0]);
	planeFromTriangle3p(&out->points[4], &out->points[5], &out->points[6], &out->planes[1]);
	// sides
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[1], &out->planes[2]);
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[2], &out->planes[3]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[1], &out->planes[4]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[2], &out->planes[5]);
}

void frustumFromMatrixVK_ZUP(Matrix* m, Frustum* out) {
	
	Matrix inv;
	
	mInverse(m, &inv);
	
	*out = (Frustum){0};
	// first the points
	// these MUST be in this order
	// near
	vMatrixMulf3p(-1,-1, 0, &inv, &out->points[0]); // BUG this order is likely wrong for the planes but results in a sane wireframe.
	vMatrixMulf3p( 1,-1, 0, &inv, &out->points[1]);
	vMatrixMulf3p(-1, 1, 0, &inv, &out->points[2]);
	vMatrixMulf3p( 1, 1, 0, &inv, &out->points[3]);
	// far
	vMatrixMulf3p(-1,-1, 1, &inv, &out->points[4]);
	vMatrixMulf3p(1, -1, 1, &inv, &out->points[5]);
	vMatrixMulf3p( -1,1, 1, &inv, &out->points[6]);
	vMatrixMulf3p( 1, 1, 1, &inv, &out->points[7]);
	
	
	// now the planes
	// near and far
	planeFromTriangle3p(&out->points[0], &out->points[1], &out->points[2], &out->planes[0]);
	planeFromTriangle3p(&out->points[4], &out->points[5], &out->points[6], &out->planes[1]);
	// sides
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[1], &out->planes[2]);
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[2], &out->planes[3]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[1], &out->planes[4]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[2], &out->planes[5]);
}


void frustumFromMatrixVK_RDepth(Matrix* m, Frustum* out) {
	
	Matrix inv;
	
	mInverse(m, &inv);
	
	// first the points
	// these MUST be in this order
	// near
	vMatrixMulf3p(-1,-1, 1, &inv, &out->points[0]);
	vMatrixMulf3p(-1, 1, 1, &inv, &out->points[1]);
	vMatrixMulf3p( 1,-1, 1, &inv, &out->points[2]);
	vMatrixMulf3p( 1, 1, 1, &inv, &out->points[3]);
	// far
	vMatrixMulf3p(-1,-1, 0, &inv, &out->points[4]);
	vMatrixMulf3p(-1, 1, 0, &inv, &out->points[5]);
	vMatrixMulf3p( 1,-1, 0, &inv, &out->points[6]);
	vMatrixMulf3p( 1, 1, 0, &inv, &out->points[7]);
	
	// now the planes
	// near and far
	planeFromTriangle3p(&out->points[0], &out->points[1], &out->points[2], &out->planes[0]);
	planeFromTriangle3p(&out->points[4], &out->points[5], &out->points[6], &out->planes[1]);
	// sides
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[1], &out->planes[2]);
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[2], &out->planes[3]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[1], &out->planes[4]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[2], &out->planes[5]);
}

void frustumFromMatrixVK_ZUP_RDepth(Matrix* m, Frustum* out) {
	
	Matrix inv;
	
	mInverse(m, &inv);
	
	*out = (Frustum){0};
	// first the points
	// these MUST be in this order
	// near
	vMatrixMulf3p(-1,-1, 1, &inv, &out->points[0]); // BUG this order is likely wrong for the planes but results in a sane wireframe.
	vMatrixMulf3p( 1,-1, 1, &inv, &out->points[1]);
	vMatrixMulf3p(-1, 1, 1, &inv, &out->points[2]);
	vMatrixMulf3p( 1, 1, 1, &inv, &out->points[3]);
	// far
	vMatrixMulf3p(-1,-1, 0, &inv, &out->points[4]);
	vMatrixMulf3p(1, -1, 0, &inv, &out->points[5]);
	vMatrixMulf3p( -1,1, 0, &inv, &out->points[6]);
	vMatrixMulf3p( 1, 1, 0, &inv, &out->points[7]);
	
	
	// now the planes
	// near and far
	planeFromTriangle3p(&out->points[0], &out->points[1], &out->points[2], &out->planes[0]);
	planeFromTriangle3p(&out->points[4], &out->points[5], &out->points[6], &out->planes[1]);
	// sides
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[1], &out->planes[2]);
	planeFromTriangle3p(&out->points[0], &out->points[4], &out->points[2], &out->planes[3]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[1], &out->planes[4]);
	planeFromTriangle3p(&out->points[3], &out->points[7], &out->points[2], &out->planes[5]);
}


void frustumCenter(Frustum* f, Vector3* out) {
	Vector3 sum = {0.0f,0.0f,0.0f};
	
	for(int i = 0; i < 8; i++) vAdd3p(&f->points[i], &sum, &sum);
	vScale3p(&sum, 1.0f/8.0f, out);
}

// General idea of the algorithm:
// https://lxjk.github.io/2017/04/15/Calculate-Minimal-Bounding-Sphere-of-Frustum.html
// http://archive.is/YACj2
void frustumBoundingSphere(Frustum* f, Sphere* out) {
	Vector3 f0, n0;
	vPointAvg3p(&f->points[0], &f->points[3], &n0);
	vPointAvg3p(&f->points[4], &f->points[7], &f0);
	
	float Dn2 = vDistSq3p(&n0, &f->points[0]); 
	float Df2 = vDistSq3p(&f0, &f->points[4]);
	
	 // check for ortho
	if(Dn2 - Df2 < 0.00001) {
		frustumCenter(f, &out->center);
		out->r = vDist3p(&out->center, &f->points[0]); 
		return;
	}
	
	float Dnf = vDist3p(&f0, &n0);
	float Dnc = (Dn2 - Df2 - Df2) / (2 * Dnf);
	
// 	printf("\n f: %f,%f,%f\n", f->points[4].x,f->points[4].y,f->points[4].z);
// 	printf(" n: %f,%f,%f\n", f->points[0].x,f->points[0].y,f->points[0].z);
// 	printf(" f0: %f,%f,%f\n", f0.x,f0.y,f0.z);
// 	printf(" n0: %f,%f,%f\n", n0.x,n0.y,n0.z);
// 	printf(" dn2, df2, dnf, dnc: %f,%f,%f,%f\n", Dn2, Df2, Dnf, Dnc);
	
	
	if(Dnc > 0 && Dnc < Dnf) {
		vLerp3p(&f0, &n0, Dnc / Dnf, &out->center);
		out->r = sqrt(Dnc * Dnc + Dn2);
	}
	else {
		out->center = f0;
		out->r = sqrt(Df2);
	}
}


void frustumInscribeSphere(Frustum* f, Sphere* out) {
	Vector3 fx, nx;
	vPointAvg3p(&f->points[0], &f->points[3], &nx);
	vPointAvg3p(&f->points[4], &f->points[7], &fx);
	
/*	

	float Dn2 = vDistSq3p(&n0, &f->points[0]); 

	float Df2 = vDistSq3p(&f0, &f->points[4]);

	float Dnf = vDist3p(&f0, n0);

	float Dnc = (Dn2 - Df2 - Df2) / (2 * Dnf);*/

}


void quadCenterp3p(Vector3* a, Vector3* b, Vector3* c, Vector3* d, Vector3* out) {
	Vector3 sum;
	vAdd3p(a, b, &sum);
	vAdd3p(&sum, c, &sum);
	vAdd3p(&sum, d, &sum);
	vScale3p(&sum, 0.25f, out);
}


void vPointAvg3p(Vector3* a, Vector3* b, Vector3* out) {
	Vector3 sum;
	vAdd3p(a, b, &sum);
	vScale3p(&sum, 0.5f, out);
} 


// reflects the distance from v to pivot across pivot.
// out, pivot, and v will form a straight line with pivot exactly in the middle.
void vReflectAcross2p(Vector2* v, Vector2* pivot, Vector2* out) {
	Vector2 diff;
	
	vSub2p(pivot, v, &diff);
	vAdd2p(pivot, &diff, out);
}


// degenerate cases may not give desired results. GIGO.
void vRoundAway2p(const Vector2* in, const Vector2* center, Vector2i* out) {
	
	if(in->x > center->x) out->x = ceilf(in->x);
	else out->x = floorf(in->x);
	
	if(in->y > center->y) out->y = ceilf(in->y);
	else out->y = floorf(in->y);
}

// degenerate cases may not give desired results. GIGO.
void vRoundToward2p(const Vector2* in, const Vector2* center, Vector2i* out) {
	
	if(in->x > center->x) out->x = floorf(in->x);
	else out->x = ceilf(in->x);
	
	if(in->y > center->y) out->y = floorf(in->y);
	else out->y = ceilf(in->y);
}


// plane-vector operations

// distance from point to plane
float pvDist3p(Plane* p, Vector3* v) {
	return vDot3p(v, &p->n) + p->d;
}


// matrix-vector operations

Vector3 vMatrixMulProjectedMagic3(Vector3 in, Matrix* m) {
	Vector4 v;

	v.x = in.x * m->m[0+0] + in.y * m->m[4+0] + in.z * m->m[8+0] + 1.0 * m->m[12+0];
	v.y = in.x * m->m[0+1] + in.y * m->m[4+1] + in.z * m->m[8+1] + 1.0 * m->m[12+1];
	v.z = in.x * m->m[0+2] + in.y * m->m[4+2] + in.z * m->m[8+2] + 1.0 * m->m[12+2];
	v.w = in.x * m->m[0+3] + in.y * m->m[4+3] + in.z * m->m[8+3] + 1.0 * m->m[12+3];
	
	if(v.w == 0) return (Vector3){0,0,0}; 
	if(v.w < 0) v.w = -v.w;
	
	return (Vector3){.x = v.x / v.w, .y = v.y / v.w, .z = v.z / v.w};
}

Vector3 vMatrixMul3(Vector3 in, Matrix* m) {
	Vector4 v;

	v.x = in.x * m->m[0+0] + in.y * m->m[4+0] + in.z * m->m[8+0] + 1.0 * m->m[12+0];
	v.y = in.x * m->m[0+1] + in.y * m->m[4+1] + in.z * m->m[8+1] + 1.0 * m->m[12+1];
	v.z = in.x * m->m[0+2] + in.y * m->m[4+2] + in.z * m->m[8+2] + 1.0 * m->m[12+2];
	v.w = in.x * m->m[0+3] + in.y * m->m[4+3] + in.z * m->m[8+3] + 1.0 * m->m[12+3];
	
	if(v.w == 0) return (Vector3){0,0,0};
	
	return (Vector3){.x = v.x / v.w, .y = v.y / v.w, .z = v.z / v.w};
}

Vector4 vMatrixMul4(Vector4 in, Matrix* m) {
	Vector4 v;

	v.x = in.x * m->m[0+0] + in.y * m->m[4+0] + in.z * m->m[8+0] + in.w * m->m[12+0];
	v.y = in.x * m->m[0+1] + in.y * m->m[4+1] + in.z * m->m[8+1] + in.w * m->m[12+1];
	v.z = in.x * m->m[0+2] + in.y * m->m[4+2] + in.z * m->m[8+2] + in.w * m->m[12+2];
	v.w = in.x * m->m[0+3] + in.y * m->m[4+3] + in.z * m->m[8+3] + in.w * m->m[12+3];
	
	return v;
}

// multiply a vector by a matrix
void vMatrixMul3p(Vector3* restrict in, Matrix* restrict m, Vector3* restrict out) {
	vMatrixMulf3p(in->x, in->y, in->z, m, out);
}

void vMatrixMulf3p(float x, float y, float z, Matrix* restrict m, Vector3* restrict out) {
	Vector4 v;

	v.x = x * m->m[0+0] + y * m->m[4+0] + z * m->m[8+0] + 1 * m->m[12+0];
	v.y = x * m->m[0+1] + y * m->m[4+1] + z * m->m[8+1] + 1 * m->m[12+1];
	v.z = x * m->m[0+2] + y * m->m[4+2] + z * m->m[8+2] + 1 * m->m[12+2];
	v.w = x * m->m[0+3] + y * m->m[4+3] + z * m->m[8+3] + 1 * m->m[12+3];
	
	out->x = v.x / v.w;
	out->y = v.y / v.w;
	out->z = v.z / v.w;
}


void evalBezier3p(Vector3* e1, Vector3* e2, Vector3* c1, Vector3* c2, float t, Vector3* out) {
	out->x = evalBezier1D(e1->x, e2->x, c1->x, c2->x, t);
	out->y = evalBezier1D(e1->y, e2->y, c1->y, c2->y, t);
	out->z = evalBezier1D(e1->z, e2->z, c1->z, c2->z, t);
}

void evalBezier2D(Vector2* e1, Vector2* e2, Vector2* c1, Vector2* c2, float t, Vector2* out) {
	out->x = evalBezier1D(e1->x, e2->x, c1->x, c2->x, t);
	out->y = evalBezier1D(e1->y, e2->y, c1->y, c2->y, t);
}

float evalBezier1D(float e1, float e2, float c1, float c2, float t) {
	float mt, mt2, t2;
	mt = 1 - t;
	mt2 = mt * mt;
	t2 = t * t;
	return (mt2 * mt * e1) + (3 * mt2 * t * c1) + (3 * t2 * mt * c2) + (t2 * t * e2);
}


void evalBezierTangent3p(Vector3* e1, Vector3* e2, Vector3* c1, Vector3* c2, float t, Vector3* out) {
	out->x = evalBezier1D_dt(e1->x, e2->x, c1->x, c2->x, t);
	out->y = evalBezier1D_dt(e1->y, e2->y, c1->y, c2->y, t);
	out->z = evalBezier1D_dt(e1->z, e2->z, c1->z, c2->z, t);
}

float evalBezier1D_dt(float e1, float e2, float c1, float c2, float t) {
	float mt, mt2, t2;
	mt = 1 - t;
	mt2 = mt * mt;
	t2 = t * t;
	
	return (3 * mt2 * (c1 - e1)) + (6 * mt * t * (c2 - c1)) + (3 * t2 * (e2 - c2));
}


float evalBezier1D_ddt(float e1, float e2, float c1, float c2, float t) {
	return (6 * (1 - t) * (c2 - c1 - c1 + e1)) + (6 * t * (e2 - c2 - c2 - c1));
}

void evalBezierNorm3p(Vector3* e1, Vector3* e2, Vector3* c1, Vector3* c2, float t, Vector3* out) {
	out->x = evalBezier1D_ddt(e1->x, e2->x, c1->x, c2->x, t);
	out->y = evalBezier1D_ddt(e1->y, e2->y, c1->y, c2->y, t);
	out->z = evalBezier1D_ddt(e1->z, e2->z, c1->z, c2->z, t);
}


// Quadratic bezier functions
float evalQBezier1D(float e1, float e2, float c1, float t) {
	float mt, mt2;
	mt = 1 - t;
	mt2 = mt * mt;
	
	return (mt2 * e1) + (2 * mt * t * c1) + (t * t * e2);
}

void evalQBezier2D3p(Vector2* e1, Vector2* e2, Vector2* c1, float t, Vector2* out) {
	out->x = evalQBezier1D(e1->x, e2->x, c1->x, t);
	out->y = evalQBezier1D(e1->y, e2->y, c1->y, t);
}

void evalQBezier3p(Vector3* e1, Vector3* e2, Vector3* c1, float t, Vector3* out) {
	out->x = evalQBezier1D(e1->x, e2->x, c1->x, t);
	out->y = evalQBezier1D(e1->y, e2->y, c1->y, t);
	out->z = evalQBezier1D(e1->z, e2->z, c1->z, t);
}


///// bounding box functions


// 3D versions

int boxDisjoint3p(const AABB3* a, const AABB3* b) {
	return a->max.x < b->min.x || b->max.x < a->min.x
		|| a->max.y < b->min.y || b->max.y < a->min.y
		|| a->max.z < b->min.z || b->max.z < a->min.z;
}

int boxOverlaps3p(const AABB3* a, const AABB3* b) {
	return !boxDisjoint3p(a, b);
}


int boxContainsPoint3p(const AABB3* b, const Vector3* p) {
	return b->min.x <= p->x && b->max.x >= p->x
		&& b->min.y <= p->y && b->max.y >= p->y
		&& b->min.z <= p->z && b->max.z >= p->z;
}


void boxCenter3p(const AABB3* b, Vector3* out) {
	out->x = (b->max.x + b->min.x) * .5f;
	out->y = (b->max.y + b->min.y) * .5f;
	out->z = (b->max.z + b->min.z) * .5f;
}

Vector3 boxCenter3(const AABB3 b) {
	return (Vector3) {
		(b.max.x + b.min.x) * .5f,
		(b.max.y + b.min.y) * .5f,
		(b.max.z + b.min.z) * .5f
	};
}


void boxSize3p(const AABB3* b, Vector3* out) {
	out->x = b->max.x - b->min.x;
	out->y = b->max.y - b->min.y;
	out->z = b->max.z - b->min.z;
}

Vector3 boxSize3(const AABB3 b) {
	return (Vector3){
		b.max.x - b.min.x,
		b.max.y - b.min.y,
		b.max.z - b.min.z
	};
}

void boxExpandTo3p(AABB3* b, Vector3* p) {
	b->min.x = fminf(b->min.x, p->x);
	b->min.y = fminf(b->min.y, p->y);
	b->min.z = fminf(b->min.z, p->z);
	b->max.x = fmaxf(b->max.x, p->x);
	b->max.y = fmaxf(b->max.y, p->y);
	b->max.z = fmaxf(b->max.z, p->z);
}

void boxExpandTo3(AABB3* b, Vector3 p) {
	boxExpandTo3p(b, &p);
}


// 2D versions

int boxDisjoint2p(const AABB2* a, const AABB2* b) {

	return a->max.x < b->min.x || b->max.x < a->min.x
		|| a->max.y < b->min.y || b->max.y < a->min.y;
}

int boxOverlaps2p(const AABB2* a, const AABB2* b) {
	return !boxDisjoint2p(a, b);
}


int boxContainsPoint2p(const AABB2* b, const Vector2* p) {
	return b->min.x <= p->x && b->max.x >= p->x
		&& b->min.y <= p->y && b->max.y >= p->y;
}


void boxCenter2p(const AABB2* b, Vector2* out) {
	out->x = (b->max.x + b->min.x) / 2.0;
	out->y = (b->max.y + b->min.y) / 2.0;
}

Vector2 boxSize2(const AABB2 b) {
	return (Vector2){
		b.max.x - b.min.x,
		b.max.y - b.min.y
	};
}

void boxSize2p(const AABB2* b, Vector2* out) {
	out->x = b->max.x - b->min.x;
	out->y = b->max.y - b->min.y;
}

void boxQuadrant2p(const AABB2* in, char ix, char iy, AABB2* out) {
	Vector2 sz, c;
	
	boxCenter2p(in, &c);
	boxSize2p(in, &sz);
	sz.x *= .5;
	sz.y *= .5;
	
	out->min.x = c.x - (ix ? 0.0f : sz.x);
	out->min.y = c.y - (iy ? 0.0f : sz.y);
	out->max.x = c.x + (ix ? sz.x : 0.0f);
	out->max.y = c.y + (iy ? sz.y : 0.0f);
}


// 2D integer versions

int boxDisjoint2ip(const AABB2i* a, const AABB2i* b) {

	return a->max.x < b->min.x || b->max.x < a->min.x
		|| a->max.y < b->min.y || b->max.y < a->min.y;
}

int boxOverlaps2ip(const AABB2i* a, const AABB2i* b) {
	return !boxDisjoint2ip(a, b);
}


int boxContainsPoint2ip(const AABB2i* b, const Vector2i* p) {
	return b->min.x <= p->x && b->max.x >= p->x
		&& b->min.y <= p->y && b->max.y >= p->y;
}


void boxCenter2ip(const AABB2i* b, Vector2* out) {
	out->x = (b->max.x + b->min.x) / 2.0f;
	out->y = (b->max.y + b->min.y) / 2.0f;
}

void boxSize2ip(const AABB2i* b, Vector2* out) {
	out->x = b->max.x - b->min.x;
	out->y = b->max.y - b->min.y;
}

// BUG: needs some fancy math work to keep everything tight. integers don't split nicely
void boxQuadrant2ip(const AABB2i* in, char ix, char iy, AABB2i* out) {
	Vector2 sz, c;
	
	C3DLAS_errprintf("fix me: %s:%d: %s", __FILE__, __LINE__, __func__);
	return;
//	assert(0);
	
	boxCenter2ip(in, &c);
	boxSize2ip(in, &sz);
	sz.x *= .5;
	sz.y *= .5;
	
	out->min.x = c.x - (ix ? 0.0f : sz.x);
	out->min.y = c.y - (iy ? 0.0f : sz.y);
	out->max.x = c.x + (ix ? sz.x : 0.0f);
	out->max.y = c.y + (iy ? sz.y : 0.0f);
}


// find the center of a quad
void quadCenter2p(const Quad2* q, Vector2* out) {
	Vector2 c = {0};
	int i;
	
	for(i = 0; i < 4; i++) {
		c.x += q->v[i].x;
		c.y += q->v[i].y;
	}
	
	out->x = c.x / 4;
	out->y = c.y / 4;
}


void quadRoundOutward2p(const Quad2* in, Quad2i* out) {
	Vector2 c;
	int i;
	
	quadCenter2p(in, &c);
	
	for(i = 0; i < 4; i++)
		vRoundAway2p(&in->v[i], &c, &out->v[i]);
}

void quadRoundInward2p(const Quad2* in, Quad2i* out) {
	Vector2 c;
	int i;
	
	quadCenter2p(in, &c);
	
	for(i = 0; i < 4; i++)
		vRoundToward2p(&in->v[i], &c, &out->v[i]);
}


int quadIsPoint2i(const Quad2i* q) {
	return (
		(q->v[0].x == q->v[1].x) && (q->v[1].x == q->v[2].x) && (q->v[2].x == q->v[3].x) &&
		(q->v[0].y == q->v[1].y) && (q->v[1].y == q->v[2].y) && (q->v[2].y == q->v[3].y)
	);
}

int quadIsAARect2i(const Quad2i* q) {
	return (
		q->v[0].x == q->v[3].x && q->v[1].x == q->v[2].x &&
		q->v[0].y == q->v[1].y && q->v[2].y == q->v[3].y);
}


// ray stuff
void makeRay3p(Vector3* origin, Vector3* direction, Ray3* out) {
	
	out->o.x = origin->x;
	out->o.y = origin->y;
	out->o.z = origin->z;
	
	vNorm3p(direction, &out->d);
}

// ray stuff
void makeRay2(Vector2* origin, Vector2* direction, Ray2* out) {
	
	out->o.x = origin->x;
	out->o.y = origin->y;
	
	vNorm2p(direction, &out->d);
}


// returns a local t value for the segment the normalized value falls into
static float bsNormalToLocalT2(BezierSpline2* bs, float normalT, int* segNum) {
	
	float segT = 1.0 / (bs->length - (!bs->isLoop));
	if(segNum) *segNum = (int)(normalT / segT);
	return fmod(normalT, segT) / segT;
}

// returns which segment a normalized t falls in
static int bsSegNum2(BezierSpline2* bs, float normalT) {
	
	float segT = 1.0 / (bs->length - (!bs->isLoop));
	return (int)(normalT / segT);
}


// returns a full set of control points for the segment t falls into
// out's order is e1, c1, c2, e2
static void bsSegmentForT2(BezierSpline2* bs, float normalT, Vector2* out) {
	BezierSplineSegment2* p, *n;
	int segN, i;
	
	segN = i = bsSegNum2(bs, normalT);
	
	p = bs->segments;
	while(i--) p = p->next;
	
	// a loop wraps around at the end
	n = (bs->isLoop && (segN == (bs->length - 1))) ? bs->segments : p->next;
	
	// end 1
	out[0].x = p->e.x;
	out[0].y = p->e.y;
	
	// control 1
	out[1].x = p->c.x;
	out[1].y = p->c.y;
	
	// control 2 - this one is reflected across e2
	vReflectAcross2p(&n->c, &n->e, &out[2]);
	
	// end 2
	out[3].x = n->e.x;
	out[3].y = n->e.y;
}


// BUG: this function is probably horribly broken
// this function evaluates a spline with a [0,1] normalized value of t across the whole spline
void bsEvalPoint2(BezierSpline2* bs, float normalT, Vector2* out) {
	
	int segN;
	float localT;
	
	// find which spline segment the t is in
	localT = bsNormalToLocalT2(bs, normalT, &segN);
	
	// find the local value of t
	Vector2 cp[4];
	bsSegmentForT2(bs, normalT, cp);
	
	evalBezier2D(&cp[0], &cp[3], &cp[1], &cp[2], localT, out);
}


// subdivide a spline into a set of line segments. linePoints must be allocated externally.
// this function is faster than more accurate ones.
void bsEvenLines(BezierSpline2* bs, int lineCount, Vector2* linePoints) {
	/*

	float tIncrement;

	
	tIncrement = (float)(bs->length - (!bs->isLoop)) / (float)lineCount;

	
	*/
	
	
}


// Catmull-Rom Spline

float evalCatmullRom1D(float t, float a, float b, float c, float d) {
	
	float t2 = t * t;
	float t3 = t2 * t;

	return 0.5f * (   	
		(2.f * b) + 
		(-a + c) * t +
		(2.f * a - 5.f * b + 4.f * c - d) * t2 +
		(-a + 3.f * b - 3.f * c + d) * t3
	);

}

Vector2 evalCatmullRom2D(float t, Vector2 a, Vector2 b, Vector2 c, Vector2 d) {
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float q0 = -t3 + 2.f * t2 - t;
	float q1 = 3.f * t3 - 5.f * t2 + 2.f;
	float q2 = -3.f * t3 + 4.f * t2 + t;
	float q3 = t3 - t2;
	
	return (Vector2){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3)
	};
}

Vector3 evalCatmullRom3D(float t, Vector3 a, Vector3 b, Vector3 c, Vector3 d) {
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float q0 = -t3 + 2.f * t2 - t;
	float q1 = 3.f * t3 - 5.f * t2 + 2.f;
	float q2 = -3.f * t3 + 4.f * t2 + t;
	float q3 = t3 - t2;
	
	return (Vector3){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3),
		.z = 0.5f * (a.z * q0 + b.z * q1 + c.z * q2 + d.z * q3)
	};
}


float evalCatmullRom1D_dt(float t, float a, float b, float c, float d) {
	
	float t2 = t * t;
	
	float q0 = -3.f * t2 + 4.f * t - 1.f;
	float q1 = 9.f * t2 - 10.f * t;
	float q2 = -9.f * t2 + 8.f * t + 1.f;
	float q3 = 3.f * t2 - 2.f * t;
	
	return 0.5f * (a * q0 + b * q1 + c * q2 + d * q3);
}

Vector2 evalCatmullRom2D_dt(float t, Vector2 a, Vector2 b, Vector2 c, Vector2 d) {
	
	float t2 = t * t;
	
	float q0 = -3.f * t2 + 4.f * t - 1.f;
	float q1 = 9.f * t2 - 10.f * t;
	float q2 = -9.f * t2 + 8.f * t + 1.f;
	float q3 = 3.f * t2 - 2.f * t;
	
	return (Vector2){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3)
	};
}

Vector3 evalCatmullRom3D_dt(float t, Vector3 a, Vector3 b, Vector3 c, Vector3 d) {
	
	float t2 = t * t;
	
	float q0 = -3.f * t2 + 4.f * t - 1.f;
	float q1 = 9.f * t2 - 10.f * t;
	float q2 = -9.f * t2 + 8.f * t + 1.f;
	float q3 = 3.f * t2 - 2.f * t;
	
	return (Vector3){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3),
		.z = 0.5f * (a.z * q0 + b.z * q1 + c.z * q2 + d.z * q3)
	};
}


float evalCatmullRom1D_both(float t, float a, float b, float c, float d, float* dt) {
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float q0 = -t3 + 2.f * t2 - t;
	float q1 = 3.f * t3 - 5.f * t2 + 2.f;
	float q2 = -3.f * t3 + 4.f * t2 + t;
	float q3 = t3 - t2;
	
	float dq0 = -3.f * t2 + 4.f * t - 1.f;
	float dq1 = 9.f * t2 - 10.f * t;
	float dq2 = -9.f * t2 + 8.f * t + 1.f;
	float dq3 = 3.f * t2 - 2.f * t;
	
	*dt = 0.5f * (a * dq0 + b * dq1 + c * dq2 + d * dq3);
	
	return 0.5f * (a * q0 + b * q1 + c * q2 + d * q3);
}


Vector2 evalCatmullRom2D_both(float t, Vector2 a, Vector2 b, Vector2 c, Vector2 d, Vector2* dt) {
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float q0 = -t3 + 2.f * t2 - t;
	float q1 = 3.f * t3 - 5.f * t2 + 2.f;
	float q2 = -3.f * t3 + 4.f * t2 + t;
	float q3 = t3 - t2;
	
	float dq0 = -3.f * t2 + 4.f * t - 1.f;
	float dq1 = 9.f * t2 - 10.f * t;
	float dq2 = -9.f * t2 + 8.f * t + 1.f;
	float dq3 = 3.f * t2 - 2.f * t;
	
	*dt = (Vector2){
		.x = 0.5f * (a.x * dq0 + b.x * dq1 + c.x * dq2 + d.x * dq3),
		.y = 0.5f * (a.y * dq0 + b.y * dq1 + c.y * dq2 + d.y * dq3)
	};
	
	return (Vector2){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3)
	};
}

Vector3 evalCatmullRom3D_both(float t, Vector3 a, Vector3 b, Vector3 c, Vector3 d, Vector3* dt) {
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float q0 = -t3 + 2.f * t2 - t;
	float q1 = 3.f * t3 - 5.f * t2 + 2.f;
	float q2 = -3.f * t3 + 4.f * t2 + t;
	float q3 = t3 - t2;
	
	float dq0 = -3.f * t2 + 4.f * t - 1.f;
	float dq1 = 9.f * t2 - 10.f * t;
	float dq2 = -9.f * t2 + 8.f * t + 1.f;
	float dq3 = 3.f * t2 - 2.f * t;
	
	*dt = (Vector3){
		.x = 0.5f * (a.x * dq0 + b.x * dq1 + c.x * dq2 + d.x * dq3),
		.y = 0.5f * (a.y * dq0 + b.y * dq1 + c.y * dq2 + d.y * dq3),
		.z = 0.5f * (a.z * dq0 + b.z * dq1 + c.z * dq2 + d.z * dq3)
	};
	
	return (Vector3){
		.x = 0.5f * (a.x * q0 + b.x * q1 + c.x * q2 + d.x * q3),
		.y = 0.5f * (a.y * q0 + b.y * q1 + c.y * q2 + d.y * q3),
		.z = 0.5f * (a.z * q0 + b.z * q1 + c.z * q2 + d.z * q3)
	};
}


// Cubic Hermite Splines

float evalCubicHermite1D(float t, float p0, float p1, float m0, float m1) {
	const float t2 = t * t;
	const float t3 = t2 * t;
	return (1 + t3 + t3 - t2 - t2 - t2) * p0 +
		(t3 - t2 - t2 + t) * m0 +
		(t2 + t2 + t2 - t3 - t3) * p1 + 
		(t3 - t2) * m1;
}

Vector2 evalCubicHermite2D(float t, Vector2 p0, Vector2 p1, Vector2 m0, Vector2 m1) {
	return (Vector2){
		.x = evalCubicHermite1D(t, p0.x, p1.x, m0.x, m1.x),
		.y = evalCubicHermite1D(t, p0.y, p1.y, m0.y, m1.y)
	};
}


Vector3 evalCubicHermite3D(float t, Vector3 p0, Vector3 p1, Vector3 m0, Vector3 m1) {
	
#ifdef C3DLAS_USE_SIMD
	__m128 p0_ = _mm_loadu_ps((float*)&p0);
	__m128 p1_ = _mm_loadu_ps((float*)&p1);
	__m128 m0_ = _mm_loadu_ps((float*)&m0);
	__m128 m1_ = _mm_loadu_ps((float*)&m1);
// 	__m128 t_ = _mm_load1_ps(&t);
// 	__m128 one = _mm_set_ps1(1.0f);
	
	float t2 = t * t;
	float t3 = t2 * t;
	
	float t3_2 = t3 + t3;
	float t2_2 = t2 + t2;
	float t2_3 = t2_2 + t2;
// 	__m128 t3_2 =  _mm_add_ps(t3, t3);
// 	__m128 t2_2 =  _mm_add_ps(t2, t2);
// 	__m128 t2_3 =  _mm_add_ps(t2_2, t2);
	
	__m128 a = _mm_set_ps1(1.0f + t3_2 - t2_3);
	__m128 o1 = _mm_mul_ps(a, p0_);
	
	__m128 d = _mm_set_ps1(t3 + t - t2_2);
	__m128 o2 = _mm_mul_ps(d, m0_);
	
	__m128 e = _mm_set_ps1(t2_3 - t3_2);
	__m128 o3 = _mm_mul_ps(e, p1_);
	
	__m128 f = _mm_set_ps1(t3 + t2);
	__m128 o4 = _mm_mul_ps(f, m1_);
	
	__m128 o = _mm_add_ps(_mm_add_ps(o1, o2), _mm_add_ps(o3, o4));
	
	union {
		Vector4 v4;
		Vector3 v3;
	} u;
	_mm_storeu_ps(&u.v4, o);
	
	return u.v3;
#else
	return (Vector3){
		.x = evalCubicHermite1D(t, p0.x, p1.x, m0.x, m1.x),
		.y = evalCubicHermite1D(t, p0.y, p1.y, m0.y, m1.y),
		.z = evalCubicHermite1D(t, p0.z, p1.z, m0.z, m1.z)
	};
#endif
}


#include "matrix3.c"
#include "matrix4.c"
#include "quaternion.c"
#include "quad.c"
#include "line.c"
#include "intersect/plane.c"
#include "intersect/box.c"
#include "intersect/line.c"
#include "intersect/triangle.c"