#include "stdafx.h"

// ---------------------------------------------------------------------------
// Minimal runtime replacements, written with MSVC x86 inline assembly.
// Presumably used to build without the regular C runtime (note _fltused and
// _ftol2_sse below) -- TODO confirm against the project settings.
//
// Functions that contain only __asm statements and no `return` rely on the
// value the assembly leaves behind: ST(0) for float results, EAX for
// integer/pointer results.
// ---------------------------------------------------------------------------
extern "C"
{
    // Returns pi, loaded by the FPU `fldpi` instruction.
    float pi()
    {
        __asm fldpi
    }

    // Fills `_size` bytes starting at `dest` with zero.
    void memclear(void* dest, size_t _size)
    {
        __asm
        {
            xor eax, eax
            mov edi, dest
            mov ecx, _size
            rep stosb
        }
    }

    // Copies `_size` bytes from `source` to `dest` (byte-wise, forward).
    // Returns `dest`, as the standard memcpy contract requires.
    void* memcpy(void* dest, const void* source, size_t _size)
    {
        __asm
        {
            mov esi, source
            mov edi, dest
            mov ecx, _size
            rep movsb
            mov eax, dest   // return value: previously EAX was never set
        }
    }

    // Returns the number of characters before the terminating zero byte.
    size_t strlen(const char* string)
    {
        __asm
        {
            xor ecx, ecx
            mov edi, string
            not ecx             // ecx = 0xFFFFFFFF: scan without a length limit
            xor al, al          // byte to search for: 0
            cld
            repne scasb
            not ecx             // ecx = bytes scanned, including the terminator
            lea eax, [ecx-1]    // return value: length without the terminator
        }
    }

    // Returns x raised to the power y, computed as 2^(y * log2(x)).
    float pow(float x, float y)
    {
        float r;
        __asm
        {
            fld y
            fld x
            fyl2x               // st0 = y * log2(x)
            fld1
            fld st(1)
            fprem               // st0 = remainder of the exponent against 1
            f2xm1               // st0 = 2^remainder - 1
            faddp st(1),st      // st0 = 2^remainder
            fscale              // scale by 2^(integer part of the exponent)
            fxch st(1)
            fstp st(0)          // drop the exponent, keep the result
            fstp r
        }
        return r;
    }

    // Square root via `fsqrt`.
    float sqrt(float x)
    {
        __asm fld x
        __asm fsqrt
    }

    // Sine via `fsin`.
    float sin(float x)
    {
        __asm fld x
        __asm fsin
    }

    // Cosine via `fcos`.
    float cos(float x)
    {
        __asm fld x
        __asm fcos
    }

    // Absolute value via the FPU `fabs` instruction.
    float fabs(float x)
    {
        __asm fld x
        __asm fabs
    }

    // Tangent, computed as sin(f) / cos(f).
    float tan(float f)
    {
        return sin(f) / cos(f);
    }

    // Hermite interpolation of x between the edges a and b:
    // 0 below a, 1 above b, and t*t*(3 - 2*t) with t = (x-a)/(b-a) in between.
    float smoothstep(float x, float a, float b)
    {
        if (x < a) return 0.0f;
        if (x > b) return 1.0f;
        x = (x - a) / (b - a);
        return x * x * (3.0f - 2.0f * x);
    }

    // Returns 2 raised to the power f.
    float exp2(float f)
    {
        _asm fld dword ptr [f]
        _asm fld1
        _asm fld st(1)
        _asm fprem              // st0 = remainder of f against 1
        _asm f2xm1              // st0 = 2^remainder - 1
        _asm faddp st(1), st    // st0 = 2^remainder
        _asm fscale             // scale by 2^(integer part of f)
        _asm fstp st(1)         // drop f, keep the result
        _asm fstp dword ptr [f]
        return f;
    }

    // Converts a float to an integer with `fistp`, i.e. using the FPU's
    // current rounding mode instead of a C-style cast.
    unsigned long ftol(float x)
    {
        int t;
        _asm fld x
        _asm fistp t
        return t;
    }

    // Helper the compiler calls for float-to-int casts. The null write is
    // deliberate: it traps so that stray casts get noticed.
    long _ftol2_sse(float x)
    {
        *((char*)0) = 0; // If it breaks here, it's because you have a cast to (int) somewhere. Use ftol for float-to-int casts.
        return ftol(x);
    }

    // Returns the smaller of a and b (b when they compare equal).
    float min(float a, float b)
    {
        if (a < b) return a;
        return b;
    }

    // Returns the larger of a and b (b when they compare equal).
    float max(float a, float b)
    {
        if (a > b) return a;
        return b;
    }

    // Presumably the symbol the MSVC toolchain references whenever floating
    // point is used; normally supplied by the CRT -- TODO confirm.
    int _fltused = 1;
}

// ---------------------------------------------------------------------------
// vec4 -- four floats (x, y, z, w), also accessible as the array m.
// The SSE loads/stores used here are the aligned variants.
// ---------------------------------------------------------------------------

// Zero vector.
vec4::vec4():x(0),y(0),z(0),w(0){}

// Copy constructor.
vec4::vec4(const vec4& arg):x(arg.x),y(arg.y),z(arg.z),w(arg.w){}

// Component-wise constructor.
vec4::vec4(float X, float Y, float Z, float W):x(X),y(Y),z(Z),w(W){}

// Sets x, y and z to `a`; w is set to 0.
vec4::vec4(float a):x(a),y(a),z(a),w(0){}

// Copies 16 bytes (four floats) from `arg`.
vec4::vec4(const float* arg){memcpy(m, arg, 16);}

// Stores the four lanes of an SSE register.
vec4::vec4(const __m128& arg){_mm_store_ps(m, arg);}

// Packs 255*x, 255*y and 255*z into one integer, x in the highest of the
// three bytes and z in the lowest. The components are not clamped.
int vec4::ToInt32() const
{
    return (((ftol(255 * x) << 8) + ftol(255 * y)) << 8) + ftol(255 * z);
}

// Loads the vector into an SSE register.
vec4::operator __m128() const
{
    return _mm_load_ps(m);
}

// Unchecked component access.
float& vec4::operator[](int i)
{
    return m[i];
}

// Component-wise addition.
vec4 vec4::operator + (const vec4& arg) const
{
    return _mm_add_ps(_mm_load_ps(m), _mm_load_ps(arg.m));
}

// Adds `arg` to all four components.
vec4 vec4::operator + (float arg) const
{
    return *this + _mm_set_ps1(arg);
}

void vec4::operator += (const vec4& arg)
{
    _mm_store_ps(m, _mm_add_ps(*this, arg));
}

void vec4::operator += (float arg)
{
    *this += _mm_set_ps1(arg);
}

// Component-wise subtraction.
vec4 vec4::operator - (const vec4& arg) const
{
    return _mm_sub_ps(*this, arg);
}

// Subtracts `arg` from all four components.
vec4 vec4::operator - (float arg) const
{
    return *this - _mm_set_ps1(arg);
}

void vec4::operator -= (const vec4& arg)
{
    _mm_store_ps(m, _mm_sub_ps(*this, arg));
}

void vec4::operator -= (float arg)
{
    *this -= _mm_set_ps1(arg);
}

// Component-wise multiplication.
vec4 vec4::operator * (const vec4& arg) const
{
    return _mm_mul_ps(*this, arg);
}

// Multiplies all four components by `arg`.
vec4 vec4::operator * (float arg) const
{
    return *this * _mm_set_ps1(arg);
}

void vec4::operator *= (const vec4& arg)
{
    _mm_store_ps(m, _mm_mul_ps(*this, arg));
}

void vec4::operator *= (float arg)
{
    *this *= _mm_set_ps1(arg);
}

// Division by a scalar, implemented as multiplication by 1/arg.
vec4 vec4::operator / (float arg) const
{
    return *this * _mm_set_ps1(1.0f / arg);
}

void vec4::operator /= (float arg)
{
    *this *= _mm_set_ps1(1.0f / arg);
}

// Negates all four components.
vec4 vec4::operator -() const
{
    return *this * -1.0f;
}

// Returns a copy of `arg`.
__inline vec4 align(vec4& arg)
{
    return arg;
};

// Four-component dot product (w takes part): multiply, then two horizontal
// adds so that every lane holds the sum.
float dot(const vec4& arg1, const vec4& arg2)
{
    __m128 temp = _mm_mul_ps(arg1, arg2);
    temp = _mm_hadd_ps(temp, temp);
    temp = _mm_hadd_ps(temp, temp);
    return temp.m128_f32[0];
}

// Euclidean length over all four components.
float length(vec4& arg)
{
    return sqrt(dot(arg, arg));
}

// Scales `arg` by the reciprocal square root of dot(arg, arg).
// _mm_rsqrt_ss is an approximation, so the result is only approximately
// unit length.
vec4 normalize(vec4 const& arg)
{
    return arg * _mm_rsqrt_ss(_mm_set_ps1(dot(arg,arg))).m128_f32[0];
}

// Linear blend: arg1 at t = 0, arg2 at t = 1.
vec4 mix(vec4& arg1, vec4& arg2, float t)
{
    return arg1 * (1.0f - t) + arg2 * t;
}

// Clamps every component to [lo, hi].
vec4 clamp(vec4& arg, float lo, float hi)
{
    return _mm_max_ps(_mm_min_ps(arg, _mm_set_ps1(hi)), _mm_set_ps1(lo));
}

// Clamps a scalar to [lo, hi].
float clamp(float arg, float lo, float hi)
{
    return max(min(arg, hi), lo);
}

// Returns arg1 - 2 * dot(arg2, arg1) * arg2.
vec4 reflect(vec4& arg1, vec4& arg2)
{
    return arg1 - arg2 * 2.0f * dot(arg2, arg1);
}

// 3D cross product of the xyz parts; the w lane of the result is
// va.w*vb.w - va.w*vb.w.
vec4 cross(vec4& va, vec4& vb)
{
    __m128 a = va;
    __m128 b = vb;
    __m128 ea = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // (a.y, a.z, a.x, a.w)
    __m128 eb = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // (b.z, b.x, b.y, b.w)
    __m128 xa = _mm_mul_ps(ea, eb);
    a = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2));         // (a.z, a.x, a.y, a.w)
    b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1));         // (b.y, b.z, b.x, b.w)
    __m128 xb = _mm_mul_ps(a, b);
    return _mm_sub_ps(xa, xb);
}

// ---------------------------------------------------------------------------
// Quaternion -- (x, y, z, w), also accessible as the array m.
// ---------------------------------------------------------------------------

// Initializes to (0, 0, 0, 1).
Quaternion::Quaternion()
    : x(0)
    , y(0)
    , z(0)
    , w(1)
{
}

// Copy constructor.
Quaternion::Quaternion(const Quaternion& arg)
    : x(arg.x)
    , y(arg.y)
    , z(arg.z)
    , w(arg.w)
{
}

// Copies four floats from M.
Quaternion::Quaternion(const float* M)
{
    memcpy(m, M, 4 * sizeof(float));
}

// Loads the quaternion into an SSE register.
Quaternion::operator __m128() const
{
    return _mm_load_ps(m);
}

// Component-wise constructor. When ab_Rotation is true, (X, Y, Z) is used as
// a rotation axis and W as an angle in degrees: the axis is normalized and
// scaled by sin(angle/2), and w becomes cos(angle/2).
Quaternion::Quaternion(float X, float Y, float Z, float W, bool ab_Rotation)
    : x(X)
    , y(Y)
    , z(Z)
    , w(W)
{
    if (ab_Rotation)
    {
        // degrees -> radians
        W *= pi();
        W /= 180.0;
        // w is zeroed so that normalize() only sees the axis
        w = 0.0f;
        normalize();
        _mm_store_ps(m, _mm_mul_ps(*this, _mm_set_ps1(sin(W / 2.0f))));
        w = cos(W / 2.0f);
    }
}

// Stores the four lanes of an SSE register.
Quaternion::Quaternion(const __m128& arg)
{
    _mm_store_ps(m, arg);
}

// Raw access to the four components.
Quaternion::operator float*()
{
    return m;
}

// Quaternion product. _mm_set_ps takes its arguments from the highest lane
// to the lowest, so the expressions below are listed as w, z, y, x.
// The w term previously read "- z * arg.z * w * arg.w" (a '*' in place of
// '+'), which made the product of two identity quaternions come out as zero.
// NOTE(review): the signs of several cross terms in the x/y/z rows differ
// from the textbook Hamilton product; they are left unchanged -- confirm the
// intended convention.
Quaternion Quaternion::operator * (const Quaternion& arg) const
{
    return _mm_set_ps(
        x * -arg.x - y * arg.y - z * arg.z + w * arg.w,
        x * -arg.y + y * arg.x + z * arg.w - w * arg.z,
        x * -arg.z - y * arg.w + z * arg.x + w * arg.y,
        x * -arg.w + y * arg.z - z * arg.y + w * arg.x);
}

// Negates all four components. _mm_set_ps takes the highest lane first, so
// the arguments are passed as (w, z, y, x); the previous (x, y, z, w) order
// reversed the components in addition to negating them.
Quaternion Quaternion::operator -() const
{
    return _mm_set_ps(-w, -z, -y, -x);
}

void Quaternion::operator *= (const Quaternion& arg)
{
    _mm_store_ps(m, *this * arg);
}

// Component-wise addition.
void Quaternion::operator += (const Quaternion& arg)
{
    _mm_store_ps(m, _mm_add_ps(*this, arg));
}

// Replaces this quaternion with (-arg) * this * arg.
// NOTE(review): unary minus negates all four components; a rotation
// "sandwich" normally uses the conjugate (only x, y, z negated) -- confirm.
void Quaternion::rotate (const Quaternion& arg)
{
    _mm_store_ps(m, -arg * *this * arg);
}

// Sum of the squares of all four components.
float Quaternion::lengthSquared() const
{
    __m128 temp = _mm_mul_ps(*this, *this);
    temp = _mm_hadd_ps(temp, temp);
    temp = _mm_hadd_ps(temp, temp);
    return temp.m128_f32[0];
}

float Quaternion::length() const
{
    return sqrt(lengthSquared());
}

// Scales all components by the (approximate) reciprocal square root of
// lengthSquared().
void Quaternion::normalize()
{
    _mm_store_ps(m, _mm_mul_ps(*this, _mm_set_ps1(_mm_rsqrt_ss(_mm_set_ps1(lengthSquared())).m128_f32[0])));
}

// ---------------------------------------------------------------------------
// Matrix -- 4x4, stored as four row vectors r1..r4, also accessible as the
// flat array m (the code below indexes it as m[row * 4 + column]).
// ---------------------------------------------------------------------------

// Identity matrix.
Matrix::Matrix()
    : r1(1, 0, 0, 0)
    , r2(0, 1, 0, 0)
    , r3(0, 0, 1, 0)
    , r4(0, 0, 0, 1)
{
}

// Copy constructor.
Matrix::Matrix(const Matrix& arg)
    : r1(arg.r1)
    , r2(arg.r2)
    , r3(arg.r3)
    , r4(arg.r4)
{
}

// Builds the matrix from four rows.
Matrix::Matrix(const vec4& R1, const vec4& R2, const vec4& R3, const vec4& R4)
    : r1(R1)
    , r2(R2)
    , r3(R3)
    , r4(R4)
{
}

// Builds the matrix from 16 consecutive floats, four per row.
Matrix::Matrix(const float* M)
    : r1(&M[0])
    , r2(&M[4])
    , r3(&M[8])
    , r4(&M[12])
{
}

// Builds a rotation matrix from a quaternion. A working copy is normalized
// first unless its squared length is exactly 1 or exactly 0.
Matrix::Matrix(const Quaternion& aQuaternion)
{
    Quaternion lQuaternion(aQuaternion);
    float lf_Length2 = lQuaternion.lengthSquared();
    if (lf_Length2 != 1.0 && lf_Length2 != 0.0)
    {
        lQuaternion.normalize();
    }

    float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
    x2 = lQuaternion.m[0] + lQuaternion.m[0];
    y2 = lQuaternion.m[1] + lQuaternion.m[1];
    z2 = lQuaternion.m[2] + lQuaternion.m[2];
    xx = lQuaternion.m[0] * x2;
    xy = lQuaternion.m[0] * y2;
    xz = lQuaternion.m[0] * z2;
    yy = lQuaternion.m[1] * y2;
    yz = lQuaternion.m[1] * z2;
    zz = lQuaternion.m[2] * z2;
    wx = lQuaternion.m[3] * x2;
    wy = lQuaternion.m[3] * y2;
    wz = lQuaternion.m[3] * z2;

    r1 = vec4(1.0f - (yy + zz), xy - wz, xz + wy, 0.0f);
    r2 = vec4(xy + wz, 1.0f - (xx + zz), yz - wx, 0.0f);
    r3 = vec4(xz - wy, yz + wx, 1.0f - (xx + yy), 0.0f);
    r4 = vec4(0.0f, 0.0f, 0.0f, 1.0f);
}

// Unchecked access to the flat element array.
float& Matrix::operator[](int i)
{
    return m[i];
}

// Matrix * column vector: each result component is the dot product of one
// row with `arg`.
vec4 Matrix::operator * (const vec4& arg) const
{
    return vec4(dot(r1, arg), dot(r2, arg), dot(r3, arg), dot(r4, arg));
}

// Matrix product this * arg.
Matrix Matrix::operator * (const Matrix& arg) const
{
    Matrix temp;
    for (int i = 0; i < 4; ++i) // row
    {
        for (int j = 0; j < 4; ++j) // column
        {
            float value = 0;
            for (int k = 0; k < 4; ++k)
            {
                value += m[i*4+k] * arg.m[k*4+j];
            }
            temp.m[i*4+j] = value;
        }
    }
    return temp;
}

// In-place product.
// NOTE(review): each new row is (old this) * (row of arg), so the result is
// arg * transpose(this) rather than the this * arg that operator* computes.
// Left unchanged because LookAt() below uses it -- confirm what callers
// expect before aligning the two operators.
void Matrix::operator *= (const Matrix& arg)
{
    Matrix lTemp = *this;
    r1 = lTemp * arg.r1;
    r2 = lTemp * arg.r2;
    r3 = lTemp * arg.r3;
    r4 = lTemp * arg.r4;
}

// Rotation about the x axis by af_Rad radians.
Matrix Matrix::rotateX(float af_Rad)
{
    float s = sin(af_Rad);
    float c = cos(af_Rad);
    return Matrix(
        vec4(1,0,0,0),
        vec4(0,c,-s,0),
        vec4(0,s,c,0),
        vec4(0,0,0,1));
}

// Rotation about the y axis by af_Rad radians.
Matrix Matrix::rotateY(float af_Rad)
{
    float s = sin(af_Rad);
    float c = cos(af_Rad);
    return Matrix(
        vec4(c,0,s,0),
        vec4(0,1,0,0),
        vec4(-s,0,c,0),
        vec4(0,0,0,1));
}

// Rotation about the z axis by af_Rad radians.
Matrix Matrix::rotateZ(float af_Rad)
{
    float s = sin(af_Rad);
    float c = cos(af_Rad);
    return Matrix(
        vec4(c,-s,0,0),
        vec4(s,c,0,0),
        vec4(0,0,1,0),
        vec4(0,0,0,1));
}

// Uniform scale of x, y and z by s.
Matrix Matrix::scale(float s)
{
    return Matrix(
        vec4(s,0,0,0),
        vec4(0,s,0,0),
        vec4(0,0,s,0),
        vec4(0,0,0,1));
}

// Per-axis scale taken from s.x, s.y and s.z.
Matrix Matrix::scale(const vec4& s)
{
    return Matrix(
        vec4(s.x,0,0,0),
        vec4(0,s.y,0,0),
        vec4(0,0,s.z,0),
        vec4(0,0,0,1));
}

// Translation matrix; the offset is stored in the fourth column.
Matrix Matrix::translate(const vec4& aTranslation)
{
    return Matrix(
        vec4(1,0,0,aTranslation.x),
        vec4(0,1,0,aTranslation.y),
        vec4(0,0,1,aTranslation.z),
        vec4(0,0,0,1));
}

// Transposes the matrix in place.
void Matrix::transpose()
{
    Matrix lTemp(
        vec4(r1.x, r2.x, r3.x, r4.x),
        vec4(r1.y, r2.y, r3.y, r4.y),
        vec4(r1.z, r2.z, r3.z, r4.z),
        vec4(r1.w, r2.w, r3.w, r4.w));
    r1 = lTemp.r1;
    r2 = lTemp.r2;
    r3 = lTemp.r3;
    r4 = lTemp.r4;
}

// Returns a1 * b2 - b1 * a2.
float Matrix::Determinante2x2(float a1, float a2, float b1, float b2)
{
    return a1 * b2 - b1 * a2;
}

// 3x3 determinant, expanded along (a1, b1, c1).
float Matrix::Determinante3x3(
    float a1, float a2, float a3,
    float b1, float b2, float b3,
    float c1, float c2, float c3)
{
    return a1 * Determinante2x2( b2, b3, c2, c3 )
         - b1 * Determinante2x2( a2, a3, c2, c3 )
         + c1 * Determinante2x2( a2, a3, b2, b3 );
}

// Inverts the matrix in place: signed 3x3 minors are gathered in lResult,
// the determinant is formed from the first row of m and the first column of
// lResult, and lResult is then divided by it.
// If the determinant is exactly 0 the matrix is left unchanged.
void Matrix::inverse()
{
    Matrix lResult;

    lResult.m[0]  =  Determinante3x3( m[5], m[9], m[13], m[6], m[10], m[14], m[7], m[11], m[15] );
    lResult.m[4]  = -Determinante3x3( m[4], m[8], m[12], m[6], m[10], m[14], m[7], m[11], m[15] );
    lResult.m[8]  =  Determinante3x3( m[4], m[8], m[12], m[5], m[9], m[13], m[7], m[11], m[15] );
    lResult.m[12] = -Determinante3x3( m[4], m[8], m[12], m[5], m[9], m[13], m[6], m[10], m[14] );

    lResult.m[1]  = -Determinante3x3( m[1], m[9], m[13], m[2], m[10], m[14], m[3], m[11], m[15] );
    lResult.m[5]  =  Determinante3x3( m[0], m[8], m[12], m[2], m[10], m[14], m[3], m[11], m[15] );
    lResult.m[9]  = -Determinante3x3( m[0], m[8], m[12], m[1], m[9], m[13], m[3], m[11], m[15] );
    lResult.m[13] =  Determinante3x3( m[0], m[8], m[12], m[1], m[9], m[13], m[2], m[10], m[14] );

    lResult.m[2]  =  Determinante3x3( m[1], m[5], m[13], m[2], m[6], m[14], m[3], m[7], m[15] );
    lResult.m[6]  = -Determinante3x3( m[0], m[4], m[12], m[2], m[6], m[14], m[3], m[7], m[15] );
    lResult.m[10] =  Determinante3x3( m[0], m[4], m[12], m[1], m[5], m[13], m[3], m[7], m[15] );
    lResult.m[14] = -Determinante3x3( m[0], m[4], m[12], m[1], m[5], m[13], m[2], m[6], m[14] );

    lResult.m[3]  = -Determinante3x3( m[1], m[5], m[9], m[2], m[6], m[10], m[3], m[7], m[11] );
    lResult.m[7]  =  Determinante3x3( m[0], m[4], m[8], m[2], m[6], m[10], m[3], m[7], m[11] );
    lResult.m[11] = -Determinante3x3( m[0], m[4], m[8], m[1], m[5], m[9], m[3], m[7], m[11] );
    lResult.m[15] =  Determinante3x3( m[0], m[4], m[8], m[1], m[5], m[9], m[2], m[6], m[10] );

    float lf_Det = (m[0] * lResult.m[0]) + (m[1] * lResult.m[4]) + (m[2] * lResult.m[8]) + (m[3] * lResult.m[12]);

    if (lf_Det == 0.0f)
        return; // matrix is singular

    lf_Det = 1.0f / lf_Det;

    *this = lResult;
    for (int i = 0; i < 16; ++i)
    {
        m[i] *= lf_Det;
    }
}

// Perspective projection matrix. `fov` is passed straight to tan(), so it is
// in radians; zNear and zFar are the clip plane distances.
Matrix Matrix::Perspective(float fov, float aspectRatio, float zNear, float zFar)
{
    float f = 1.0f / tan(fov / 2.0f);
    float m = zNear - zFar;
    Matrix M(
        vec4(f / aspectRatio, 0, 0, 0),
        vec4(0, f, 0, 0),
        vec4(0, 0, (zFar + zNear) / m, (2 * zFar * zNear) / m),
        vec4(0, 0, -1, 0));
    return M;
}

// Builds a view matrix from an eye position, a target point and an up
// vector: rows (s, u, -f, (0,0,0,1)) combined with translate(-eye) through
// operator*=.
// NOTE(review): s is not re-normalized after the cross product, so it is
// only unit length when `up` is perpendicular to the view direction.
// NOTE(review): the result depends on the operator*= behaviour described at
// that operator; the transpose call below was already disabled.
Matrix Matrix::LookAt(vec4& eye, vec4& center, vec4& up)
{
    vec4 f = normalize(center - eye);
    vec4 u = normalize(up);
    vec4 s = cross(f, u);
    u = cross(s, f);
    s[3] = u[3] = f[3] = 0;
    Matrix M(s, u, -f, vec4(0, 0, 0, 1));
    //M.transpose();
    M *= Matrix::translate(-eye);
    return M;
}