Simd.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /*
  2. ===========================================================================
  3. Doom 3 GPL Source Code
  4. Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
  6. Doom 3 Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #ifndef __MATH_SIMD_H__
  21. #define __MATH_SIMD_H__
  22. /*
  23. ===============================================================================
  24. Single Instruction Multiple Data (SIMD)
  25. For optimal use data should be aligned on a 16 byte boundary.
  26. All idSIMDProcessor routines are thread safe.
  27. ===============================================================================
  28. */
  29. class idSIMD {
  30. public:
  31. static void Init( void );
  32. static void InitProcessor( const char *module, bool forceGeneric );
  33. static void Shutdown( void );
  34. static void Test_f( const class idCmdArgs &args );
  35. };
  36. /*
  37. ===============================================================================
  38. virtual base class for different SIMD processors
  39. ===============================================================================
  40. */
  41. #ifdef _WIN32
  42. #define VPCALL __fastcall
  43. #else
  44. #define VPCALL
  45. #endif
  46. class idVec2;
  47. class idVec3;
  48. class idVec4;
  49. class idVec5;
  50. class idVec6;
  51. class idVecX;
  52. class idMat2;
  53. class idMat3;
  54. class idMat4;
  55. class idMat5;
  56. class idMat6;
  57. class idMatX;
  58. class idPlane;
  59. class idDrawVert;
  60. class idJointQuat;
  61. class idJointMat;
  62. struct dominantTri_s;
  63. const int MIXBUFFER_SAMPLES = 4096;
  64. typedef enum {
  65. SPEAKER_LEFT = 0,
  66. SPEAKER_RIGHT,
  67. SPEAKER_CENTER,
  68. SPEAKER_LFE,
  69. SPEAKER_BACKLEFT,
  70. SPEAKER_BACKRIGHT
  71. } speakerLabel;
  72. class idSIMDProcessor {
  73. public:
  74. idSIMDProcessor( void ) { cpuid = CPUID_NONE; }
  75. cpuid_t cpuid;
  76. virtual const char * VPCALL GetName( void ) const = 0;
  77. virtual void VPCALL Add( float *dst, const float constant, const float *src, const int count ) = 0;
  78. virtual void VPCALL Add( float *dst, const float *src0, const float *src1, const int count ) = 0;
  79. virtual void VPCALL Sub( float *dst, const float constant, const float *src, const int count ) = 0;
  80. virtual void VPCALL Sub( float *dst, const float *src0, const float *src1, const int count ) = 0;
  81. virtual void VPCALL Mul( float *dst, const float constant, const float *src, const int count ) = 0;
  82. virtual void VPCALL Mul( float *dst, const float *src0, const float *src1, const int count ) = 0;
  83. virtual void VPCALL Div( float *dst, const float constant, const float *src, const int count ) = 0;
  84. virtual void VPCALL Div( float *dst, const float *src0, const float *src1, const int count ) = 0;
  85. virtual void VPCALL MulAdd( float *dst, const float constant, const float *src, const int count ) = 0;
  86. virtual void VPCALL MulAdd( float *dst, const float *src0, const float *src1, const int count ) = 0;
  87. virtual void VPCALL MulSub( float *dst, const float constant, const float *src, const int count ) = 0;
  88. virtual void VPCALL MulSub( float *dst, const float *src0, const float *src1, const int count ) = 0;
  89. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count ) = 0;
  90. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count ) = 0;
  91. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count ) = 0;
  92. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idVec3 *src, const int count ) = 0;
  93. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idPlane *src, const int count ) = 0;
  94. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count ) = 0;
  95. virtual void VPCALL Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count ) = 0;
  96. virtual void VPCALL Dot( float &dot, const float *src1, const float *src2, const int count ) = 0;
  97. virtual void VPCALL CmpGT( byte *dst, const float *src0, const float constant, const int count ) = 0;
  98. virtual void VPCALL CmpGT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) = 0;
  99. virtual void VPCALL CmpGE( byte *dst, const float *src0, const float constant, const int count ) = 0;
  100. virtual void VPCALL CmpGE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) = 0;
  101. virtual void VPCALL CmpLT( byte *dst, const float *src0, const float constant, const int count ) = 0;
  102. virtual void VPCALL CmpLT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) = 0;
  103. virtual void VPCALL CmpLE( byte *dst, const float *src0, const float constant, const int count ) = 0;
  104. virtual void VPCALL CmpLE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) = 0;
  105. virtual void VPCALL MinMax( float &min, float &max, const float *src, const int count ) = 0;
  106. virtual void VPCALL MinMax( idVec2 &min, idVec2 &max, const idVec2 *src, const int count ) = 0;
  107. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idVec3 *src, const int count ) = 0;
  108. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int count ) = 0;
  109. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count ) = 0;
  110. virtual void VPCALL Clamp( float *dst, const float *src, const float min, const float max, const int count ) = 0;
  111. virtual void VPCALL ClampMin( float *dst, const float *src, const float min, const int count ) = 0;
  112. virtual void VPCALL ClampMax( float *dst, const float *src, const float max, const int count ) = 0;
  113. virtual void VPCALL Memcpy( void *dst, const void *src, const int count ) = 0;
  114. virtual void VPCALL Memset( void *dst, const int val, const int count ) = 0;
  115. // these assume 16 byte aligned and 16 byte padded memory
  116. virtual void VPCALL Zero16( float *dst, const int count ) = 0;
  117. virtual void VPCALL Negate16( float *dst, const int count ) = 0;
  118. virtual void VPCALL Copy16( float *dst, const float *src, const int count ) = 0;
  119. virtual void VPCALL Add16( float *dst, const float *src1, const float *src2, const int count ) = 0;
  120. virtual void VPCALL Sub16( float *dst, const float *src1, const float *src2, const int count ) = 0;
  121. virtual void VPCALL Mul16( float *dst, const float *src1, const float constant, const int count ) = 0;
  122. virtual void VPCALL AddAssign16( float *dst, const float *src, const int count ) = 0;
  123. virtual void VPCALL SubAssign16( float *dst, const float *src, const int count ) = 0;
  124. virtual void VPCALL MulAssign16( float *dst, const float constant, const int count ) = 0;
  125. // idMatX operations
  126. virtual void VPCALL MatX_MultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  127. virtual void VPCALL MatX_MultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  128. virtual void VPCALL MatX_MultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  129. virtual void VPCALL MatX_TransposeMultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  130. virtual void VPCALL MatX_TransposeMultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  131. virtual void VPCALL MatX_TransposeMultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) = 0;
  132. virtual void VPCALL MatX_MultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 ) = 0;
  133. virtual void VPCALL MatX_TransposeMultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 ) = 0;
  134. virtual void VPCALL MatX_LowerTriangularSolve( const idMatX &L, float *x, const float *b, const int n, int skip = 0 ) = 0;
  135. virtual void VPCALL MatX_LowerTriangularSolveTranspose( const idMatX &L, float *x, const float *b, const int n ) = 0;
  136. virtual bool VPCALL MatX_LDLTFactor( idMatX &mat, idVecX &invDiag, const int n ) = 0;
  137. // rendering
  138. virtual void VPCALL BlendJoints( idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints ) = 0;
  139. virtual void VPCALL ConvertJointQuatsToJointMats( idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints ) = 0;
  140. virtual void VPCALL ConvertJointMatsToJointQuats( idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints ) = 0;
  141. virtual void VPCALL TransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint ) = 0;
  142. virtual void VPCALL UntransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint ) = 0;
  143. virtual void VPCALL TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights ) = 0;
  144. virtual void VPCALL TracePointCull( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts ) = 0;
  145. virtual void VPCALL DecalPointCull( byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts ) = 0;
  146. virtual void VPCALL OverlayPointCull( byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts ) = 0;
  147. virtual void VPCALL DeriveTriPlanes( idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) = 0;
  148. virtual void VPCALL DeriveTangents( idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) = 0;
  149. virtual void VPCALL DeriveUnsmoothedTangents( idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts ) = 0;
  150. virtual void VPCALL NormalizeTangents( idDrawVert *verts, const int numVerts ) = 0;
  151. virtual void VPCALL CreateTextureSpaceLightVectors( idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) = 0;
  152. virtual void VPCALL CreateSpecularTextureCoords( idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) = 0;
  153. virtual int VPCALL CreateShadowCache( idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts ) = 0;
  154. virtual int VPCALL CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts ) = 0;
  155. // sound mixing
  156. virtual void VPCALL UpSamplePCMTo44kHz( float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels ) = 0;
  157. virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels ) = 0;
  158. virtual void VPCALL MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ) = 0;
  159. virtual void VPCALL MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ) = 0;
  160. virtual void VPCALL MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] ) = 0;
  161. virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] ) = 0;
  162. virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples ) = 0;
  163. };
  164. // pointer to SIMD processor
  165. extern idSIMDProcessor *SIMDProcessor;
  166. #endif /* !__MATH_SIMD_H__ */