Simd_AltiVec.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  /*
  ===========================================================================
  Doom 3 GPL Source Code
  Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
  This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
  Doom 3 Source Code is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Doom 3 Source Code is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
  In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
  If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  ===========================================================================
  */
  20. #ifndef __MATH_SIMD_ALTIVEC_H__
  21. #define __MATH_SIMD_ALTIVEC_H__
  22. /*
  23. ===============================================================================
  24. AltiVec implementation of idSIMDProcessor
  25. ===============================================================================
  26. */
  27. // Defines for enabling parts of the library
  28. // Turns on/off the simple math routines (add, sub, div, etc)
  29. #define ENABLE_SIMPLE_MATH
  30. // Turns on/off the dot routines
  31. #define ENABLE_DOT
  32. // Turns on/off the compare routines
  33. #define ENABLE_COMPARES
  34. // The MinMax routines introduce a couple of bugs. In the bathroom of the alphalabs2 map, the
  35. // wrong surface appears in the mirror at times. It also introduces a noticable delay when map
  36. // data is loaded such as going through doors.
  37. // Turns on/off MinMax routines
  38. //#define ENABLE_MINMAX
  39. // Turns on/off Clamp routines
  40. #define ENABLE_CLAMP
  41. // Turns on/off XXX16 routines
  42. #define ENABLE_16ROUTINES
  43. // Turns on/off LowerTriangularSolve, LowerTriangularSolveTranspose, and MatX_LDLTFactor
  44. #define ENABLE_LOWER_TRIANGULAR
  45. // Turns on/off TracePointCull, DecalPointCull, and OverlayPoint
  46. // The Enable_Cull routines breaks the g_decals functionality, DecalPointCull is
  47. // the likely suspect. Bullet holes do not appear on the walls when this optimization
  48. // is enabled.
  49. //#define ENABLE_CULL
  50. // Turns on/off DeriveTriPlanes, DeriveTangents, DeriveUnsmoothedTangents, NormalizeTangents
  51. #define ENABLE_DERIVE
  52. // Turns on/off CreateTextureSpaceLightVectors, CreateShadowCache, CreateVertexProgramShadowCache
  53. #define ENABLE_CREATE
  54. // Turns on/off the sound routines
  55. #define ENABLE_SOUND_ROUTINES
  56. // Turns on/off the stuff that isn't on elsewhere
  57. // Currently: BlendJoints, TransformJoints, UntransformJoints, ConvertJointQuatsToJointMats, and
  58. // ConvertJointMatsToJointQuats
  59. #define LIVE_VICARIOUSLY
  60. // This assumes that the dest (and mixBuffer) array to the sound functions is aligned. If this is not true, we take a large
  61. // performance hit from having to do unaligned stores
  62. //#define SOUND_DEST_ALIGNED
  63. // This assumes that the vertexCache array to CreateShadowCache and CreateVertexProgramShadowCache is aligned. If it's not,
  64. // then we take a big performance hit from unaligned stores.
  65. //#define VERTEXCACHE_ALIGNED
  66. // This turns on support for PPC intrinsics in the SIMD_AltiVec.cpp file. Right now it's only used for frsqrte. GCC
  67. // supports these intrinsics but XLC does not.
  68. #define PPC_INTRINSICS
  69. // This assumes that the idDrawVert array that is used in DeriveUnsmoothedTangents is aligned. If its not aligned,
  70. // then we don't get any speedup
  71. //#define DERIVE_UNSMOOTH_DRAWVERT_ALIGNED
  72. // Disable DRAWVERT_PADDED since we disabled the ENABLE_CULL optimizations and the default
  73. // implementation does not allow for the extra padding.
  74. // This assumes that idDrawVert has been padded by 4 bytes so that xyz always starts at an aligned
  75. // address
  76. //#define DRAWVERT_PADDED
  77. class idSIMD_AltiVec : public idSIMD_Generic {
  78. #if defined(MACOS_X) && defined(__ppc__)
  79. public:
  80. virtual const char * VPCALL GetName( void ) const;
  81. #ifdef ENABLE_SIMPLE_MATH
  82. // Basic math, works for both aligned and unaligned data
  83. virtual void VPCALL Add( float *dst, const float constant, const float *src, const int count );
  84. virtual void VPCALL Add( float *dst, const float *src0, const float *src1, const int count );
  85. virtual void VPCALL Sub( float *dst, const float constant, const float *src, const int count );
  86. virtual void VPCALL Sub( float *dst, const float *src0, const float *src1, const int count );
  87. virtual void VPCALL Mul( float *dst, const float constant, const float *src, const int count);
  88. virtual void VPCALL Mul( float *dst, const float *src0, const float *src1, const int count );
  89. virtual void VPCALL Div( float *dst, const float constant, const float *divisor, const int count );
  90. virtual void VPCALL Div( float *dst, const float *src0, const float *src1, const int count );
  91. virtual void VPCALL MulAdd( float *dst, const float constant, const float *src, const int count );
  92. virtual void VPCALL MulAdd( float *dst, const float *src0, const float *src1, const int count );
  93. virtual void VPCALL MulSub( float *dst, const float constant, const float *src, const int count );
  94. virtual void VPCALL MulSub( float *dst, const float *src0, const float *src1, const int count );
  95. #endif
  96. #ifdef ENABLE_DOT
  97. // Dot products, expects data structures in contiguous memory
  98. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count );
  99. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count );
  100. virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count );
  101. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idVec3 *src, const int count );
  102. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idPlane *src, const int count );
  103. virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count );
  104. virtual void VPCALL Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count );
  105. virtual void VPCALL Dot( float &dot, const float *src1, const float *src2, const int count );
  106. #endif
  107. #ifdef ENABLE_COMPARES
  108. // Comparisons, works for both aligned and unaligned data
  109. virtual void VPCALL CmpGT( byte *dst, const float *src0, const float constant, const int count );
  110. virtual void VPCALL CmpGT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
  111. virtual void VPCALL CmpGE( byte *dst, const float *src0, const float constant, const int count );
  112. virtual void VPCALL CmpGE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
  113. virtual void VPCALL CmpLT( byte *dst, const float *src0, const float constant, const int count );
  114. virtual void VPCALL CmpLT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
  115. virtual void VPCALL CmpLE( byte *dst, const float *src0, const float constant, const int count );
  116. virtual void VPCALL CmpLE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
  117. #endif
  118. #ifdef ENABLE_MINMAX
  119. // Min/Max. Expects data structures in contiguous memory
  120. virtual void VPCALL MinMax( float &min, float &max, const float *src, const int count );
  121. virtual void VPCALL MinMax( idVec2 &min, idVec2 &max, const idVec2 *src, const int count );
  122. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idVec3 *src, const int count );
  123. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int count );
  124. virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count );
  125. #endif
  126. #ifdef ENABLE_CLAMP
  127. // Clamp operations. Works for both aligned and unaligned data
  128. virtual void VPCALL Clamp( float *dst, const float *src, const float min, const float max, const int count );
  129. virtual void VPCALL ClampMin( float *dst, const float *src, const float min, const int count );
  130. virtual void VPCALL ClampMax( float *dst, const float *src, const float max, const int count );
  131. #endif
  132. // These are already using memcpy and memset functions. Leaving default implementation
  133. // virtual void VPCALL Memcpy( void *dst, const void *src, const int count );
  134. // virtual void VPCALL Memset( void *dst, const int val, const int count );
  135. #ifdef ENABLE_16ROUTINES
  136. // Operations that expect 16-byte aligned data and 16-byte padded memory (with zeros), generally faster
  137. virtual void VPCALL Zero16( float *dst, const int count );
  138. virtual void VPCALL Negate16( float *dst, const int count );
  139. virtual void VPCALL Copy16( float *dst, const float *src, const int count );
  140. virtual void VPCALL Add16( float *dst, const float *src1, const float *src2, const int count );
  141. virtual void VPCALL Sub16( float *dst, const float *src1, const float *src2, const int count );
  142. virtual void VPCALL Mul16( float *dst, const float *src1, const float constant, const int count );
  143. virtual void VPCALL AddAssign16( float *dst, const float *src, const int count );
  144. virtual void VPCALL SubAssign16( float *dst, const float *src, const int count );
  145. virtual void VPCALL MulAssign16( float *dst, const float constant, const int count );
  146. #endif
  147. // Most of these deal with tiny matrices or vectors, generally not worth altivec'ing since
  148. // the scalar code is already really fast
  149. // virtual void VPCALL MatX_MultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  150. // virtual void VPCALL MatX_MultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  151. // virtual void VPCALL MatX_MultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  152. // virtual void VPCALL MatX_TransposeMultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  153. // virtual void VPCALL MatX_TransposeMultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  154. // virtual void VPCALL MatX_TransposeMultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
  155. // virtual void VPCALL MatX_MultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
  156. // virtual void VPCALL MatX_TransposeMultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
  157. #ifdef ENABLE_LOWER_TRIANGULAR
  158. virtual void VPCALL MatX_LowerTriangularSolve( const idMatX &L, float *x, const float *b, const int n, int skip = 0 );
  159. virtual void VPCALL MatX_LowerTriangularSolveTranspose( const idMatX &L, float *x, const float *b, const int n );
  160. virtual bool VPCALL MatX_LDLTFactor( idMatX &mat, idVecX &invDiag, const int n );
  161. #endif
  162. #ifdef LIVE_VICARIOUSLY
  163. virtual void VPCALL BlendJoints( idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints );
  164. virtual void VPCALL ConvertJointQuatsToJointMats( idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints );
  165. virtual void VPCALL ConvertJointMatsToJointQuats( idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints );
  166. #endif
  167. #ifdef LIVE_VICARIOUSLY
  168. virtual void VPCALL TransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
  169. virtual void VPCALL UntransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
  170. virtual void VPCALL TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights );
  171. #endif
  172. #ifdef ENABLE_CULL
  173. virtual void VPCALL TracePointCull( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts );
  174. virtual void VPCALL DecalPointCull( byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts );
  175. virtual void VPCALL OverlayPointCull( byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts );
  176. #endif
  177. #ifdef ENABLE_DERIVE
  178. virtual void VPCALL DeriveTriPlanes( idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
  179. virtual void VPCALL DeriveTangents( idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
  180. virtual void VPCALL DeriveUnsmoothedTangents( idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts );
  181. virtual void VPCALL NormalizeTangents( idDrawVert *verts, const int numVerts );
  182. #endif
  183. #ifdef ENABLE_CREATE
  184. virtual void VPCALL CreateTextureSpaceLightVectors( idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
  185. virtual void VPCALL CreateSpecularTextureCoords( idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
  186. virtual int VPCALL CreateShadowCache( idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts );
  187. virtual int VPCALL CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts );
  188. #endif
  189. #ifdef ENABLE_SOUND_ROUTINES
  190. // Sound upsampling and mixing routines, works for aligned and unaligned data
  191. virtual void VPCALL UpSamplePCMTo44kHz( float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels );
  192. virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels );
  193. virtual void VPCALL MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
  194. virtual void VPCALL MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
  195. virtual void VPCALL MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
  196. virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
  197. virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples );
  198. #endif
  199. #endif
  200. };
  201. #endif /* !__MATH_SIMD_ALTIVEC_H__ */