cpu.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
  9. * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. CPU capability detection for x86 processors.
  13. Originally written by Rudolf Marek.
  14. function:
  15. last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $
  16. ********************************************************************/
  17. #include "cpu.h"
  18. #if !defined(OC_X86_ASM)
  19. static ogg_uint32_t oc_cpu_flags_get(void){
  20. return 0;
  21. }
  22. #else
  23. # if !defined(_MSC_VER)
  24. # if defined(__amd64__)||defined(__x86_64__)
  /*Executes cpuid with _op loaded into eax and stores the resulting
     eax/ebx/ecx/edx values into the four lvalue arguments.
    On x86-64 gcc seems able to work out how to save %rbx for us when
     compiling with -fPIC, so ebx can be named directly as an output.*/
  #   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
    __asm__ __volatile__( \
     "cpuid\n\t" \
     :[a_out]"=a"(_eax), \
      [b_out]"=b"(_ebx), \
      [c_out]"=c"(_ecx), \
      [d_out]"=d"(_edx) \
     :"a"(_op) \
     :"cc" \
    )
  34. # else
  /*On x86-32 gcc cannot be counted on to save %ebx for us (compare the
     x86-64 variant and -fPIC), so %ebx is never named as an operand here.
    The ebx result is instead swapped into a compiler-chosen register around
     the cpuid instruction; the second swap also puts the previous %ebx value
     back in place.*/
  #   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
    __asm__ __volatile__( \
     "xchgl %%ebx,%[b_out]\n\t" \
     "cpuid\n\t" \
     "xchgl %%ebx,%[b_out]\n\t" \
     :[a_out]"=a"(_eax), \
      [b_out]"=r"(_ebx), \
      [c_out]"=c"(_ecx), \
      [d_out]"=d"(_edx) \
     :"a"(_op) \
     :"cc" \
    )
  45. # endif
  46. # else
  47. /*Why does MSVC need this complicated rigamarole?
  48. At this point I honestly do not care.*/
  49. /*Visual C cpuid helper function.
  50. For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
  51. for VS2003 users, so we do it in inline assembler.*/
  52. static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
  53. _asm{
  54. mov eax,[_op]
  55. mov esi,_cpu_info
  56. cpuid
  57. mov [esi+0],eax
  58. mov [esi+4],ebx
  59. mov [esi+8],ecx
  60. mov [esi+12],edx
  61. }
  62. }
  /*Visual C flavour of the cpuid macro: runs oc_cpuid_helper() on a scratch
     array and copies the four results out to the lvalue arguments (eax, ebx,
     ecx, edx, in that order).*/
  #  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
    do{ \
      ogg_uint32_t oc_cpuid_regs_[4]; \
      oc_cpuid_helper(oc_cpuid_regs_,_op); \
      (_eax)=oc_cpuid_regs_[0]; \
      (_ebx)=oc_cpuid_regs_[1]; \
      (_ecx)=oc_cpuid_regs_[2]; \
      (_edx)=oc_cpuid_regs_[3]; \
    }while(0)
  72. static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
  73. _asm{
  74. pushfd
  75. pushfd
  76. pop eax
  77. mov ebx,eax
  78. xor eax,200000h
  79. push eax
  80. popfd
  81. pushfd
  82. pop eax
  83. popfd
  84. mov ecx,_eax
  85. mov [ecx],eax
  86. mov ecx,_ebx
  87. mov [ecx],ebx
  88. }
  89. }
  90. # endif
  91. static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  92. ogg_uint32_t flags;
  93. /*If there isn't even MMX, give up.*/
  94. if(!(_edx&0x00800000))return 0;
  95. flags=OC_CPU_X86_MMX;
  96. if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
  97. if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2;
  98. if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI;
  99. if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3;
  100. if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1;
  101. if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2;
  102. return flags;
  103. }
  104. static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  105. ogg_uint32_t flags;
  106. /*If there isn't even MMX, give up.*/
  107. if(!(_edx&0x00800000))return 0;
  108. flags=OC_CPU_X86_MMX;
  109. if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
  110. if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
  111. if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
  112. if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A;
  113. if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5;
  114. return flags;
  115. }
  116. static ogg_uint32_t oc_cpu_flags_get(void){
  117. ogg_uint32_t flags;
  118. ogg_uint32_t eax;
  119. ogg_uint32_t ebx;
  120. ogg_uint32_t ecx;
  121. ogg_uint32_t edx;
  122. # if !defined(__amd64__)&&!defined(__x86_64__)
  123. /*Not all x86-32 chips support cpuid, so we have to check.*/
  124. # if !defined(_MSC_VER)
  125. __asm__ __volatile__(
  126. "pushfl\n\t"
  127. "pushfl\n\t"
  128. "popl %[a]\n\t"
  129. "movl %[a],%[b]\n\t"
  130. "xorl $0x200000,%[a]\n\t"
  131. "pushl %[a]\n\t"
  132. "popfl\n\t"
  133. "pushfl\n\t"
  134. "popl %[a]\n\t"
  135. "popfl\n\t"
  136. :[a]"=r"(eax),[b]"=r"(ebx)
  137. :
  138. :"cc"
  139. );
  140. # else
  141. oc_detect_cpuid_helper(&eax,&ebx);
  142. # endif
  143. /*No cpuid.*/
  144. if(eax==ebx)return 0;
  145. # endif
  146. cpuid(0,eax,ebx,ecx,edx);
  147. /* l e t n I e n i u n e G*/
  148. if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
  149. /* 6 8 x M T e n i u n e G*/
  150. ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
  151. /*Intel, Transmeta (tested with Crusoe TM5800):*/
  152. cpuid(1,eax,ebx,ecx,edx);
  153. flags=oc_parse_intel_flags(edx,ecx);
  154. }
  155. /* D M A c i t n e h t u A*/
  156. else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
  157. /* C S N y b e d o e G*/
  158. ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
  159. /*AMD, Geode:*/
  160. cpuid(0x80000000,eax,ebx,ecx,edx);
  161. if(eax<0x80000001)flags=0;
  162. else{
  163. cpuid(0x80000001,eax,ebx,ecx,edx);
  164. flags=oc_parse_amd_flags(edx,ecx);
  165. }
  166. /*Also check for SSE.*/
  167. cpuid(1,eax,ebx,ecx,edx);
  168. flags|=oc_parse_intel_flags(edx,ecx);
  169. }
  170. /*Technically some VIA chips can be configured in the BIOS to return any
  171. string here the user wants.
  172. There is a special detection method that can be used to identify such
  173. processors, but in my opinion, if the user really wants to change it, they
  174. deserve what they get.*/
  175. /* s l u a H r u a t n e C*/
  176. else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
  177. /*VIA:*/
  178. /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
  179. chips (thanks to the engineers from Centaur Technology who provided it).
  180. These chips support Intel-like cpuid info.
  181. The C3-2 (Nehemiah) cores appear to, as well.*/
  182. cpuid(1,eax,ebx,ecx,edx);
  183. flags=oc_parse_intel_flags(edx,ecx);
  184. if(eax>=0x80000001){
  185. /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
  186. We need to check this even if the Intel test succeeds to pick up 3DNow!
  187. support on these processors.
  188. Unlike actual AMD processors, we cannot _rely_ on this info, since
  189. some cores (e.g., the 693 stepping of the Nehemiah) claim to support
  190. this function, yet return edx=0, despite the Intel test indicating
  191. MMX support.
  192. Therefore the features detected here are strictly added to those
  193. detected by the Intel test.*/
  194. /*TODO: How about earlier chips?*/
  195. cpuid(0x80000001,eax,ebx,ecx,edx);
  196. /*Note: As of the C7, this function returns Intel-style extended feature
  197. flags, not AMD-style.
  198. Currently, this only defines bits 11, 20, and 29 (0x20100800), which
  199. do not conflict with any of the AMD flags we inspect.
  200. For the remaining bits, Intel tells us, "Do not count on their value",
  201. but VIA assures us that they will all be zero (at least on the C7 and
  202. Isaiah chips).
  203. In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
  204. (0xC0C00000) for something else, we will have to add code to detect
  205. the model to decide when it is appropriate to inspect them.*/
  206. flags|=oc_parse_amd_flags(edx,ecx);
  207. }
  208. }
  209. else{
  210. /*Implement me.*/
  211. flags=0;
  212. }
  213. return flags;
  214. }
  215. #endif