pa_x86_plain_converters.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219
  1. /*
  2. * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
  3. * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining
  6. * a copy of this software and associated documentation files
  7. * (the "Software"), to deal in the Software without restriction,
  8. * including without limitation the rights to use, copy, modify, merge,
  9. * publish, distribute, sublicense, and/or sell copies of the Software,
  10. * and to permit persons to whom the Software is furnished to do so,
  11. * subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be
  14. * included in all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
  20. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  21. * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. */
  24. /*
  25. * The text above constitutes the entire PortAudio license; however,
  26. * the PortAudio community also makes the following non-binding requests:
  27. *
  28. * Any person wishing to distribute modifications to the Software is
  29. * requested to send the modifications to the original developer so that
  30. * they can be incorporated into the canonical version. It is also
  31. * requested that these non-binding requests be included along with the
  32. * license above.
  33. */
  34. /** @file
  35. @ingroup win_src
  36. */
  37. #include "pa_x86_plain_converters.h"
  38. #include "pa_converters.h"
  39. #include "pa_dither.h"
  40. /*
  41. the main reason these versions are faster than the equivalent C versions
  42. is that float -> int casting is expensive in C on x86 because the rounding
  43. mode needs to be changed for every cast. these versions only set
  44. the rounding mode once outside the loop.
  45. small additional speed gains are made by the way that clamping is
  46. implemented.
  47. TODO:
  48. o- inline dither code
  49. o- implement Dither only (no-clip) versions
  50. o- implement int8 and uint8 versions
  51. o- test thoroughly
  52. o- the packed 24 bit functions could benefit from unrolling and avoiding
  53. byte and word sized register access.
  54. */
  55. /* -------------------------------------------------------------------------- */
  56. /*
  57. #define PA_CLIP_( val, min, max )\
  58. { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
  59. */
  60. /*
  61. the following notes were used to determine whether a floating point
  62. value should be saturated (ie >1 or <-1) by loading it into an integer
  63. register. these should be rewritten so that they make sense.
  64. an ieee floating point value
  65. 1.xxxxxxxxxxxxxxxxxxxx?
  66. is less than or equal to 1 and greater than or equal to -1 if either:
  67. if the mantissa is 0 and the unbiased exponent is 0
  68. OR
  69. if the unbiased exponent < 0
  70. this translates to:
  71. if the mantissa is 0 and the biased exponent is 7F
  72. or
  73. if the biased exponent is less than 7F
  74. therefore the value is greater than 1 or less than -1 if
  75. the mantissa is not 0 and the biased exponent is 7F
  76. or
  77. if the biased exponent is greater than 7F
  78. in other words, if we mask out the sign bit, the value is
  79. greater than 1 or less than -1 if its integer representation is greater than:
  80. 0 01111111 0000 0000 0000 0000 0000 000
  81. 0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
  82. */
  83. #if defined(_WIN64) || defined(_WIN32_WCE)
  84. /*
  85. -EMT64/AMD64 uses different asm
  86. -VC2005 doesn't allow _WIN64 with inline assembly either!
  87. */
  /*
      Stub used when _WIN64 or _WIN32_WCE is defined: this branch contains no
      inline-assembly converters, so the body is intentionally empty and the
      call has no effect.
  */
  void PaUtil_InitializeX86PlainConverters( void )
  {
      /* nothing to install on this target */
  }
  91. #else
  92. /* -------------------------------------------------------------------------- */
  /* x87 control word loaded for the duration of each conversion loop:
     round to nearest, 64 bit precision, all exceptions masked. */
  static const short fpuControlWord_ = 0x033F;

  /* Full-scale multipliers applied to a float sample before it is stored as an
     integer. The "dithered" variants are one step smaller so that adding the
     dither signal cannot push a full-scale sample out of range. */
  static const double int32Scaler_ = 0x7FFFFFFF;
  static const double ditheredInt32Scaler_ = 0x7FFFFFFE;
  static const double int24Scaler_ = 0x7FFFFF;
  static const double ditheredInt24Scaler_ = 0x7FFFFE;
  static const double int16Scaler_ = 0x7FFF;
  static const double ditheredInt16Scaler_ = 0x7FFE;

  /* Number of bits of dither amplitude. */
  #define PA_DITHER_BITS_ (15)

  /* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
  /* NOTE(review): the inline generators in this file add two 14 bit values and
     subtract the previous sum, which stays within +/-32766, i.e. just under
     +/-1.0 after scaling. The range quoted above looks stale - confirm against
     pa_dither.c. */
  #define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1))

  /* Memory copy of the scale factor so it can be used as an FMUL operand. */
  static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;

  /* Right shift that reduces a 32 bit random word to PA_DITHER_BITS_-1 bits. */
  #define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1)
  105. /* -------------------------------------------------------------------------- */
  106. static void Float32_To_Int32(
  107. void *destinationBuffer, signed int destinationStride,
  108. void *sourceBuffer, signed int sourceStride,
  109. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  110. {
  111. /*
  112. float *src = (float*)sourceBuffer;
  113. signed long *dest = (signed long*)destinationBuffer;
  114. (void)ditherGenerator; // unused parameter
  115. while( count-- )
  116. {
  117. // REVIEW
  118. double scaled = *src * 0x7FFFFFFF;
  119. *dest = (signed long) scaled;
  120. src += sourceStride;
  121. dest += destinationStride;
  122. }
  123. */
  124. short savedFpuControlWord;
  125. (void) ditherGenerator; /* unused parameter */
  126. __asm{
  127. // esi -> source ptr
  128. // eax -> source byte stride
  129. // edi -> destination ptr
  130. // ebx -> destination byte stride
  131. // ecx -> source end ptr
  132. // edx -> temp
  133. mov esi, sourceBuffer
  134. mov edx, 4 // sizeof float32 and int32
  135. mov eax, sourceStride
  136. imul eax, edx
  137. mov ecx, count
  138. imul ecx, eax
  139. add ecx, esi
  140. mov edi, destinationBuffer
  141. mov ebx, destinationStride
  142. imul ebx, edx
  143. fwait
  144. fstcw savedFpuControlWord
  145. fldcw fpuControlWord_
  146. fld int32Scaler_ // stack: (int)0x7FFFFFFF
  147. Float32_To_Int32_loop:
  148. // load unscaled value into st(0)
  149. fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
  150. add esi, eax // increment source ptr
  151. //lea esi, [esi+eax]
  152. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
  153. /*
  154. note: we could store to a temporary qword here which would cause
  155. wraparound distortion instead of int indefinite 0x10. that would
  156. be more work, and given that not enabling clipping is only advisable
  157. when you know that your signal isn't going to clip it isn't worth it.
  158. */
  159. fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
  160. add edi, ebx // increment destination ptr
  161. //lea edi, [edi+ebx]
  162. cmp esi, ecx // has src ptr reached end?
  163. jne Float32_To_Int32_loop
  164. ffree st(0)
  165. fincstp
  166. fwait
  167. fnclex
  168. fldcw savedFpuControlWord
  169. }
  170. }
  171. /* -------------------------------------------------------------------------- */
  172. static void Float32_To_Int32_Clip(
  173. void *destinationBuffer, signed int destinationStride,
  174. void *sourceBuffer, signed int sourceStride,
  175. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  176. {
  177. /*
  178. float *src = (float*)sourceBuffer;
  179. signed long *dest = (signed long*)destinationBuffer;
  180. (void) ditherGenerator; // unused parameter
  181. while( count-- )
  182. {
  183. // REVIEW
  184. double scaled = *src * 0x7FFFFFFF;
  185. PA_CLIP_( scaled, -2147483648., 2147483647. );
  186. *dest = (signed long) scaled;
  187. src += sourceStride;
  188. dest += destinationStride;
  189. }
  190. */
  191. short savedFpuControlWord;
  192. (void) ditherGenerator; /* unused parameter */
  193. __asm{
  194. // esi -> source ptr
  195. // eax -> source byte stride
  196. // edi -> destination ptr
  197. // ebx -> destination byte stride
  198. // ecx -> source end ptr
  199. // edx -> temp
  200. mov esi, sourceBuffer
  201. mov edx, 4 // sizeof float32 and int32
  202. mov eax, sourceStride
  203. imul eax, edx
  204. mov ecx, count
  205. imul ecx, eax
  206. add ecx, esi
  207. mov edi, destinationBuffer
  208. mov ebx, destinationStride
  209. imul ebx, edx
  210. fwait
  211. fstcw savedFpuControlWord
  212. fldcw fpuControlWord_
  213. fld int32Scaler_ // stack: (int)0x7FFFFFFF
  214. Float32_To_Int32_Clip_loop:
  215. mov edx, dword ptr [esi] // load floating point value into integer register
  216. and edx, 0x7FFFFFFF // mask off sign
  217. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  218. jg Float32_To_Int32_Clip_clamp
  219. // load unscaled value into st(0)
  220. fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
  221. add esi, eax // increment source ptr
  222. //lea esi, [esi+eax]
  223. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
  224. fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
  225. jmp Float32_To_Int32_Clip_stored
  226. Float32_To_Int32_Clip_clamp:
  227. mov edx, dword ptr [esi] // load floating point value into integer register
  228. shr edx, 31 // move sign bit into bit 0
  229. add esi, eax // increment source ptr
  230. //lea esi, [esi+eax]
  231. add edx, 0x7FFFFFFF // convert to maximum range integers
  232. mov dword ptr [edi], edx
  233. Float32_To_Int32_Clip_stored:
  234. //add edi, ebx // increment destination ptr
  235. lea edi, [edi+ebx]
  236. cmp esi, ecx // has src ptr reached end?
  237. jne Float32_To_Int32_Clip_loop
  238. ffree st(0)
  239. fincstp
  240. fwait
  241. fnclex
  242. fldcw savedFpuControlWord
  243. }
  244. }
  245. /* -------------------------------------------------------------------------- */
  246. static void Float32_To_Int32_DitherClip(
  247. void *destinationBuffer, signed int destinationStride,
  248. void *sourceBuffer, signed int sourceStride,
  249. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  250. {
  251. /*
  252. float *src = (float*)sourceBuffer;
  253. signed long *dest = (signed long*)destinationBuffer;
  254. while( count-- )
  255. {
  256. // REVIEW
  257. double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
  258. // use smaller scaler to prevent overflow when we add the dither
  259. double dithered = ((double)*src * (2147483646.0)) + dither;
  260. PA_CLIP_( dithered, -2147483648., 2147483647. );
  261. *dest = (signed long) dithered;
  262. src += sourceStride;
  263. dest += destinationStride;
  264. }
  265. */
  266. short savedFpuControlWord;
  267. // spill storage:
  268. signed long sourceByteStride;
  269. signed long highpassedDither;
  270. // dither state:
  271. unsigned long ditherPrevious = ditherGenerator->previous;
  272. unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
  273. unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
  274. __asm{
  275. // esi -> source ptr
  276. // eax -> source byte stride
  277. // edi -> destination ptr
  278. // ebx -> destination byte stride
  279. // ecx -> source end ptr
  280. // edx -> temp
  281. mov esi, sourceBuffer
  282. mov edx, 4 // sizeof float32 and int32
  283. mov eax, sourceStride
  284. imul eax, edx
  285. mov ecx, count
  286. imul ecx, eax
  287. add ecx, esi
  288. mov edi, destinationBuffer
  289. mov ebx, destinationStride
  290. imul ebx, edx
  291. fwait
  292. fstcw savedFpuControlWord
  293. fldcw fpuControlWord_
  294. fld ditheredInt32Scaler_ // stack: int scaler
  295. Float32_To_Int32_DitherClip_loop:
  296. mov edx, dword ptr [esi] // load floating point value into integer register
  297. and edx, 0x7FFFFFFF // mask off sign
  298. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  299. jg Float32_To_Int32_DitherClip_clamp
  300. // load unscaled value into st(0)
  301. fld dword ptr [esi] // stack: value, int scaler
  302. add esi, eax // increment source ptr
  303. //lea esi, [esi+eax]
  304. fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
  305. /*
  306. // call PaUtil_GenerateFloatTriangularDither with C calling convention
  307. mov sourceByteStride, eax // save eax
  308. mov sourceEnd, ecx // save ecx
  309. push ditherGenerator // pass ditherGenerator parameter on stack
  310. call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
  311. pop edx // clear parameter off stack
  312. mov ecx, sourceEnd // restore ecx
  313. mov eax, sourceByteStride // restore eax
  314. */
  315. // generate dither
  316. mov sourceByteStride, eax // save eax
  317. mov edx, 196314165
  318. mov eax, ditherRandSeed1
  319. mul edx // eax:edx = eax * 196314165
  320. //add eax, 907633515
  321. lea eax, [eax+907633515]
  322. mov ditherRandSeed1, eax
  323. mov edx, 196314165
  324. mov eax, ditherRandSeed2
  325. mul edx // eax:edx = eax * 196314165
  326. //add eax, 907633515
  327. lea eax, [eax+907633515]
  328. mov edx, ditherRandSeed1
  329. shr edx, PA_DITHER_SHIFT_
  330. mov ditherRandSeed2, eax
  331. shr eax, PA_DITHER_SHIFT_
  332. //add eax, edx // eax -> current
  333. lea eax, [eax+edx]
  334. mov edx, ditherPrevious
  335. neg edx
  336. lea edx, [eax+edx] // highpass = current - previous
  337. mov highpassedDither, edx
  338. mov ditherPrevious, eax // previous = current
  339. mov eax, sourceByteStride // restore eax
  340. fild highpassedDither
  341. fmul const_float_dither_scale_
  342. // end generate dither, dither signal in st(0)
  343. faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
  344. fistp dword ptr [edi] // pop st(0) into dest, stack: int scaler
  345. jmp Float32_To_Int32_DitherClip_stored
  346. Float32_To_Int32_DitherClip_clamp:
  347. mov edx, dword ptr [esi] // load floating point value into integer register
  348. shr edx, 31 // move sign bit into bit 0
  349. add esi, eax // increment source ptr
  350. //lea esi, [esi+eax]
  351. add edx, 0x7FFFFFFF // convert to maximum range integers
  352. mov dword ptr [edi], edx
  353. Float32_To_Int32_DitherClip_stored:
  354. //add edi, ebx // increment destination ptr
  355. lea edi, [edi+ebx]
  356. cmp esi, ecx // has src ptr reached end?
  357. jne Float32_To_Int32_DitherClip_loop
  358. ffree st(0)
  359. fincstp
  360. fwait
  361. fnclex
  362. fldcw savedFpuControlWord
  363. }
  364. ditherGenerator->previous = ditherPrevious;
  365. ditherGenerator->randSeed1 = ditherRandSeed1;
  366. ditherGenerator->randSeed2 = ditherRandSeed2;
  367. }
  368. /* -------------------------------------------------------------------------- */
  369. static void Float32_To_Int24(
  370. void *destinationBuffer, signed int destinationStride,
  371. void *sourceBuffer, signed int sourceStride,
  372. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  373. {
  374. /*
  375. float *src = (float*)sourceBuffer;
  376. unsigned char *dest = (unsigned char*)destinationBuffer;
  377. signed long temp;
  378. (void) ditherGenerator; // unused parameter
  379. while( count-- )
  380. {
  381. // convert to 32 bit and drop the low 8 bits
  382. double scaled = *src * 0x7FFFFFFF;
  383. temp = (signed long) scaled;
  384. dest[0] = (unsigned char)(temp >> 8);
  385. dest[1] = (unsigned char)(temp >> 16);
  386. dest[2] = (unsigned char)(temp >> 24);
  387. src += sourceStride;
  388. dest += destinationStride * 3;
  389. }
  390. */
  391. short savedFpuControlWord;
  392. signed long tempInt32;
  393. (void) ditherGenerator; /* unused parameter */
  394. __asm{
  395. // esi -> source ptr
  396. // eax -> source byte stride
  397. // edi -> destination ptr
  398. // ebx -> destination byte stride
  399. // ecx -> source end ptr
  400. // edx -> temp
  401. mov esi, sourceBuffer
  402. mov edx, 4 // sizeof float32
  403. mov eax, sourceStride
  404. imul eax, edx
  405. mov ecx, count
  406. imul ecx, eax
  407. add ecx, esi
  408. mov edi, destinationBuffer
  409. mov edx, 3 // sizeof int24
  410. mov ebx, destinationStride
  411. imul ebx, edx
  412. fwait
  413. fstcw savedFpuControlWord
  414. fldcw fpuControlWord_
  415. fld int24Scaler_ // stack: (int)0x7FFFFF
  416. Float32_To_Int24_loop:
  417. // load unscaled value into st(0)
  418. fld dword ptr [esi] // stack: value, (int)0x7FFFFF
  419. add esi, eax // increment source ptr
  420. //lea esi, [esi+eax]
  421. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
  422. fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
  423. mov edx, tempInt32
  424. mov byte ptr [edi], DL
  425. shr edx, 8
  426. //mov byte ptr [edi+1], DL
  427. //mov byte ptr [edi+2], DH
  428. mov word ptr [edi+1], DX
  429. //add edi, ebx // increment destination ptr
  430. lea edi, [edi+ebx]
  431. cmp esi, ecx // has src ptr reached end?
  432. jne Float32_To_Int24_loop
  433. ffree st(0)
  434. fincstp
  435. fwait
  436. fnclex
  437. fldcw savedFpuControlWord
  438. }
  439. }
  440. /* -------------------------------------------------------------------------- */
  441. static void Float32_To_Int24_Clip(
  442. void *destinationBuffer, signed int destinationStride,
  443. void *sourceBuffer, signed int sourceStride,
  444. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  445. {
  446. /*
  447. float *src = (float*)sourceBuffer;
  448. unsigned char *dest = (unsigned char*)destinationBuffer;
  449. signed long temp;
  450. (void) ditherGenerator; // unused parameter
  451. while( count-- )
  452. {
  453. // convert to 32 bit and drop the low 8 bits
  454. double scaled = *src * 0x7FFFFFFF;
  455. PA_CLIP_( scaled, -2147483648., 2147483647. );
  456. temp = (signed long) scaled;
  457. dest[0] = (unsigned char)(temp >> 8);
  458. dest[1] = (unsigned char)(temp >> 16);
  459. dest[2] = (unsigned char)(temp >> 24);
  460. src += sourceStride;
  461. dest += destinationStride * 3;
  462. }
  463. */
  464. short savedFpuControlWord;
  465. signed long tempInt32;
  466. (void) ditherGenerator; /* unused parameter */
  467. __asm{
  468. // esi -> source ptr
  469. // eax -> source byte stride
  470. // edi -> destination ptr
  471. // ebx -> destination byte stride
  472. // ecx -> source end ptr
  473. // edx -> temp
  474. mov esi, sourceBuffer
  475. mov edx, 4 // sizeof float32
  476. mov eax, sourceStride
  477. imul eax, edx
  478. mov ecx, count
  479. imul ecx, eax
  480. add ecx, esi
  481. mov edi, destinationBuffer
  482. mov edx, 3 // sizeof int24
  483. mov ebx, destinationStride
  484. imul ebx, edx
  485. fwait
  486. fstcw savedFpuControlWord
  487. fldcw fpuControlWord_
  488. fld int24Scaler_ // stack: (int)0x7FFFFF
  489. Float32_To_Int24_Clip_loop:
  490. mov edx, dword ptr [esi] // load floating point value into integer register
  491. and edx, 0x7FFFFFFF // mask off sign
  492. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  493. jg Float32_To_Int24_Clip_clamp
  494. // load unscaled value into st(0)
  495. fld dword ptr [esi] // stack: value, (int)0x7FFFFF
  496. add esi, eax // increment source ptr
  497. //lea esi, [esi+eax]
  498. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
  499. fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
  500. mov edx, tempInt32
  501. jmp Float32_To_Int24_Clip_store
  502. Float32_To_Int24_Clip_clamp:
  503. mov edx, dword ptr [esi] // load floating point value into integer register
  504. shr edx, 31 // move sign bit into bit 0
  505. add esi, eax // increment source ptr
  506. //lea esi, [esi+eax]
  507. add edx, 0x7FFFFF // convert to maximum range integers
  508. Float32_To_Int24_Clip_store:
  509. mov byte ptr [edi], DL
  510. shr edx, 8
  511. //mov byte ptr [edi+1], DL
  512. //mov byte ptr [edi+2], DH
  513. mov word ptr [edi+1], DX
  514. //add edi, ebx // increment destination ptr
  515. lea edi, [edi+ebx]
  516. cmp esi, ecx // has src ptr reached end?
  517. jne Float32_To_Int24_Clip_loop
  518. ffree st(0)
  519. fincstp
  520. fwait
  521. fnclex
  522. fldcw savedFpuControlWord
  523. }
  524. }
  525. /* -------------------------------------------------------------------------- */
  526. static void Float32_To_Int24_DitherClip(
  527. void *destinationBuffer, signed int destinationStride,
  528. void *sourceBuffer, signed int sourceStride,
  529. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  530. {
  531. /*
  532. float *src = (float*)sourceBuffer;
  533. unsigned char *dest = (unsigned char*)destinationBuffer;
  534. signed long temp;
  535. while( count-- )
  536. {
  537. // convert to 32 bit and drop the low 8 bits
  538. // FIXME: the dither amplitude here appears to be too small by 8 bits
  539. double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
  540. // use smaller scaler to prevent overflow when we add the dither
  541. double dithered = ((double)*src * (2147483646.0)) + dither;
  542. PA_CLIP_( dithered, -2147483648., 2147483647. );
  543. temp = (signed long) dithered;
  544. dest[0] = (unsigned char)(temp >> 8);
  545. dest[1] = (unsigned char)(temp >> 16);
  546. dest[2] = (unsigned char)(temp >> 24);
  547. src += sourceStride;
  548. dest += destinationStride * 3;
  549. }
  550. */
  551. short savedFpuControlWord;
  552. // spill storage:
  553. signed long sourceByteStride;
  554. signed long highpassedDither;
  555. // dither state:
  556. unsigned long ditherPrevious = ditherGenerator->previous;
  557. unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
  558. unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
  559. signed long tempInt32;
  560. __asm{
  561. // esi -> source ptr
  562. // eax -> source byte stride
  563. // edi -> destination ptr
  564. // ebx -> destination byte stride
  565. // ecx -> source end ptr
  566. // edx -> temp
  567. mov esi, sourceBuffer
  568. mov edx, 4 // sizeof float32
  569. mov eax, sourceStride
  570. imul eax, edx
  571. mov ecx, count
  572. imul ecx, eax
  573. add ecx, esi
  574. mov edi, destinationBuffer
  575. mov edx, 3 // sizeof int24
  576. mov ebx, destinationStride
  577. imul ebx, edx
  578. fwait
  579. fstcw savedFpuControlWord
  580. fldcw fpuControlWord_
  581. fld ditheredInt24Scaler_ // stack: int scaler
  582. Float32_To_Int24_DitherClip_loop:
  583. mov edx, dword ptr [esi] // load floating point value into integer register
  584. and edx, 0x7FFFFFFF // mask off sign
  585. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  586. jg Float32_To_Int24_DitherClip_clamp
  587. // load unscaled value into st(0)
  588. fld dword ptr [esi] // stack: value, int scaler
  589. add esi, eax // increment source ptr
  590. //lea esi, [esi+eax]
  591. fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
  592. /*
  593. // call PaUtil_GenerateFloatTriangularDither with C calling convention
  594. mov sourceByteStride, eax // save eax
  595. mov sourceEnd, ecx // save ecx
  596. push ditherGenerator // pass ditherGenerator parameter on stack
  597. call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
  598. pop edx // clear parameter off stack
  599. mov ecx, sourceEnd // restore ecx
  600. mov eax, sourceByteStride // restore eax
  601. */
  602. // generate dither
  603. mov sourceByteStride, eax // save eax
  604. mov edx, 196314165
  605. mov eax, ditherRandSeed1
  606. mul edx // eax:edx = eax * 196314165
  607. //add eax, 907633515
  608. lea eax, [eax+907633515]
  609. mov ditherRandSeed1, eax
  610. mov edx, 196314165
  611. mov eax, ditherRandSeed2
  612. mul edx // eax:edx = eax * 196314165
  613. //add eax, 907633515
  614. lea eax, [eax+907633515]
  615. mov edx, ditherRandSeed1
  616. shr edx, PA_DITHER_SHIFT_
  617. mov ditherRandSeed2, eax
  618. shr eax, PA_DITHER_SHIFT_
  619. //add eax, edx // eax -> current
  620. lea eax, [eax+edx]
  621. mov edx, ditherPrevious
  622. neg edx
  623. lea edx, [eax+edx] // highpass = current - previous
  624. mov highpassedDither, edx
  625. mov ditherPrevious, eax // previous = current
  626. mov eax, sourceByteStride // restore eax
  627. fild highpassedDither
  628. fmul const_float_dither_scale_
  629. // end generate dither, dither signal in st(0)
  630. faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler
  631. fistp tempInt32 // pop st(0) into tempInt32, stack: int scaler
  632. mov edx, tempInt32
  633. jmp Float32_To_Int24_DitherClip_store
  634. Float32_To_Int24_DitherClip_clamp:
  635. mov edx, dword ptr [esi] // load floating point value into integer register
  636. shr edx, 31 // move sign bit into bit 0
  637. add esi, eax // increment source ptr
  638. //lea esi, [esi+eax]
  639. add edx, 0x7FFFFF // convert to maximum range integers
  640. Float32_To_Int24_DitherClip_store:
  641. mov byte ptr [edi], DL
  642. shr edx, 8
  643. //mov byte ptr [edi+1], DL
  644. //mov byte ptr [edi+2], DH
  645. mov word ptr [edi+1], DX
  646. //add edi, ebx // increment destination ptr
  647. lea edi, [edi+ebx]
  648. cmp esi, ecx // has src ptr reached end?
  649. jne Float32_To_Int24_DitherClip_loop
  650. ffree st(0)
  651. fincstp
  652. fwait
  653. fnclex
  654. fldcw savedFpuControlWord
  655. }
  656. ditherGenerator->previous = ditherPrevious;
  657. ditherGenerator->randSeed1 = ditherRandSeed1;
  658. ditherGenerator->randSeed2 = ditherRandSeed2;
  659. }
  660. /* -------------------------------------------------------------------------- */
  661. static void Float32_To_Int16(
  662. void *destinationBuffer, signed int destinationStride,
  663. void *sourceBuffer, signed int sourceStride,
  664. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  665. {
  666. /*
  667. float *src = (float*)sourceBuffer;
  668. signed short *dest = (signed short*)destinationBuffer;
  669. (void)ditherGenerator; // unused parameter
  670. while( count-- )
  671. {
  672. short samp = (short) (*src * (32767.0f));
  673. *dest = samp;
  674. src += sourceStride;
  675. dest += destinationStride;
  676. }
  677. */
  678. short savedFpuControlWord;
  679. (void) ditherGenerator; /* unused parameter */
  680. __asm{
  681. // esi -> source ptr
  682. // eax -> source byte stride
  683. // edi -> destination ptr
  684. // ebx -> destination byte stride
  685. // ecx -> source end ptr
  686. // edx -> temp
  687. mov esi, sourceBuffer
  688. mov edx, 4 // sizeof float32
  689. mov eax, sourceStride
  690. imul eax, edx // source byte stride
  691. mov ecx, count
  692. imul ecx, eax
  693. add ecx, esi // source end ptr = count * source byte stride + source ptr
  694. mov edi, destinationBuffer
  695. mov edx, 2 // sizeof int16
  696. mov ebx, destinationStride
  697. imul ebx, edx // destination byte stride
  698. fwait
  699. fstcw savedFpuControlWord
  700. fldcw fpuControlWord_
  701. fld int16Scaler_ // stack: (int)0x7FFF
  702. Float32_To_Int16_loop:
  703. // load unscaled value into st(0)
  704. fld dword ptr [esi] // stack: value, (int)0x7FFF
  705. add esi, eax // increment source ptr
  706. //lea esi, [esi+eax]
  707. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
  708. fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
  709. add edi, ebx // increment destination ptr
  710. //lea edi, [edi+ebx]
  711. cmp esi, ecx // has src ptr reached end?
  712. jne Float32_To_Int16_loop
  713. ffree st(0)
  714. fincstp
  715. fwait
  716. fnclex
  717. fldcw savedFpuControlWord
  718. }
  719. }
  720. /* -------------------------------------------------------------------------- */
  721. static void Float32_To_Int16_Clip(
  722. void *destinationBuffer, signed int destinationStride,
  723. void *sourceBuffer, signed int sourceStride,
  724. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  725. {
  726. /*
  727. float *src = (float*)sourceBuffer;
  728. signed short *dest = (signed short*)destinationBuffer;
  729. (void)ditherGenerator; // unused parameter
  730. while( count-- )
  731. {
  732. long samp = (signed long) (*src * (32767.0f));
  733. PA_CLIP_( samp, -0x8000, 0x7FFF );
  734. *dest = (signed short) samp;
  735. src += sourceStride;
  736. dest += destinationStride;
  737. }
  738. */
  739. short savedFpuControlWord;
  740. (void) ditherGenerator; /* unused parameter */
  741. __asm{
  742. // esi -> source ptr
  743. // eax -> source byte stride
  744. // edi -> destination ptr
  745. // ebx -> destination byte stride
  746. // ecx -> source end ptr
  747. // edx -> temp
  748. mov esi, sourceBuffer
  749. mov edx, 4 // sizeof float32
  750. mov eax, sourceStride
  751. imul eax, edx // source byte stride
  752. mov ecx, count
  753. imul ecx, eax
  754. add ecx, esi // source end ptr = count * source byte stride + source ptr
  755. mov edi, destinationBuffer
  756. mov edx, 2 // sizeof int16
  757. mov ebx, destinationStride
  758. imul ebx, edx // destination byte stride
  759. fwait
  760. fstcw savedFpuControlWord
  761. fldcw fpuControlWord_
  762. fld int16Scaler_ // stack: (int)0x7FFF
  763. Float32_To_Int16_Clip_loop:
  764. mov edx, dword ptr [esi] // load floating point value into integer register
  765. and edx, 0x7FFFFFFF // mask off sign
  766. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  767. jg Float32_To_Int16_Clip_clamp
  768. // load unscaled value into st(0)
  769. fld dword ptr [esi] // stack: value, (int)0x7FFF
  770. add esi, eax // increment source ptr
  771. //lea esi, [esi+eax]
  772. fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
  773. fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
  774. jmp Float32_To_Int16_Clip_stored
  775. Float32_To_Int16_Clip_clamp:
  776. mov edx, dword ptr [esi] // load floating point value into integer register
  777. shr edx, 31 // move sign bit into bit 0
  778. add esi, eax // increment source ptr
  779. //lea esi, [esi+eax]
  780. add dx, 0x7FFF // convert to maximum range integers
  781. mov word ptr [edi], dx // store clamped into into dest
  782. Float32_To_Int16_Clip_stored:
  783. add edi, ebx // increment destination ptr
  784. //lea edi, [edi+ebx]
  785. cmp esi, ecx // has src ptr reached end?
  786. jne Float32_To_Int16_Clip_loop
  787. ffree st(0)
  788. fincstp
  789. fwait
  790. fnclex
  791. fldcw savedFpuControlWord
  792. }
  793. }
  794. /* -------------------------------------------------------------------------- */
  795. static void Float32_To_Int16_DitherClip(
  796. void *destinationBuffer, signed int destinationStride,
  797. void *sourceBuffer, signed int sourceStride,
  798. unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
  799. {
  800. /*
  801. float *src = (float*)sourceBuffer;
  802. signed short *dest = (signed short*)destinationBuffer;
  803. (void)ditherGenerator; // unused parameter
  804. while( count-- )
  805. {
  806. float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
  807. // use smaller scaler to prevent overflow when we add the dither
  808. float dithered = (*src * (32766.0f)) + dither;
  809. signed long samp = (signed long) dithered;
  810. PA_CLIP_( samp, -0x8000, 0x7FFF );
  811. *dest = (signed short) samp;
  812. src += sourceStride;
  813. dest += destinationStride;
  814. }
  815. */
  816. short savedFpuControlWord;
  817. // spill storage:
  818. signed long sourceByteStride;
  819. signed long highpassedDither;
  820. // dither state:
  821. unsigned long ditherPrevious = ditherGenerator->previous;
  822. unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
  823. unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
  824. __asm{
  825. // esi -> source ptr
  826. // eax -> source byte stride
  827. // edi -> destination ptr
  828. // ebx -> destination byte stride
  829. // ecx -> source end ptr
  830. // edx -> temp
  831. mov esi, sourceBuffer
  832. mov edx, 4 // sizeof float32
  833. mov eax, sourceStride
  834. imul eax, edx // source byte stride
  835. mov ecx, count
  836. imul ecx, eax
  837. add ecx, esi // source end ptr = count * source byte stride + source ptr
  838. mov edi, destinationBuffer
  839. mov edx, 2 // sizeof int16
  840. mov ebx, destinationStride
  841. imul ebx, edx // destination byte stride
  842. fwait
  843. fstcw savedFpuControlWord
  844. fldcw fpuControlWord_
  845. fld ditheredInt16Scaler_ // stack: int scaler
  846. Float32_To_Int16_DitherClip_loop:
  847. mov edx, dword ptr [esi] // load floating point value into integer register
  848. and edx, 0x7FFFFFFF // mask off sign
  849. cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
  850. jg Float32_To_Int16_DitherClip_clamp
  851. // load unscaled value into st(0)
  852. fld dword ptr [esi] // stack: value, int scaler
  853. add esi, eax // increment source ptr
  854. //lea esi, [esi+eax]
  855. fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
  856. /*
  857. // call PaUtil_GenerateFloatTriangularDither with C calling convention
  858. mov sourceByteStride, eax // save eax
  859. mov sourceEnd, ecx // save ecx
  860. push ditherGenerator // pass ditherGenerator parameter on stack
  861. call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
  862. pop edx // clear parameter off stack
  863. mov ecx, sourceEnd // restore ecx
  864. mov eax, sourceByteStride // restore eax
  865. */
  866. // generate dither
  867. mov sourceByteStride, eax // save eax
  868. mov edx, 196314165
  869. mov eax, ditherRandSeed1
  870. mul edx // eax:edx = eax * 196314165
  871. //add eax, 907633515
  872. lea eax, [eax+907633515]
  873. mov ditherRandSeed1, eax
  874. mov edx, 196314165
  875. mov eax, ditherRandSeed2
  876. mul edx // eax:edx = eax * 196314165
  877. //add eax, 907633515
  878. lea eax, [eax+907633515]
  879. mov edx, ditherRandSeed1
  880. shr edx, PA_DITHER_SHIFT_
  881. mov ditherRandSeed2, eax
  882. shr eax, PA_DITHER_SHIFT_
  883. //add eax, edx // eax -> current
  884. lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
  885. mov edx, ditherPrevious
  886. neg edx
  887. lea edx, [eax+edx] // highpass = current - previous
  888. mov highpassedDither, edx
  889. mov ditherPrevious, eax // previous = current
  890. mov eax, sourceByteStride // restore eax
  891. fild highpassedDither
  892. fmul const_float_dither_scale_
  893. // end generate dither, dither signal in st(0)
  894. faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler
  895. fistp word ptr [edi] // store scaled int into dest, stack: int scaler
  896. jmp Float32_To_Int16_DitherClip_stored
  897. Float32_To_Int16_DitherClip_clamp:
  898. mov edx, dword ptr [esi] // load floating point value into integer register
  899. shr edx, 31 // move sign bit into bit 0
  900. add esi, eax // increment source ptr
  901. //lea esi, [esi+eax]
  902. add dx, 0x7FFF // convert to maximum range integers
  903. mov word ptr [edi], dx // store clamped into into dest
  904. Float32_To_Int16_DitherClip_stored:
  905. add edi, ebx // increment destination ptr
  906. //lea edi, [edi+ebx]
  907. cmp esi, ecx // has src ptr reached end?
  908. jne Float32_To_Int16_DitherClip_loop
  909. ffree st(0)
  910. fincstp
  911. fwait
  912. fnclex
  913. fldcw savedFpuControlWord
  914. }
  915. ditherGenerator->previous = ditherPrevious;
  916. ditherGenerator->randSeed1 = ditherRandSeed1;
  917. ditherGenerator->randSeed2 = ditherRandSeed2;
  918. }
  919. /* -------------------------------------------------------------------------- */
  920. void PaUtil_InitializeX86PlainConverters( void )
  921. {
  922. paConverters.Float32_To_Int32 = Float32_To_Int32;
  923. paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
  924. paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
  925. paConverters.Float32_To_Int24 = Float32_To_Int24;
  926. paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
  927. paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
  928. paConverters.Float32_To_Int16 = Float32_To_Int16;
  929. paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
  930. paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
  931. }
  932. #endif
  933. /* -------------------------------------------------------------------------- */