1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219 |
- /*
- * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
- * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files
- * (the "Software"), to deal in the Software without restriction,
- * including without limitation the rights to use, copy, modify, merge,
- * publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
- /*
- * The text above constitutes the entire PortAudio license; however,
- * the PortAudio community also makes the following non-binding requests:
- *
- * Any person wishing to distribute modifications to the Software is
- * requested to send the modifications to the original developer so that
- * they can be incorporated into the canonical version. It is also
- * requested that these non-binding requests be included along with the
- * license above.
- */
- /** @file
- @ingroup win_src
- */
- #include "pa_x86_plain_converters.h"
- #include "pa_converters.h"
- #include "pa_dither.h"
- /*
- the main reason these versions are faster than the equivalent C versions
- is that float -> int casting is expensive in C on x86 because the rounding
- mode needs to be changed for every cast. these versions only set
- the rounding mode once outside the loop.
- small additional speed gains are made by the way that clamping is
- implemented.
- TODO:
- o- inline dither code
- o- implement Dither only (no-clip) versions
- o- implement int8 and uint8 versions
- o- test thouroughly
- o- the packed 24 bit functions could benefit from unrolling and avoiding
- byte and word sized register access.
- */
- /* -------------------------------------------------------------------------- */
- /*
- #define PA_CLIP_( val, min, max )\
- { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
- */
- /*
- the following notes were used to determine whether a floating point
- value should be saturated (ie >1 or <-1) by loading it into an integer
- register. these should be rewritten so that they make sense.
- an ieee floating point value
- 1.xxxxxxxxxxxxxxxxxxxx?
- is less than or equal to 1 and greater than or equal to -1 either:
- if the mantissa is 0 and the unbiased exponent is 0
- OR
- if the unbiased exponent < 0
- this translates to:
- if the mantissa is 0 and the biased exponent is 7F
- or
- if the biased exponent is less than 7F
- therefore the value is greater than 1 or less than -1 if
- the mantissa is not 0 and the biased exponent is 7F
- or
- if the biased exponent is greater than 7F
- in other words, if we mask out the sign bit, the value is
- greater than 1 or less than -1 if its integer representation is greater than:
- 0 01111111 0000 0000 0000 0000 0000 000
- 0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
- */
- #if defined(_WIN64) || defined(_WIN32_WCE)
- /*
- -EMT64/AMD64 uses different asm
- -VC2005 doesnt allow _WIN64 with inline assembly either!
- */
- void PaUtil_InitializeX86PlainConverters( void )
- {
- }
- #else
- /* -------------------------------------------------------------------------- */
- static const short fpuControlWord_ = 0x033F; /*round to nearest, 64 bit precision, all exceptions masked*/
- static const double int32Scaler_ = 0x7FFFFFFF;
- static const double ditheredInt32Scaler_ = 0x7FFFFFFE;
- static const double int24Scaler_ = 0x7FFFFF;
- static const double ditheredInt24Scaler_ = 0x7FFFFE;
- static const double int16Scaler_ = 0x7FFF;
- static const double ditheredInt16Scaler_ = 0x7FFE;
- #define PA_DITHER_BITS_ (15)
- /* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
- #define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1))
- static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;
- #define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1)
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int32(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed long *dest = (signed long*)destinationBuffer;
- (void)ditherGenerator; // unused parameter
- while( count-- )
- {
- // REVIEW
- double scaled = *src * 0x7FFFFFFF;
- *dest = (signed long) scaled;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
- (void) ditherGenerator; /* unused parameter */
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32 and int32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
-
- mov edi, destinationBuffer
-
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int32Scaler_ // stack: (int)0x7FFFFFFF
- Float32_To_Int32_loop:
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
- /*
- note: we could store to a temporary qword here which would cause
- wraparound distortion instead of int indefinite 0x10. that would
- be more work, and given that not enabling clipping is only advisable
- when you know that your signal isn't going to clip it isn't worth it.
- */
- fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
- add edi, ebx // increment destination ptr
- //lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int32_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int32_Clip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed long *dest = (signed long*)destinationBuffer;
- (void) ditherGenerator; // unused parameter
- while( count-- )
- {
- // REVIEW
- double scaled = *src * 0x7FFFFFFF;
- PA_CLIP_( scaled, -2147483648., 2147483647. );
- *dest = (signed long) scaled;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
- (void) ditherGenerator; /* unused parameter */
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32 and int32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
-
- mov edi, destinationBuffer
-
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int32Scaler_ // stack: (int)0x7FFFFFFF
- Float32_To_Int32_Clip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int32_Clip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
- fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
- jmp Float32_To_Int32_Clip_stored
-
- Float32_To_Int32_Clip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add edx, 0x7FFFFFFF // convert to maximum range integers
- mov dword ptr [edi], edx
- Float32_To_Int32_Clip_stored:
- //add edi, ebx // increment destination ptr
- lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int32_Clip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int32_DitherClip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed long *dest = (signed long*)destinationBuffer;
- while( count-- )
- {
- // REVIEW
- double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
- // use smaller scaler to prevent overflow when we add the dither
- double dithered = ((double)*src * (2147483646.0)) + dither;
- PA_CLIP_( dithered, -2147483648., 2147483647. );
- *dest = (signed long) dithered;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
- // spill storage:
- signed long sourceByteStride;
- signed long highpassedDither;
- // dither state:
- unsigned long ditherPrevious = ditherGenerator->previous;
- unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
- unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
-
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32 and int32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
-
- mov edi, destinationBuffer
-
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld ditheredInt32Scaler_ // stack: int scaler
- Float32_To_Int32_DitherClip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int32_DitherClip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, int scaler
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
- /*
- // call PaUtil_GenerateFloatTriangularDither with C calling convention
- mov sourceByteStride, eax // save eax
- mov sourceEnd, ecx // save ecx
- push ditherGenerator // pass ditherGenerator parameter on stack
- call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
- pop edx // clear parameter off stack
- mov ecx, sourceEnd // restore ecx
- mov eax, sourceByteStride // restore eax
- */
- // generate dither
- mov sourceByteStride, eax // save eax
- mov edx, 196314165
- mov eax, ditherRandSeed1
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov ditherRandSeed1, eax
- mov edx, 196314165
- mov eax, ditherRandSeed2
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov edx, ditherRandSeed1
- shr edx, PA_DITHER_SHIFT_
- mov ditherRandSeed2, eax
- shr eax, PA_DITHER_SHIFT_
- //add eax, edx // eax -> current
- lea eax, [eax+edx]
- mov edx, ditherPrevious
- neg edx
- lea edx, [eax+edx] // highpass = current - previous
- mov highpassedDither, edx
- mov ditherPrevious, eax // previous = current
- mov eax, sourceByteStride // restore eax
- fild highpassedDither
- fmul const_float_dither_scale_
- // end generate dither, dither signal in st(0)
-
- faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
- fistp dword ptr [edi] // pop st(0) into dest, stack: int scaler
- jmp Float32_To_Int32_DitherClip_stored
-
- Float32_To_Int32_DitherClip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add edx, 0x7FFFFFFF // convert to maximum range integers
- mov dword ptr [edi], edx
- Float32_To_Int32_DitherClip_stored:
- //add edi, ebx // increment destination ptr
- lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int32_DitherClip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- ditherGenerator->previous = ditherPrevious;
- ditherGenerator->randSeed1 = ditherRandSeed1;
- ditherGenerator->randSeed2 = ditherRandSeed2;
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int24(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- unsigned char *dest = (unsigned char*)destinationBuffer;
- signed long temp;
- (void) ditherGenerator; // unused parameter
-
- while( count-- )
- {
- // convert to 32 bit and drop the low 8 bits
- double scaled = *src * 0x7FFFFFFF;
- temp = (signed long) scaled;
- dest[0] = (unsigned char)(temp >> 8);
- dest[1] = (unsigned char)(temp >> 16);
- dest[2] = (unsigned char)(temp >> 24);
- src += sourceStride;
- dest += destinationStride * 3;
- }
- */
- short savedFpuControlWord;
-
- signed long tempInt32;
- (void) ditherGenerator; /* unused parameter */
-
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
- mov edi, destinationBuffer
- mov edx, 3 // sizeof int24
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int24Scaler_ // stack: (int)0x7FFFFF
- Float32_To_Int24_loop:
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFFFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
- fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
- mov edx, tempInt32
- mov byte ptr [edi], DL
- shr edx, 8
- //mov byte ptr [edi+1], DL
- //mov byte ptr [edi+2], DH
- mov word ptr [edi+1], DX
- //add edi, ebx // increment destination ptr
- lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int24_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int24_Clip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- unsigned char *dest = (unsigned char*)destinationBuffer;
- signed long temp;
- (void) ditherGenerator; // unused parameter
-
- while( count-- )
- {
- // convert to 32 bit and drop the low 8 bits
- double scaled = *src * 0x7FFFFFFF;
- PA_CLIP_( scaled, -2147483648., 2147483647. );
- temp = (signed long) scaled;
- dest[0] = (unsigned char)(temp >> 8);
- dest[1] = (unsigned char)(temp >> 16);
- dest[2] = (unsigned char)(temp >> 24);
- src += sourceStride;
- dest += destinationStride * 3;
- }
- */
- short savedFpuControlWord;
-
- signed long tempInt32;
- (void) ditherGenerator; /* unused parameter */
-
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
- mov edi, destinationBuffer
- mov edx, 3 // sizeof int24
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int24Scaler_ // stack: (int)0x7FFFFF
- Float32_To_Int24_Clip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int24_Clip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFFFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
- fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
- mov edx, tempInt32
- jmp Float32_To_Int24_Clip_store
-
- Float32_To_Int24_Clip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add edx, 0x7FFFFF // convert to maximum range integers
- Float32_To_Int24_Clip_store:
- mov byte ptr [edi], DL
- shr edx, 8
- //mov byte ptr [edi+1], DL
- //mov byte ptr [edi+2], DH
- mov word ptr [edi+1], DX
- //add edi, ebx // increment destination ptr
- lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int24_Clip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int24_DitherClip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- unsigned char *dest = (unsigned char*)destinationBuffer;
- signed long temp;
-
- while( count-- )
- {
- // convert to 32 bit and drop the low 8 bits
- // FIXME: the dither amplitude here appears to be too small by 8 bits
- double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
- // use smaller scaler to prevent overflow when we add the dither
- double dithered = ((double)*src * (2147483646.0)) + dither;
- PA_CLIP_( dithered, -2147483648., 2147483647. );
-
- temp = (signed long) dithered;
- dest[0] = (unsigned char)(temp >> 8);
- dest[1] = (unsigned char)(temp >> 16);
- dest[2] = (unsigned char)(temp >> 24);
- src += sourceStride;
- dest += destinationStride * 3;
- }
- */
- short savedFpuControlWord;
- // spill storage:
- signed long sourceByteStride;
- signed long highpassedDither;
- // dither state:
- unsigned long ditherPrevious = ditherGenerator->previous;
- unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
- unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
-
- signed long tempInt32;
-
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx
- mov ecx, count
- imul ecx, eax
- add ecx, esi
- mov edi, destinationBuffer
- mov edx, 3 // sizeof int24
- mov ebx, destinationStride
- imul ebx, edx
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld ditheredInt24Scaler_ // stack: int scaler
- Float32_To_Int24_DitherClip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int24_DitherClip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, int scaler
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
- /*
- // call PaUtil_GenerateFloatTriangularDither with C calling convention
- mov sourceByteStride, eax // save eax
- mov sourceEnd, ecx // save ecx
- push ditherGenerator // pass ditherGenerator parameter on stack
- call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
- pop edx // clear parameter off stack
- mov ecx, sourceEnd // restore ecx
- mov eax, sourceByteStride // restore eax
- */
-
- // generate dither
- mov sourceByteStride, eax // save eax
- mov edx, 196314165
- mov eax, ditherRandSeed1
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov ditherRandSeed1, eax
- mov edx, 196314165
- mov eax, ditherRandSeed2
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov edx, ditherRandSeed1
- shr edx, PA_DITHER_SHIFT_
- mov ditherRandSeed2, eax
- shr eax, PA_DITHER_SHIFT_
- //add eax, edx // eax -> current
- lea eax, [eax+edx]
- mov edx, ditherPrevious
- neg edx
- lea edx, [eax+edx] // highpass = current - previous
- mov highpassedDither, edx
- mov ditherPrevious, eax // previous = current
- mov eax, sourceByteStride // restore eax
- fild highpassedDither
- fmul const_float_dither_scale_
- // end generate dither, dither signal in st(0)
- faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler
- fistp tempInt32 // pop st(0) into tempInt32, stack: int scaler
- mov edx, tempInt32
- jmp Float32_To_Int24_DitherClip_store
-
- Float32_To_Int24_DitherClip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add edx, 0x7FFFFF // convert to maximum range integers
- Float32_To_Int24_DitherClip_store:
- mov byte ptr [edi], DL
- shr edx, 8
- //mov byte ptr [edi+1], DL
- //mov byte ptr [edi+2], DH
- mov word ptr [edi+1], DX
- //add edi, ebx // increment destination ptr
- lea edi, [edi+ebx]
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int24_DitherClip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- ditherGenerator->previous = ditherPrevious;
- ditherGenerator->randSeed1 = ditherRandSeed1;
- ditherGenerator->randSeed2 = ditherRandSeed2;
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int16(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed short *dest = (signed short*)destinationBuffer;
- (void)ditherGenerator; // unused parameter
- while( count-- )
- {
- short samp = (short) (*src * (32767.0f));
- *dest = samp;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
-
- (void) ditherGenerator; /* unused parameter */
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx // source byte stride
- mov ecx, count
- imul ecx, eax
- add ecx, esi // source end ptr = count * source byte stride + source ptr
- mov edi, destinationBuffer
- mov edx, 2 // sizeof int16
- mov ebx, destinationStride
- imul ebx, edx // destination byte stride
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int16Scaler_ // stack: (int)0x7FFF
- Float32_To_Int16_loop:
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
- fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
- add edi, ebx // increment destination ptr
- //lea edi, [edi+ebx]
-
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int16_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int16_Clip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed short *dest = (signed short*)destinationBuffer;
- (void)ditherGenerator; // unused parameter
- while( count-- )
- {
- long samp = (signed long) (*src * (32767.0f));
- PA_CLIP_( samp, -0x8000, 0x7FFF );
- *dest = (signed short) samp;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
-
- (void) ditherGenerator; /* unused parameter */
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx // source byte stride
- mov ecx, count
- imul ecx, eax
- add ecx, esi // source end ptr = count * source byte stride + source ptr
- mov edi, destinationBuffer
- mov edx, 2 // sizeof int16
- mov ebx, destinationStride
- imul ebx, edx // destination byte stride
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld int16Scaler_ // stack: (int)0x7FFF
- Float32_To_Int16_Clip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int16_Clip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, (int)0x7FFF
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
- fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
- jmp Float32_To_Int16_Clip_stored
-
- Float32_To_Int16_Clip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add dx, 0x7FFF // convert to maximum range integers
- mov word ptr [edi], dx // store clamped into into dest
- Float32_To_Int16_Clip_stored:
- add edi, ebx // increment destination ptr
- //lea edi, [edi+ebx]
-
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int16_Clip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- }
- /* -------------------------------------------------------------------------- */
- static void Float32_To_Int16_DitherClip(
- void *destinationBuffer, signed int destinationStride,
- void *sourceBuffer, signed int sourceStride,
- unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
- {
- /*
- float *src = (float*)sourceBuffer;
- signed short *dest = (signed short*)destinationBuffer;
- (void)ditherGenerator; // unused parameter
- while( count-- )
- {
- float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
- // use smaller scaler to prevent overflow when we add the dither
- float dithered = (*src * (32766.0f)) + dither;
- signed long samp = (signed long) dithered;
- PA_CLIP_( samp, -0x8000, 0x7FFF );
- *dest = (signed short) samp;
- src += sourceStride;
- dest += destinationStride;
- }
- */
- short savedFpuControlWord;
- // spill storage:
- signed long sourceByteStride;
- signed long highpassedDither;
- // dither state:
- unsigned long ditherPrevious = ditherGenerator->previous;
- unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
- unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
- __asm{
- // esi -> source ptr
- // eax -> source byte stride
- // edi -> destination ptr
- // ebx -> destination byte stride
- // ecx -> source end ptr
- // edx -> temp
- mov esi, sourceBuffer
- mov edx, 4 // sizeof float32
- mov eax, sourceStride
- imul eax, edx // source byte stride
- mov ecx, count
- imul ecx, eax
- add ecx, esi // source end ptr = count * source byte stride + source ptr
- mov edi, destinationBuffer
- mov edx, 2 // sizeof int16
- mov ebx, destinationStride
- imul ebx, edx // destination byte stride
- fwait
- fstcw savedFpuControlWord
- fldcw fpuControlWord_
- fld ditheredInt16Scaler_ // stack: int scaler
- Float32_To_Int16_DitherClip_loop:
- mov edx, dword ptr [esi] // load floating point value into integer register
- and edx, 0x7FFFFFFF // mask off sign
- cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
- jg Float32_To_Int16_DitherClip_clamp
- // load unscaled value into st(0)
- fld dword ptr [esi] // stack: value, int scaler
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
- /*
- // call PaUtil_GenerateFloatTriangularDither with C calling convention
- mov sourceByteStride, eax // save eax
- mov sourceEnd, ecx // save ecx
- push ditherGenerator // pass ditherGenerator parameter on stack
- call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
- pop edx // clear parameter off stack
- mov ecx, sourceEnd // restore ecx
- mov eax, sourceByteStride // restore eax
- */
- // generate dither
- mov sourceByteStride, eax // save eax
- mov edx, 196314165
- mov eax, ditherRandSeed1
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov ditherRandSeed1, eax
- mov edx, 196314165
- mov eax, ditherRandSeed2
- mul edx // eax:edx = eax * 196314165
- //add eax, 907633515
- lea eax, [eax+907633515]
- mov edx, ditherRandSeed1
- shr edx, PA_DITHER_SHIFT_
- mov ditherRandSeed2, eax
- shr eax, PA_DITHER_SHIFT_
- //add eax, edx // eax -> current
- lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
- mov edx, ditherPrevious
- neg edx
- lea edx, [eax+edx] // highpass = current - previous
- mov highpassedDither, edx
- mov ditherPrevious, eax // previous = current
- mov eax, sourceByteStride // restore eax
- fild highpassedDither
- fmul const_float_dither_scale_
- // end generate dither, dither signal in st(0)
-
- faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler
- fistp word ptr [edi] // store scaled int into dest, stack: int scaler
- jmp Float32_To_Int16_DitherClip_stored
-
- Float32_To_Int16_DitherClip_clamp:
- mov edx, dword ptr [esi] // load floating point value into integer register
- shr edx, 31 // move sign bit into bit 0
- add esi, eax // increment source ptr
- //lea esi, [esi+eax]
- add dx, 0x7FFF // convert to maximum range integers
- mov word ptr [edi], dx // store clamped into into dest
- Float32_To_Int16_DitherClip_stored:
- add edi, ebx // increment destination ptr
- //lea edi, [edi+ebx]
-
- cmp esi, ecx // has src ptr reached end?
- jne Float32_To_Int16_DitherClip_loop
- ffree st(0)
- fincstp
- fwait
- fnclex
- fldcw savedFpuControlWord
- }
- ditherGenerator->previous = ditherPrevious;
- ditherGenerator->randSeed1 = ditherRandSeed1;
- ditherGenerator->randSeed2 = ditherRandSeed2;
- }
- /* -------------------------------------------------------------------------- */
- void PaUtil_InitializeX86PlainConverters( void )
- {
- paConverters.Float32_To_Int32 = Float32_To_Int32;
- paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
- paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
- paConverters.Float32_To_Int24 = Float32_To_Int24;
- paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
- paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
-
- paConverters.Float32_To_Int16 = Float32_To_Int16;
- paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
- paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
- }
- #endif
- /* -------------------------------------------------------------------------- */
|