cu_main.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115
  1. /* cu_main.c - execution profiler under valgrind
  2. Copyright (C) 2013,2015-2017,2021,2023 Matthew Wette
  3. This program is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU General Public License as
  5. published by the Free Software Foundation; either version 2 of the
  6. License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful, but
  8. WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, see http://www.gnu.org/licenses.
  13. The GNU General Public License is contained in the file COPYING.
  14. mwette@alumni.caltech.edu
  15. */
  16. #include <limits.h> /* need LONG_MAX */
  17. #include "pub_tool_basics.h"
  18. #include "pub_tool_vki.h"
  19. #include "pub_tool_tooliface.h"
  20. #include "pub_tool_libcassert.h"
  21. #include "pub_tool_libcprint.h"
  22. #include "pub_tool_libcfile.h"
  23. #include "pub_tool_debuginfo.h"
  24. #include "pub_tool_libcbase.h"
  25. #include "pub_tool_options.h"
  26. #include "pub_tool_machine.h"
  27. #include "pub_tool_threadstate.h"
  28. #include "cputildefs.h"
/* Version tag for this tool.
 * NOTE(review): the format looks like vYYMMDD plus a revision letter --
 * confirm. */
static const HChar cu_version[] = "v231209a";
  30. /*------------------------------------------------------------*/
  31. /*--- Instrumentation ---*/
  32. /*------------------------------------------------------------*/
/* TODO: explain the count tables.
 * The counts can be expressed with reference to a divisor to get more
 * resolution.  For example, if an operation takes 1.5 clocks on average,
 * then the table could use a divisor of 10 and define the operation count
 * to be 15.
 * cu_op_names[] and cu_op_counts[] are indexed by (op - Iop_INVALID);
 * cu_ld_counts[] is indexed by (type - Ity_INVALID).
 */
  38. /* count_tables: beg */
  39. #define NUM_OP 1130
  40. static const HChar *cu_op_names[] = {
  41. "Iop_INVALID", "Iop_Add8", "Iop_Add16", "Iop_Add32", "Iop_Add64",
  42. "Iop_Sub8", "Iop_Sub16", "Iop_Sub32", "Iop_Sub64", "Iop_Mul8",
  43. "Iop_Mul16", "Iop_Mul32", "Iop_Mul64", "Iop_Or8", "Iop_Or16", "Iop_Or32",
  44. "Iop_Or64", "Iop_And8", "Iop_And16", "Iop_And32", "Iop_And64",
  45. "Iop_Xor8", "Iop_Xor16", "Iop_Xor32", "Iop_Xor64", "Iop_Shl8",
  46. "Iop_Shl16", "Iop_Shl32", "Iop_Shl64", "Iop_Shr8", "Iop_Shr16",
  47. "Iop_Shr32", "Iop_Shr64", "Iop_Sar8", "Iop_Sar16", "Iop_Sar32",
  48. "Iop_Sar64", "Iop_CmpEQ8", "Iop_CmpEQ16", "Iop_CmpEQ32", "Iop_CmpEQ64",
  49. "Iop_CmpNE8", "Iop_CmpNE16", "Iop_CmpNE32", "Iop_CmpNE64", "Iop_Not8",
  50. "Iop_Not16", "Iop_Not32", "Iop_Not64", "Iop_CasCmpEQ8", "Iop_CasCmpEQ16",
  51. "Iop_CasCmpEQ32", "Iop_CasCmpEQ64", "Iop_CasCmpNE8", "Iop_CasCmpNE16",
  52. "Iop_CasCmpNE32", "Iop_CasCmpNE64", "Iop_ExpCmpNE8", "Iop_ExpCmpNE16",
  53. "Iop_ExpCmpNE32", "Iop_ExpCmpNE64", "Iop_MullS8", "Iop_MullS16",
  54. "Iop_MullS32", "Iop_MullS64", "Iop_MullU8", "Iop_MullU16", "Iop_MullU32",
  55. "Iop_MullU64", "Iop_Clz64", "Iop_Clz32", "Iop_Ctz64", "Iop_Ctz32",
  56. "Iop_ClzNat64", "Iop_ClzNat32", "Iop_CtzNat64", "Iop_CtzNat32",
  57. "Iop_PopCount64", "Iop_PopCount32", "Iop_CmpLT32S", "Iop_CmpLT64S",
  58. "Iop_CmpLE32S", "Iop_CmpLE64S", "Iop_CmpLT32U", "Iop_CmpLT64U",
  59. "Iop_CmpLE32U", "Iop_CmpLE64U", "Iop_CmpNEZ8", "Iop_CmpNEZ16",
  60. "Iop_CmpNEZ32", "Iop_CmpNEZ64", "Iop_CmpwNEZ32", "Iop_CmpwNEZ64",
  61. "Iop_Left8", "Iop_Left16", "Iop_Left32", "Iop_Left64", "Iop_Max32U",
  62. "Iop_CmpORD32U", "Iop_CmpORD64U", "Iop_CmpORD32S", "Iop_CmpORD64S",
  63. "Iop_DivU32", "Iop_DivS32", "Iop_DivU64", "Iop_DivS64", "Iop_DivU128",
  64. "Iop_DivS128", "Iop_DivU32E", "Iop_DivS32E", "Iop_DivU64E",
  65. "Iop_DivS64E", "Iop_DivU128E", "Iop_DivS128E", "Iop_DivModU64to32",
  66. "Iop_DivModS64to32", "Iop_DivModU128to64", "Iop_DivModS128to64",
  67. "Iop_DivModS64to64", "Iop_DivModU64to64", "Iop_DivModS32to32",
  68. "Iop_DivModU32to32", "Iop_ModU128", "Iop_ModS128", "Iop_8Uto16",
  69. "Iop_8Uto32", "Iop_8Uto64", "Iop_16Uto32", "Iop_16Uto64", "Iop_32Uto64",
  70. "Iop_8Sto16", "Iop_8Sto32", "Iop_8Sto64", "Iop_16Sto32", "Iop_16Sto64",
  71. "Iop_32Sto64", "Iop_64to8", "Iop_32to8", "Iop_64to16", "Iop_16to8",
  72. "Iop_16HIto8", "Iop_8HLto16", "Iop_32to16", "Iop_32HIto16",
  73. "Iop_16HLto32", "Iop_64to32", "Iop_64HIto32", "Iop_32HLto64",
  74. "Iop_128to64", "Iop_128HIto64", "Iop_64HLto128", "Iop_Not1", "Iop_And1",
  75. "Iop_Or1", "Iop_32to1", "Iop_64to1", "Iop_1Uto8", "Iop_1Uto32",
  76. "Iop_1Uto64", "Iop_1Sto8", "Iop_1Sto16", "Iop_1Sto32", "Iop_1Sto64",
  77. "Iop_AddF64", "Iop_SubF64", "Iop_MulF64", "Iop_DivF64", "Iop_AddF32",
  78. "Iop_SubF32", "Iop_MulF32", "Iop_DivF32", "Iop_AddF64r32",
  79. "Iop_SubF64r32", "Iop_MulF64r32", "Iop_DivF64r32", "Iop_NegF64",
  80. "Iop_AbsF64", "Iop_NegF32", "Iop_AbsF32", "Iop_NegF16", "Iop_AbsF16",
  81. "Iop_SqrtF64", "Iop_SqrtF32", "Iop_SqrtF16", "Iop_SubF16", "Iop_AddF16",
  82. "Iop_CmpF64", "Iop_CmpF32", "Iop_CmpF16", "Iop_CmpF128", "Iop_F64toI16S",
  83. "Iop_F64toI32S", "Iop_F64toI64S", "Iop_F64toI64U", "Iop_F64toI32U",
  84. "Iop_I32StoF64", "Iop_I64StoF64", "Iop_I64UtoF64", "Iop_I64UtoF32",
  85. "Iop_I32UtoF32", "Iop_I32UtoF64", "Iop_F32toI32S", "Iop_F32toI64S",
  86. "Iop_F32toI32U", "Iop_F32toI64U", "Iop_I32StoF32", "Iop_I64StoF32",
  87. "Iop_F32toF64", "Iop_F64toF32", "Iop_ReinterpV128asI128",
  88. "Iop_ReinterpI128asV128", "Iop_ReinterpF128asI128",
  89. "Iop_ReinterpI128asF128", "Iop_ReinterpF64asI64", "Iop_ReinterpI64asF64",
  90. "Iop_ReinterpF32asI32", "Iop_ReinterpI32asF32", "Iop_F64HLtoF128",
  91. "Iop_F128HItoF64", "Iop_F128LOtoF64", "Iop_AddF128", "Iop_SubF128",
  92. "Iop_MulF128", "Iop_DivF128", "Iop_MAddF128", "Iop_MSubF128",
  93. "Iop_NegMAddF128", "Iop_NegMSubF128", "Iop_NegF128", "Iop_AbsF128",
  94. "Iop_SqrtF128", "Iop_I32StoF128", "Iop_I64StoF128", "Iop_I32UtoF128",
  95. "Iop_I64UtoF128", "Iop_F32toF128", "Iop_F64toF128", "Iop_I128UtoF128",
  96. "Iop_I128StoF128", "Iop_F128toI32S", "Iop_F128toI64S", "Iop_F128toI32U",
  97. "Iop_F128toI64U", "Iop_F128toI128S", "Iop_F128toF64", "Iop_F128toF32",
  98. "Iop_RndF128", "Iop_TruncF128toI32S", "Iop_TruncF128toI32U",
  99. "Iop_TruncF128toI64U", "Iop_TruncF128toI64S", "Iop_TruncF128toI128U",
  100. "Iop_TruncF128toI128S", "Iop_AtanF64", "Iop_Yl2xF64", "Iop_Yl2xp1F64",
  101. "Iop_PRemF64", "Iop_PRemC3210F64", "Iop_PRem1F64", "Iop_PRem1C3210F64",
  102. "Iop_ScaleF64", "Iop_SinF64", "Iop_CosF64", "Iop_TanF64", "Iop_2xm1F64",
  103. "Iop_RoundF128toInt", "Iop_RoundF64toInt", "Iop_RoundF32toInt",
  104. "Iop_MAddF32", "Iop_MSubF32", "Iop_MAddF64", "Iop_MSubF64",
  105. "Iop_MAddF64r32", "Iop_MSubF64r32", "Iop_RSqrtEst5GoodF64",
  106. "Iop_RoundF64toF64_NEAREST", "Iop_RoundF64toF64_NegINF",
  107. "Iop_RoundF64toF64_PosINF", "Iop_RoundF64toF64_ZERO",
  108. "Iop_TruncF64asF32", "Iop_RoundF64toF32", "Iop_RecpExpF64",
  109. "Iop_RecpExpF32", "Iop_MaxNumF64", "Iop_MinNumF64", "Iop_MaxNumF32",
  110. "Iop_MinNumF32", "Iop_F16toF64", "Iop_F64toF16", "Iop_F16toF32",
  111. "Iop_F32toF16", "Iop_QAdd32S", "Iop_QSub32S", "Iop_Add16x2",
  112. "Iop_Sub16x2", "Iop_QAdd16Sx2", "Iop_QAdd16Ux2", "Iop_QSub16Sx2",
  113. "Iop_QSub16Ux2", "Iop_HAdd16Ux2", "Iop_HAdd16Sx2", "Iop_HSub16Ux2",
  114. "Iop_HSub16Sx2", "Iop_Add8x4", "Iop_Sub8x4", "Iop_QAdd8Sx4",
  115. "Iop_QAdd8Ux4", "Iop_QSub8Sx4", "Iop_QSub8Ux4", "Iop_HAdd8Ux4",
  116. "Iop_HAdd8Sx4", "Iop_HSub8Ux4", "Iop_HSub8Sx4", "Iop_Sad8Ux4",
  117. "Iop_CmpNEZ16x2", "Iop_CmpNEZ8x4", "Iop_Reverse8sIn32_x1",
  118. "Iop_I32UtoF32x2_DEP", "Iop_I32StoF32x2_DEP", "Iop_F32toI32Ux2_RZ",
  119. "Iop_F32toI32Sx2_RZ", "Iop_F32ToFixed32Ux2_RZ", "Iop_F32ToFixed32Sx2_RZ",
  120. "Iop_Fixed32UToF32x2_RN", "Iop_Fixed32SToF32x2_RN", "Iop_Max32Fx2",
  121. "Iop_Min32Fx2", "Iop_PwMax32Fx2", "Iop_PwMin32Fx2", "Iop_CmpEQ32Fx2",
  122. "Iop_CmpGT32Fx2", "Iop_CmpGE32Fx2", "Iop_RecipEst32Fx2",
  123. "Iop_RecipStep32Fx2", "Iop_RSqrtEst32Fx2", "Iop_RSqrtStep32Fx2",
  124. "Iop_Neg32Fx2", "Iop_Abs32Fx2", "Iop_CmpNEZ8x8", "Iop_CmpNEZ16x4",
  125. "Iop_CmpNEZ32x2", "Iop_Add8x8", "Iop_Add16x4", "Iop_Add32x2",
  126. "Iop_QAdd8Ux8", "Iop_QAdd16Ux4", "Iop_QAdd32Ux2", "Iop_QAdd64Ux1",
  127. "Iop_QAdd8Sx8", "Iop_QAdd16Sx4", "Iop_QAdd32Sx2", "Iop_QAdd64Sx1",
  128. "Iop_PwAdd8x8", "Iop_PwAdd16x4", "Iop_PwAdd32x2", "Iop_PwMax8Sx8",
  129. "Iop_PwMax16Sx4", "Iop_PwMax32Sx2", "Iop_PwMax8Ux8", "Iop_PwMax16Ux4",
  130. "Iop_PwMax32Ux2", "Iop_PwMin8Sx8", "Iop_PwMin16Sx4", "Iop_PwMin32Sx2",
  131. "Iop_PwMin8Ux8", "Iop_PwMin16Ux4", "Iop_PwMin32Ux2", "Iop_PwAddL8Ux8",
  132. "Iop_PwAddL16Ux4", "Iop_PwAddL32Ux2", "Iop_PwAddL8Sx8",
  133. "Iop_PwAddL16Sx4", "Iop_PwAddL32Sx2", "Iop_Sub8x8", "Iop_Sub16x4",
  134. "Iop_Sub32x2", "Iop_QSub8Ux8", "Iop_QSub16Ux4", "Iop_QSub32Ux2",
  135. "Iop_QSub64Ux1", "Iop_QSub8Sx8", "Iop_QSub16Sx4", "Iop_QSub32Sx2",
  136. "Iop_QSub64Sx1", "Iop_Abs8x8", "Iop_Abs16x4", "Iop_Abs32x2",
  137. "Iop_Mul8x8", "Iop_Mul16x4", "Iop_Mul32x2", "Iop_Mul32Fx2",
  138. "Iop_MulHi16Ux4", "Iop_MulHi16Sx4", "Iop_PolynomialMul8x8",
  139. "Iop_QDMulHi16Sx4", "Iop_QDMulHi32Sx2", "Iop_QRDMulHi16Sx4",
  140. "Iop_QRDMulHi32Sx2", "Iop_Avg8Ux8", "Iop_Avg16Ux4", "Iop_Max8Sx8",
  141. "Iop_Max16Sx4", "Iop_Max32Sx2", "Iop_Max8Ux8", "Iop_Max16Ux4",
  142. "Iop_Max32Ux2", "Iop_Min8Sx8", "Iop_Min16Sx4", "Iop_Min32Sx2",
  143. "Iop_Min8Ux8", "Iop_Min16Ux4", "Iop_Min32Ux2", "Iop_CmpEQ8x8",
  144. "Iop_CmpEQ16x4", "Iop_CmpEQ32x2", "Iop_CmpGT8Ux8", "Iop_CmpGT16Ux4",
  145. "Iop_CmpGT32Ux2", "Iop_CmpGT8Sx8", "Iop_CmpGT16Sx4", "Iop_CmpGT32Sx2",
  146. "Iop_Cnt8x8", "Iop_Clz8x8", "Iop_Clz16x4", "Iop_Clz32x2", "Iop_Cls8x8",
  147. "Iop_Cls16x4", "Iop_Cls32x2", "Iop_Clz64x2", "Iop_Ctz8x16",
  148. "Iop_Ctz16x8", "Iop_Ctz32x4", "Iop_Ctz64x2", "Iop_Shl8x8", "Iop_Shl16x4",
  149. "Iop_Shl32x2", "Iop_Shr8x8", "Iop_Shr16x4", "Iop_Shr32x2", "Iop_Sar8x8",
  150. "Iop_Sar16x4", "Iop_Sar32x2", "Iop_Sal8x8", "Iop_Sal16x4", "Iop_Sal32x2",
  151. "Iop_Sal64x1", "Iop_ShlN8x8", "Iop_ShlN16x4", "Iop_ShlN32x2",
  152. "Iop_ShrN8x8", "Iop_ShrN16x4", "Iop_ShrN32x2", "Iop_SarN8x8",
  153. "Iop_SarN16x4", "Iop_SarN32x2", "Iop_QShl8x8", "Iop_QShl16x4",
  154. "Iop_QShl32x2", "Iop_QShl64x1", "Iop_QSal8x8", "Iop_QSal16x4",
  155. "Iop_QSal32x2", "Iop_QSal64x1", "Iop_QShlNsatSU8x8",
  156. "Iop_QShlNsatSU16x4", "Iop_QShlNsatSU32x2", "Iop_QShlNsatSU64x1",
  157. "Iop_QShlNsatUU8x8", "Iop_QShlNsatUU16x4", "Iop_QShlNsatUU32x2",
  158. "Iop_QShlNsatUU64x1", "Iop_QShlNsatSS8x8", "Iop_QShlNsatSS16x4",
  159. "Iop_QShlNsatSS32x2", "Iop_QShlNsatSS64x1", "Iop_QNarrowBin16Sto8Ux8",
  160. "Iop_QNarrowBin16Sto8Sx8", "Iop_QNarrowBin32Sto16Sx4",
  161. "Iop_NarrowBin16to8x8", "Iop_NarrowBin32to16x4", "Iop_InterleaveHI8x8",
  162. "Iop_InterleaveHI16x4", "Iop_InterleaveHI32x2", "Iop_InterleaveLO8x8",
  163. "Iop_InterleaveLO16x4", "Iop_InterleaveLO32x2",
  164. "Iop_InterleaveOddLanes8x8", "Iop_InterleaveEvenLanes8x8",
  165. "Iop_InterleaveOddLanes16x4", "Iop_InterleaveEvenLanes16x4",
  166. "Iop_CatOddLanes8x8", "Iop_CatOddLanes16x4", "Iop_CatEvenLanes8x8",
  167. "Iop_CatEvenLanes16x4", "Iop_GetElem8x8", "Iop_GetElem16x4",
  168. "Iop_GetElem32x2", "Iop_SetElem8x8", "Iop_SetElem16x4",
  169. "Iop_SetElem32x2", "Iop_Dup8x8", "Iop_Dup16x4", "Iop_Dup32x2",
  170. "Iop_Slice64", "Iop_Reverse8sIn16_x4", "Iop_Reverse8sIn32_x2",
  171. "Iop_Reverse16sIn32_x2", "Iop_Reverse8sIn64_x1", "Iop_Reverse16sIn64_x1",
  172. "Iop_Reverse32sIn64_x1", "Iop_Perm8x8", "Iop_PermOrZero8x8",
  173. "Iop_GetMSBs8x8", "Iop_RecipEst32Ux2", "Iop_RSqrtEst32Ux2", "Iop_AddD64",
  174. "Iop_SubD64", "Iop_MulD64", "Iop_DivD64", "Iop_AddD128", "Iop_SubD128",
  175. "Iop_MulD128", "Iop_DivD128", "Iop_ShlD64", "Iop_ShrD64", "Iop_ShlD128",
  176. "Iop_ShrD128", "Iop_D32toD64", "Iop_D64toD128", "Iop_I32StoD128",
  177. "Iop_I32UtoD128", "Iop_I64StoD128", "Iop_I64UtoD128", "Iop_I128StoD128",
  178. "Iop_D64toD32", "Iop_D128toD64", "Iop_I32StoD64", "Iop_I32UtoD64",
  179. "Iop_I64StoD64", "Iop_I64UtoD64", "Iop_D64toI32S", "Iop_D64toI32U",
  180. "Iop_D64toI64S", "Iop_D64toI64U", "Iop_D128toI32S", "Iop_D128toI32U",
  181. "Iop_D128toI64S", "Iop_D128toI64U", "Iop_D128toI128S", "Iop_F32toD32",
  182. "Iop_F32toD64", "Iop_F32toD128", "Iop_F64toD32", "Iop_F64toD64",
  183. "Iop_F64toD128", "Iop_F128toD32", "Iop_F128toD64", "Iop_F128toD128",
  184. "Iop_D32toF32", "Iop_D32toF64", "Iop_D32toF128", "Iop_D64toF32",
  185. "Iop_D64toF64", "Iop_D64toF128", "Iop_D128toF32", "Iop_D128toF64",
  186. "Iop_D128toF128", "Iop_RoundD64toInt", "Iop_RoundD128toInt",
  187. "Iop_CmpD64", "Iop_CmpD128", "Iop_CmpExpD64", "Iop_CmpExpD128",
  188. "Iop_QuantizeD64", "Iop_QuantizeD128", "Iop_SignificanceRoundD64",
  189. "Iop_SignificanceRoundD128", "Iop_ExtractExpD64", "Iop_ExtractExpD128",
  190. "Iop_ExtractSigD64", "Iop_ExtractSigD128", "Iop_InsertExpD64",
  191. "Iop_InsertExpD128", "Iop_D64HLtoD128", "Iop_D128HItoD64",
  192. "Iop_D128LOtoD64", "Iop_DPBtoBCD", "Iop_BCDtoDPB", "Iop_BCDAdd",
  193. "Iop_BCDSub", "Iop_I128StoBCD128", "Iop_BCD128toI128S",
  194. "Iop_ReinterpI64asD64", "Iop_ReinterpD64asI64", "Iop_Sqrt16Fx8",
  195. "Iop_Add16Fx8", "Iop_Sub16Fx8", "Iop_CmpLT16Fx8", "Iop_CmpLE16Fx8",
  196. "Iop_CmpEQ16Fx8", "Iop_Abs16Fx8", "Iop_Neg16Fx8", "Iop_Add32Fx4",
  197. "Iop_Sub32Fx4", "Iop_Mul32Fx4", "Iop_Div32Fx4", "Iop_Max32Fx4",
  198. "Iop_Min32Fx4", "Iop_Add32Fx2", "Iop_Sub32Fx2", "Iop_CmpEQ32Fx4",
  199. "Iop_CmpLT32Fx4", "Iop_CmpLE32Fx4", "Iop_CmpUN32Fx4", "Iop_CmpGT32Fx4",
  200. "Iop_CmpGE32Fx4", "Iop_PwMax32Fx4", "Iop_PwMin32Fx4", "Iop_Abs32Fx4",
  201. "Iop_Neg32Fx4", "Iop_Sqrt32Fx4", "Iop_RecipEst32Fx4",
  202. "Iop_RecipStep32Fx4", "Iop_RSqrtEst32Fx4", "Iop_Scale2_32Fx4",
  203. "Iop_Log2_32Fx4", "Iop_Exp2_32Fx4", "Iop_RSqrtStep32Fx4",
  204. "Iop_I32UtoF32x4_DEP", "Iop_I32StoF32x4_DEP", "Iop_I32StoF32x4",
  205. "Iop_F32toI32Sx4", "Iop_F32toI32Ux4_RZ", "Iop_F32toI32Sx4_RZ",
  206. "Iop_QF32toI32Ux4_RZ", "Iop_QF32toI32Sx4_RZ", "Iop_RoundF32x4_RM",
  207. "Iop_RoundF32x4_RP", "Iop_RoundF32x4_RN", "Iop_RoundF32x4_RZ",
  208. "Iop_F32ToFixed32Ux4_RZ", "Iop_F32ToFixed32Sx4_RZ",
  209. "Iop_Fixed32UToF32x4_RN", "Iop_Fixed32SToF32x4_RN", "Iop_F32toF16x4_DEP",
  210. "Iop_F32toF16x4", "Iop_F16toF32x4", "Iop_F64toF16x2_DEP",
  211. "Iop_F16toF64x2", "Iop_F32x4_2toQ16x8", "Iop_Add32F0x4", "Iop_Sub32F0x4",
  212. "Iop_Mul32F0x4", "Iop_Div32F0x4", "Iop_Max32F0x4", "Iop_Min32F0x4",
  213. "Iop_CmpEQ32F0x4", "Iop_CmpLT32F0x4", "Iop_CmpLE32F0x4",
  214. "Iop_CmpUN32F0x4", "Iop_RecipEst32F0x4", "Iop_Sqrt32F0x4",
  215. "Iop_RSqrtEst32F0x4", "Iop_Add64Fx2", "Iop_Sub64Fx2", "Iop_Mul64Fx2",
  216. "Iop_Div64Fx2", "Iop_Max64Fx2", "Iop_Min64Fx2", "Iop_CmpEQ64Fx2",
  217. "Iop_CmpLT64Fx2", "Iop_CmpLE64Fx2", "Iop_CmpUN64Fx2", "Iop_Abs64Fx2",
  218. "Iop_Neg64Fx2", "Iop_Sqrt64Fx2", "Iop_Scale2_64Fx2", "Iop_Log2_64Fx2",
  219. "Iop_RecipEst64Fx2", "Iop_RecipStep64Fx2", "Iop_RSqrtEst64Fx2",
  220. "Iop_RSqrtStep64Fx2", "Iop_F64x2_2toQ32x4", "Iop_Add64F0x2",
  221. "Iop_Sub64F0x2", "Iop_Mul64F0x2", "Iop_Div64F0x2", "Iop_Max64F0x2",
  222. "Iop_Min64F0x2", "Iop_CmpEQ64F0x2", "Iop_CmpLT64F0x2", "Iop_CmpLE64F0x2",
  223. "Iop_CmpUN64F0x2", "Iop_Sqrt64F0x2", "Iop_V128to64", "Iop_V128HIto64",
  224. "Iop_64HLtoV128", "Iop_64UtoV128", "Iop_SetV128lo64",
  225. "Iop_ZeroHI64ofV128", "Iop_ZeroHI96ofV128", "Iop_ZeroHI112ofV128",
  226. "Iop_ZeroHI120ofV128", "Iop_32UtoV128", "Iop_V128to32",
  227. "Iop_SetV128lo32", "Iop_NotV128", "Iop_AndV128", "Iop_OrV128",
  228. "Iop_XorV128", "Iop_ShlV128", "Iop_ShrV128", "Iop_SarV128",
  229. "Iop_CmpNEZ8x16", "Iop_CmpNEZ16x8", "Iop_CmpNEZ32x4", "Iop_CmpNEZ64x2",
  230. "Iop_CmpNEZ128x1", "Iop_Add8x16", "Iop_Add16x8", "Iop_Add32x4",
  231. "Iop_Add64x2", "Iop_Add128x1", "Iop_QAdd8Ux16", "Iop_QAdd16Ux8",
  232. "Iop_QAdd32Ux4", "Iop_QAdd64Ux2", "Iop_QAdd8Sx16", "Iop_QAdd16Sx8",
  233. "Iop_QAdd32Sx4", "Iop_QAdd64Sx2", "Iop_QAddExtUSsatSS8x16",
  234. "Iop_QAddExtUSsatSS16x8", "Iop_QAddExtUSsatSS32x4",
  235. "Iop_QAddExtUSsatSS64x2", "Iop_QAddExtSUsatUU8x16",
  236. "Iop_QAddExtSUsatUU16x8", "Iop_QAddExtSUsatUU32x4",
  237. "Iop_QAddExtSUsatUU64x2", "Iop_Sub8x16", "Iop_Sub16x8", "Iop_Sub32x4",
  238. "Iop_Sub64x2", "Iop_Sub128x1", "Iop_QSub8Ux16", "Iop_QSub16Ux8",
  239. "Iop_QSub32Ux4", "Iop_QSub64Ux2", "Iop_QSub8Sx16", "Iop_QSub16Sx8",
  240. "Iop_QSub32Sx4", "Iop_QSub64Sx2", "Iop_Mul8x16", "Iop_Mul16x8",
  241. "Iop_Mul32x4", "Iop_MulHi8Ux16", "Iop_MulHi16Ux8", "Iop_MulHi32Ux4",
  242. "Iop_MulHi8Sx16", "Iop_MulHi16Sx8", "Iop_MulHi32Sx4",
  243. "Iop_MullEven8Ux16", "Iop_MullEven16Ux8", "Iop_MullEven32Ux4",
  244. "Iop_MullEven8Sx16", "Iop_MullEven16Sx8", "Iop_MullEven32Sx4",
  245. "Iop_Mull8Ux8", "Iop_Mull8Sx8", "Iop_Mull16Ux4", "Iop_Mull16Sx4",
  246. "Iop_Mull32Ux2", "Iop_Mull32Sx2", "Iop_QDMull16Sx4", "Iop_QDMull32Sx2",
  247. "Iop_QDMulHi16Sx8", "Iop_QDMulHi32Sx4", "Iop_QRDMulHi16Sx8",
  248. "Iop_QRDMulHi32Sx4", "Iop_PolynomialMul8x16", "Iop_PolynomialMull8x8",
  249. "Iop_PolynomialMulAdd8x16", "Iop_PolynomialMulAdd16x8",
  250. "Iop_PolynomialMulAdd32x4", "Iop_PolynomialMulAdd64x2", "Iop_PwAdd8x16",
  251. "Iop_PwAdd16x8", "Iop_PwAdd32x4", "Iop_PwAdd32Fx2", "Iop_PwAddL8Ux16",
  252. "Iop_PwAddL16Ux8", "Iop_PwAddL32Ux4", "Iop_PwAddL64Ux2",
  253. "Iop_PwAddL8Sx16", "Iop_PwAddL16Sx8", "Iop_PwAddL32Sx4",
  254. "Iop_PwExtUSMulQAdd8x16", "Iop_PwBitMtxXpose64x2", "Iop_Abs8x16",
  255. "Iop_Abs16x8", "Iop_Abs32x4", "Iop_Abs64x2", "Iop_Avg8Ux16",
  256. "Iop_Avg16Ux8", "Iop_Avg32Ux4", "Iop_Avg64Ux2", "Iop_Avg8Sx16",
  257. "Iop_Avg16Sx8", "Iop_Avg32Sx4", "Iop_Avg64Sx2", "Iop_Max8Sx16",
  258. "Iop_Max16Sx8", "Iop_Max32Sx4", "Iop_Max64Sx2", "Iop_Max8Ux16",
  259. "Iop_Max16Ux8", "Iop_Max32Ux4", "Iop_Max64Ux2", "Iop_Min8Sx16",
  260. "Iop_Min16Sx8", "Iop_Min32Sx4", "Iop_Min64Sx2", "Iop_Min8Ux16",
  261. "Iop_Min16Ux8", "Iop_Min32Ux4", "Iop_Min64Ux2", "Iop_CmpEQ8x16",
  262. "Iop_CmpEQ16x8", "Iop_CmpEQ32x4", "Iop_CmpEQ64x2", "Iop_CmpGT8Sx16",
  263. "Iop_CmpGT16Sx8", "Iop_CmpGT32Sx4", "Iop_CmpGT64Sx2", "Iop_CmpGT8Ux16",
  264. "Iop_CmpGT16Ux8", "Iop_CmpGT32Ux4", "Iop_CmpGT64Ux2", "Iop_Cnt8x16",
  265. "Iop_Clz8x16", "Iop_Clz16x8", "Iop_Clz32x4", "Iop_Cls8x16",
  266. "Iop_Cls16x8", "Iop_Cls32x4", "Iop_ShlN8x16", "Iop_ShlN16x8",
  267. "Iop_ShlN32x4", "Iop_ShlN64x2", "Iop_ShrN8x16", "Iop_ShrN16x8",
  268. "Iop_ShrN32x4", "Iop_ShrN64x2", "Iop_SarN8x16", "Iop_SarN16x8",
  269. "Iop_SarN32x4", "Iop_SarN64x2", "Iop_Shl8x16", "Iop_Shl16x8",
  270. "Iop_Shl32x4", "Iop_Shl64x2", "Iop_Shr8x16", "Iop_Shr16x8",
  271. "Iop_Shr32x4", "Iop_Shr64x2", "Iop_Sar8x16", "Iop_Sar16x8",
  272. "Iop_Sar32x4", "Iop_Sar64x2", "Iop_Sal8x16", "Iop_Sal16x8",
  273. "Iop_Sal32x4", "Iop_Sal64x2", "Iop_Rol8x16", "Iop_Rol16x8",
  274. "Iop_Rol32x4", "Iop_Rol64x2", "Iop_QShl8x16", "Iop_QShl16x8",
  275. "Iop_QShl32x4", "Iop_QShl64x2", "Iop_QSal8x16", "Iop_QSal16x8",
  276. "Iop_QSal32x4", "Iop_QSal64x2", "Iop_QShlNsatSU8x16",
  277. "Iop_QShlNsatSU16x8", "Iop_QShlNsatSU32x4", "Iop_QShlNsatSU64x2",
  278. "Iop_QShlNsatUU8x16", "Iop_QShlNsatUU16x8", "Iop_QShlNsatUU32x4",
  279. "Iop_QShlNsatUU64x2", "Iop_QShlNsatSS8x16", "Iop_QShlNsatSS16x8",
  280. "Iop_QShlNsatSS32x4", "Iop_QShlNsatSS64x2", "Iop_QandUQsh8x16",
  281. "Iop_QandUQsh16x8", "Iop_QandUQsh32x4", "Iop_QandUQsh64x2",
  282. "Iop_QandSQsh8x16", "Iop_QandSQsh16x8", "Iop_QandSQsh32x4",
  283. "Iop_QandSQsh64x2", "Iop_QandUQRsh8x16", "Iop_QandUQRsh16x8",
  284. "Iop_QandUQRsh32x4", "Iop_QandUQRsh64x2", "Iop_QandSQRsh8x16",
  285. "Iop_QandSQRsh16x8", "Iop_QandSQRsh32x4", "Iop_QandSQRsh64x2",
  286. "Iop_Sh8Sx16", "Iop_Sh16Sx8", "Iop_Sh32Sx4", "Iop_Sh64Sx2",
  287. "Iop_Sh8Ux16", "Iop_Sh16Ux8", "Iop_Sh32Ux4", "Iop_Sh64Ux2",
  288. "Iop_Rsh8Sx16", "Iop_Rsh16Sx8", "Iop_Rsh32Sx4", "Iop_Rsh64Sx2",
  289. "Iop_Rsh8Ux16", "Iop_Rsh16Ux8", "Iop_Rsh32Ux4", "Iop_Rsh64Ux2",
  290. "Iop_QandQShrNnarrow16Uto8Ux8", "Iop_QandQShrNnarrow32Uto16Ux4",
  291. "Iop_QandQShrNnarrow64Uto32Ux2", "Iop_QandQSarNnarrow16Sto8Sx8",
  292. "Iop_QandQSarNnarrow32Sto16Sx4", "Iop_QandQSarNnarrow64Sto32Sx2",
  293. "Iop_QandQSarNnarrow16Sto8Ux8", "Iop_QandQSarNnarrow32Sto16Ux4",
  294. "Iop_QandQSarNnarrow64Sto32Ux2", "Iop_QandQRShrNnarrow16Uto8Ux8",
  295. "Iop_QandQRShrNnarrow32Uto16Ux4", "Iop_QandQRShrNnarrow64Uto32Ux2",
  296. "Iop_QandQRSarNnarrow16Sto8Sx8", "Iop_QandQRSarNnarrow32Sto16Sx4",
  297. "Iop_QandQRSarNnarrow64Sto32Sx2", "Iop_QandQRSarNnarrow16Sto8Ux8",
  298. "Iop_QandQRSarNnarrow32Sto16Ux4", "Iop_QandQRSarNnarrow64Sto32Ux2",
  299. "Iop_QNarrowBin16Sto8Ux16", "Iop_QNarrowBin32Sto16Ux8",
  300. "Iop_QNarrowBin16Sto8Sx16", "Iop_QNarrowBin32Sto16Sx8",
  301. "Iop_QNarrowBin16Uto8Ux16", "Iop_QNarrowBin32Uto16Ux8",
  302. "Iop_NarrowBin16to8x16", "Iop_NarrowBin32to16x8",
  303. "Iop_QNarrowBin64Sto32Sx4", "Iop_QNarrowBin64Uto32Ux4",
  304. "Iop_NarrowBin64to32x4", "Iop_NarrowUn16to8x8", "Iop_NarrowUn32to16x4",
  305. "Iop_NarrowUn64to32x2", "Iop_QNarrowUn16Sto8Sx8",
  306. "Iop_QNarrowUn32Sto16Sx4", "Iop_QNarrowUn64Sto32Sx2",
  307. "Iop_QNarrowUn16Sto8Ux8", "Iop_QNarrowUn32Sto16Ux4",
  308. "Iop_QNarrowUn64Sto32Ux2", "Iop_QNarrowUn16Uto8Ux8",
  309. "Iop_QNarrowUn32Uto16Ux4", "Iop_QNarrowUn64Uto32Ux2",
  310. "Iop_Widen8Uto16x8", "Iop_Widen16Uto32x4", "Iop_Widen32Uto64x2",
  311. "Iop_Widen8Sto16x8", "Iop_Widen16Sto32x4", "Iop_Widen32Sto64x2",
  312. "Iop_InterleaveHI8x16", "Iop_InterleaveHI16x8", "Iop_InterleaveHI32x4",
  313. "Iop_InterleaveHI64x2", "Iop_InterleaveLO8x16", "Iop_InterleaveLO16x8",
  314. "Iop_InterleaveLO32x4", "Iop_InterleaveLO64x2",
  315. "Iop_InterleaveOddLanes8x16", "Iop_InterleaveEvenLanes8x16",
  316. "Iop_InterleaveOddLanes16x8", "Iop_InterleaveEvenLanes16x8",
  317. "Iop_InterleaveOddLanes32x4", "Iop_InterleaveEvenLanes32x4",
  318. "Iop_PackOddLanes8x16", "Iop_PackEvenLanes8x16", "Iop_PackOddLanes16x8",
  319. "Iop_PackEvenLanes16x8", "Iop_PackOddLanes32x4", "Iop_PackEvenLanes32x4",
  320. "Iop_CatOddLanes8x16", "Iop_CatOddLanes16x8", "Iop_CatOddLanes32x4",
  321. "Iop_CatEvenLanes8x16", "Iop_CatEvenLanes16x8", "Iop_CatEvenLanes32x4",
  322. "Iop_GetElem8x16", "Iop_GetElem16x8", "Iop_GetElem32x4",
  323. "Iop_GetElem64x2", "Iop_SetElem8x16", "Iop_SetElem16x8",
  324. "Iop_SetElem32x4", "Iop_SetElem64x2", "Iop_Dup8x16", "Iop_Dup16x8",
  325. "Iop_Dup32x4", "Iop_SliceV128", "Iop_Reverse8sIn16_x8",
  326. "Iop_Reverse8sIn32_x4", "Iop_Reverse16sIn32_x4", "Iop_Reverse8sIn64_x2",
  327. "Iop_Reverse16sIn64_x2", "Iop_Reverse32sIn64_x2", "Iop_Reverse1sIn8_x16",
  328. "Iop_Perm8x16", "Iop_Perm32x4", "Iop_PermOrZero8x16", "Iop_Perm8x16x2",
  329. "Iop_GetMSBs8x16", "Iop_RecipEst32Ux4", "Iop_RSqrtEst32Ux4",
  330. "Iop_MulI128by10", "Iop_MulI128by10Carry", "Iop_MulI128by10E",
  331. "Iop_MulI128by10ECarry", "Iop_2xMultU64Add128CarryOut", "Iop_V256to64_0",
  332. "Iop_V256to64_1", "Iop_V256to64_2", "Iop_V256to64_3", "Iop_64x4toV256",
  333. "Iop_V256toV128_0", "Iop_V256toV128_1", "Iop_V128HLtoV256",
  334. "Iop_AndV256", "Iop_OrV256", "Iop_XorV256", "Iop_NotV256",
  335. "Iop_CmpNEZ8x32", "Iop_CmpNEZ16x16", "Iop_CmpNEZ32x8", "Iop_CmpNEZ64x4",
  336. "Iop_Add8x32", "Iop_Add16x16", "Iop_Add32x8", "Iop_Add64x4",
  337. "Iop_Sub8x32", "Iop_Sub16x16", "Iop_Sub32x8", "Iop_Sub64x4",
  338. "Iop_CmpEQ8x32", "Iop_CmpEQ16x16", "Iop_CmpEQ32x8", "Iop_CmpEQ64x4",
  339. "Iop_CmpGT8Sx32", "Iop_CmpGT16Sx16", "Iop_CmpGT32Sx8", "Iop_CmpGT64Sx4",
  340. "Iop_ShlN16x16", "Iop_ShlN32x8", "Iop_ShlN64x4", "Iop_ShrN16x16",
  341. "Iop_ShrN32x8", "Iop_ShrN64x4", "Iop_SarN16x16", "Iop_SarN32x8",
  342. "Iop_Max8Sx32", "Iop_Max16Sx16", "Iop_Max32Sx8", "Iop_Max8Ux32",
  343. "Iop_Max16Ux16", "Iop_Max32Ux8", "Iop_Min8Sx32", "Iop_Min16Sx16",
  344. "Iop_Min32Sx8", "Iop_Min8Ux32", "Iop_Min16Ux16", "Iop_Min32Ux8",
  345. "Iop_Mul16x16", "Iop_Mul32x8", "Iop_MulHi16Ux16", "Iop_MulHi16Sx16",
  346. "Iop_QAdd8Ux32", "Iop_QAdd16Ux16", "Iop_QAdd8Sx32", "Iop_QAdd16Sx16",
  347. "Iop_QSub8Ux32", "Iop_QSub16Ux16", "Iop_QSub8Sx32", "Iop_QSub16Sx16",
  348. "Iop_Avg8Ux32", "Iop_Avg16Ux16", "Iop_Perm32x8", "Iop_CipherV128",
  349. "Iop_CipherLV128", "Iop_CipherSV128", "Iop_NCipherV128",
  350. "Iop_NCipherLV128", "Iop_SHA512", "Iop_SHA256", "Iop_Add64Fx4",
  351. "Iop_Sub64Fx4", "Iop_Mul64Fx4", "Iop_Div64Fx4", "Iop_Add32Fx8",
  352. "Iop_Sub32Fx8", "Iop_Mul32Fx8", "Iop_Div32Fx8", "Iop_I32StoF32x8",
  353. "Iop_F32toI32Sx8", "Iop_F32toF16x8", "Iop_F16toF32x8", "Iop_Sqrt32Fx8",
  354. "Iop_Sqrt64Fx4", "Iop_RSqrtEst32Fx8", "Iop_RecipEst32Fx8",
  355. "Iop_Max32Fx8", "Iop_Min32Fx8", "Iop_Max64Fx4", "Iop_Min64Fx4",
  356. "Iop_Rotx32", "Iop_Rotx64", "Iop_LAST",
  357. };
  358. static UShort cu_op_counts[] = {
  359. 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  360. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  361. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 4, 2, 3, 3, 4, 1, 1, 1,
  362. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  363. 1, 1, 1, 1, 1, 1, 19, 19, 19, 19, 59, 59, 19, 19, 19, 19, 59, 59, 19, 19,
  364. 19, 19, 19, 19, 19, 19, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  365. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  366. 1, 2, 2, 2, 31, 1, 1, 1, 17, 2, 2, 3, 31, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1,
  367. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  368. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 8, 64, 1, 1, 1, 1, 4, 4, 2, 1, 1,
  369. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31,
  370. 31, 31, 31, 1, 1, 50, 50, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2,
  371. 2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  372. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  373. 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  374. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  375. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  376. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
  377. 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  378. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  379. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  380. 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, 4, 31, 4,
  381. 4, 4, 65, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
  382. 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
  383. 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  384. 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 2, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  385. 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  386. 1, 1, 1, 1, 1, 1, 1, 99, 99, 1, 1, 1, 2, 17, 1, 1, 1, 1, 1, 1, 1, 5, 2,
  387. 1, 1, 2, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 31,
  388. 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
  389. 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  390. 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  391. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  392. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  393. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  394. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  395. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  396. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  397. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  398. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  399. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  400. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  401. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  402. 1, 1, 1, 1, 1, 99, 99, 99, 99, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  403. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  404. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  405. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 31, 1, 1, 1, 17, 1, 1, 1,
  406. 1, 3, 5, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  407. };
  408. #define NUM_LD 16
  409. static const HChar *cu_ld_names[] = {
  410. "Ity_INVALID", "Ity_I1", "Ity_I8", "Ity_I16", "Ity_I32", "Ity_I64",
  411. "Ity_I128", "Ity_F16", "Ity_F32", "Ity_F64", "Ity_D32", "Ity_D64",
  412. "Ity_D128", "Ity_F128", "Ity_V128", "Ity_V256",
  413. };
  414. static UShort cu_ld_counts[] = {
  415. 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  416. };
  417. static UWord cu_divisor = 1;
  418. /* count_tables: end */
  419. #define CU_MAX_THR 32 /* max num threads to track */
  420. static struct {
  421. ThreadId tid; /* thread ID */
  422. UWord cnt; /* count */
  423. Bool full; /* full -- i.e., exhausted */
  424. } cu_cntr[CU_MAX_THR];
  425. /* This routine is called during instrumentation to count up clock cycles
  426. * for a particular instruction within a superblock.
  427. */
  428. static Int cu_expr_cnt(IRExpr *expr)
  429. {
  430. IROp op;
  431. IRType ty;
  432. op = Iop_INVALID;
  433. switch (expr->tag) {
  434. case Iex_Binder: return 0;
  435. case Iex_Get: return 0;
  436. case Iex_GetI: return 0;
  437. case Iex_RdTmp: return 0;
  438. case Iex_Qop: op = expr->Iex.Qop.details->op; break;
  439. case Iex_Triop: op = expr->Iex.Triop.details->op; break;
  440. case Iex_Binop: op = expr->Iex.Binop.op; break;
  441. case Iex_Unop: op = expr->Iex.Unop.op; break;
  442. case Iex_Load:
  443. ty = expr->Iex.Load.ty;
  444. if (ty - Ity_INVALID < NUM_LD) {
  445. return cu_ld_counts[ty - Ity_INVALID];
  446. } else {
  447. return 0;
  448. }
  449. case Iex_Const: return 0;
  450. case Iex_ITE: return 1;
  451. case Iex_CCall: return 0;
  452. case Iex_VECRET: return 0;
  453. case Iex_GSPTR: return 0;
  454. }
  455. if (op < Iop_INVALID) {
  456. return 0;
  457. } else if (op - Iop_INVALID >= NUM_OP) {
  458. return 0;
  459. } else if (cu_op_counts[op - Iop_INVALID] == 99) {
  460. VG_(printf)("bogus count for op: ~%s\n", cu_op_names[op - Iop_INVALID]);
  461. return 0;
  462. } else {
  463. //VG_(printf)("op: %s\n", cu_op_names[op - Iop_INVALID]);
  464. return cu_op_counts[op - Iop_INVALID];
  465. }
  466. }
  467. /* Find the index into the table of counters for the privided ThreadId.
  468. * If nothing found return -1.
  469. */
  470. static Int find_cntr(ThreadId tid)
  471. {
  472. Int ix;
  473. for (ix = 0; ix < CU_MAX_THR; ix++) {
  474. if (cu_cntr[ix].tid == tid) {
  475. return ix;
  476. }
  477. }
  478. return -1;
  479. }
  480. static void register_thread(ThreadId tid)
  481. {
  482. Int ix, ixp1;
  483. ix = find_cntr(tid);
  484. for (ix = 0; ix < CU_MAX_THR; ix++) {
  485. if (cu_cntr[ix].tid == tid) {
  486. /* Already registered. */
  487. return;
  488. }
  489. if (cu_cntr[ix].tid == VG_INVALID_THREADID) {
  490. /* This is always the last tid in the tracked list. */
  491. break;
  492. }
  493. }
  494. ixp1 = ix + 1;
  495. if (ixp1 == CU_MAX_THR) {
  496. VG_(printf)(" CU_REGTHR(): too many threads to track\n");
  497. return;
  498. }
  499. cu_cntr[ix+1].tid = cu_cntr[ix].tid;
  500. cu_cntr[ix+1].cnt = cu_cntr[ix].cnt;
  501. cu_cntr[ix+1].full = cu_cntr[ix].full;
  502. cu_cntr[ix].tid = tid;
  503. cu_cntr[ix].cnt = 0;
  504. cu_cntr[ix].full = False;
  505. }
  506. static void clr_counter(ThreadId tid)
  507. {
  508. Int ix;
  509. ix = find_cntr(tid);
  510. if (ix < 0) return;
  511. cu_cntr[ix].cnt = 0;
  512. cu_cntr[ix].full = False;
  513. }
  514. static UWord get_counter(ThreadId tid)
  515. {
  516. Int ix;
  517. ix = find_cntr(tid);
  518. if (ix < 0) {
  519. return 0;
  520. } else {
  521. return cu_cntr[ix].cnt;
  522. }
  523. }
  524. static void update_clkcnt(UInt lclkcnt)
  525. {
  526. ThreadId tid;
  527. Int ix;
  528. UWord clkcnt;
  529. tid = VG_(get_running_tid)();
  530. ix = find_cntr(tid);
  531. if (ix < 0) {
  532. /* Not tracking, so ignore. */
  533. return;
  534. }
  535. /* Update. The counter will get pegged at ULONG_MAX>>1. */
  536. if (cu_cntr[ix].full == True) {
  537. return;
  538. }
  539. clkcnt = cu_cntr[ix].cnt + lclkcnt;
  540. if (clkcnt > (ULONG_MAX>>1)) {
  541. cu_cntr[ix].cnt = (ULONG_MAX>>1);
  542. cu_cntr[ix].full = True;
  543. } else {
  544. cu_cntr[ix].cnt = clkcnt;
  545. }
  546. }
  547. #if 0
  548. /* for debug/development, so we can see counts via --trace-flags */
  549. static void mark_count(UInt clkcntinc) { }
  550. #define MARK_COUNT(C) do { \
  551. expr = IRExpr_Const(IRConst_U64(C)); \
  552. di = unsafeIRDirty_0_N( 0, "mark_count", \
  553. VG_(fnptr_to_fnentry)( &mark_count ), \
  554. mkIRExprVec_1(expr)); \
  555. addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); } while (0)
  556. #else
  557. #define MARK_COUNT(C) /* */
  558. #endif
  559. /* We will count expressions and statements. We assume that, since the code
  560. * is flattened, no statement will have more than one expression.
  561. */
  562. static
  563. IRSB* cu_instrument ( VgCallbackClosure* closure,
  564. IRSB* sbIn,
  565. const VexGuestLayout* layout,
  566. const VexGuestExtents* vge,
  567. const VexArchInfo* vai,
  568. IRType gWordTy,
  569. IRType hWordTy )
  570. {
  571. IRDirty* di;
  572. Int i;
  573. IRSB* sbOut;
  574. Int lastst; /* last statement instrumented */
  575. Int lclkcnt; /* localized clock count */
  576. Int sclkcnt; /* statement clock count */
  577. IRExpr* expr;
  578. if (gWordTy != hWordTy) {
  579. /* We don't currently support this case. ??? */
  580. VG_(tool_panic)("host/guest word size mismatch");
  581. }
  582. /* Set up SB. */
  583. sbOut = deepCopyIRSBExceptStmts(sbIn);
  584. /* Copy verbatim any IR preamble preceding the first IMark. */
  585. i = 0;
  586. while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
  587. addStmtToIRSB( sbOut, sbIn->stmts[i] );
  588. i++;
  589. }
  590. /* Go through the statements adding code count for statements and their
  591. * member expressions. Update user variables at exits. This does not
  592. * remove the small number of clock cycles to implement clientrequest.
  593. */
  594. lclkcnt = cu_divisor; /* Count the clocks to branch here. */
  595. lastst = Ist_Exit;
  596. for (/*use current i*/; i < sbIn->stmts_used; i++) {
  597. IRStmt* st = sbIn->stmts[i];
  598. if (!st || st->tag == Ist_NoOp) continue;
  599. switch (st->tag) {
  600. case Ist_NoOp: /* no op */
  601. addStmtToIRSB( sbOut, st );
  602. break;
  603. case Ist_IMark: /* guest instruction marker */
  604. addStmtToIRSB( sbOut, st );
  605. break;
  606. case Ist_AbiHint: /* mark addr space undefined */
  607. addStmtToIRSB( sbOut, st );
  608. break;
  609. case Ist_Put: /* put to register */
  610. sclkcnt = 0;
  611. expr = st->Ist.Put.data;
  612. sclkcnt += cu_expr_cnt(expr);
  613. MARK_COUNT(sclkcnt);
  614. lclkcnt += sclkcnt;
  615. addStmtToIRSB( sbOut, st );
  616. break;
  617. case Ist_PutI: /* put to register, indirect */
  618. sclkcnt = 0;
  619. expr = st->Ist.PutI.details->data;
  620. sclkcnt += cu_expr_cnt(expr);
  621. MARK_COUNT(sclkcnt);
  622. lclkcnt += sclkcnt;
  623. addStmtToIRSB( sbOut, st );
  624. break;
  625. case Ist_WrTmp: /* write to temp */
  626. sclkcnt = 0;
  627. expr = st->Ist.WrTmp.data;
  628. sclkcnt += cu_expr_cnt(expr);
  629. MARK_COUNT(sclkcnt);
  630. lclkcnt += sclkcnt;
  631. addStmtToIRSB( sbOut, st );
  632. break;
  633. case Ist_Store: /* store to memory */
  634. sclkcnt = 0;
  635. expr = st->Ist.Store.data;
  636. sclkcnt += cu_expr_cnt(expr);
  637. sclkcnt += cu_divisor; /* Add 1 clock for the store. */
  638. MARK_COUNT(sclkcnt);
  639. lclkcnt += sclkcnt;
  640. addStmtToIRSB( sbOut, st );
  641. break;
  642. case Ist_LoadG: /* guarded load */
  643. sclkcnt = 0;
  644. expr = st->Ist.LoadG.details->addr;
  645. sclkcnt += cu_expr_cnt(expr);
  646. //expr = st->Ist.LoadG.details->alt;
  647. //sclkcnt += cu_expr_cnt(expr);
  648. expr = st->Ist.LoadG.details->guard;
  649. sclkcnt += cu_expr_cnt(expr);
  650. MARK_COUNT(sclkcnt);
  651. lclkcnt += sclkcnt;
  652. addStmtToIRSB( sbOut, st );
  653. break;
  654. case Ist_StoreG: /* guarded store */
  655. sclkcnt = 0;
  656. expr = st->Ist.StoreG.details->addr;
  657. sclkcnt += cu_expr_cnt(expr);
  658. expr = st->Ist.StoreG.details->data;
  659. sclkcnt += cu_expr_cnt(expr);
  660. expr = st->Ist.StoreG.details->guard;
  661. sclkcnt += cu_expr_cnt(expr);
  662. sclkcnt += cu_divisor; /* Add 1 clock for the store. */
  663. MARK_COUNT(sclkcnt);
  664. lclkcnt += sclkcnt;
  665. addStmtToIRSB( sbOut, st );
  666. break;
  667. case Ist_CAS: /* compare and swap */
  668. sclkcnt = cu_divisor;
  669. MARK_COUNT(sclkcnt);
  670. lclkcnt += sclkcnt;
  671. addStmtToIRSB( sbOut, st );
  672. break;
  673. case Ist_LLSC: /* load from memory */
  674. sclkcnt = 0;
  675. expr = st->Ist.LLSC.storedata;
  676. sclkcnt += cu_expr_cnt(expr);
  677. MARK_COUNT(sclkcnt);
  678. lclkcnt += sclkcnt;
  679. addStmtToIRSB( sbOut, st );
  680. break;
  681. case Ist_Dirty: /* instrumentation call */
  682. /* This dirty call could be our call to get the count so make
  683. * sure we are covering the current block.
  684. */
  685. expr = IRExpr_Const(IRConst_U64(lclkcnt));
  686. di = unsafeIRDirty_0_N( 0, "update_clkcnt",
  687. VG_(fnptr_to_fnentry)( &update_clkcnt ),
  688. mkIRExprVec_1(expr));
  689. addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
  690. lclkcnt = 0; /* Reset local count to zero. */
  691. addStmtToIRSB( sbOut, st );
  692. break;
  693. case Ist_MBE: /* memory bus event */
  694. /* Just assume one clock for now */
  695. sclkcnt = cu_divisor;
  696. MARK_COUNT(sclkdnt);
  697. lclkcnt += sclkcnt;
  698. addStmtToIRSB( sbOut, st );
  699. break;
  700. case Ist_Exit: /* exit superblock, add clocks? */
  701. /* This is the hook to add counts to user's counter. */
  702. /* \todo: Add counts for branching. ??? */
  703. sclkcnt = 0;
  704. expr = IRExpr_Const(IRConst_U64(lclkcnt));
  705. di = unsafeIRDirty_0_N( 0, "update_clkcnt",
  706. VG_(fnptr_to_fnentry)( &update_clkcnt ),
  707. mkIRExprVec_1(expr));
  708. addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
  709. lclkcnt = 0; /* Reset local count to zero. */
  710. addStmtToIRSB( sbOut, st );
  711. break;
  712. default:
  713. VG_(printf)("cu_main: st->tag=%u\n", st->tag);
  714. tl_assert(0);
  715. }
  716. lastst = st->tag;
  717. }
  718. if (lastst != Ist_Exit) {
  719. expr = IRExpr_Const(IRConst_U64(lclkcnt)); // IRConst_U64 == ULong
  720. di = unsafeIRDirty_0_N( 0, "update_clkcnt",
  721. VG_(fnptr_to_fnentry)( &update_clkcnt ),
  722. mkIRExprVec_1(expr) );
  723. addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
  724. }
  725. return sbOut;
  726. }
  727. /*------------------------------------------------------------*/
  728. /*--- Client Requests ---*/
  729. /*------------------------------------------------------------*/
  730. static Bool cu_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
  731. {
  732. //VG_(printf)("cu_handle_client_request called\n");
  733. if (!VG_IS_TOOL_USERREQ('C','U',arg[0])
  734. && VG_USERREQ__CU_REGTHR != arg[0]
  735. && VG_USERREQ__CU_CLRCTR != arg[0]
  736. && VG_USERREQ__CU_GETCTR != arg[0]
  737. && VG_USERREQ__CU_GETDIV != arg[0])
  738. return False;
  739. switch (arg[0]) {
  740. case VG_USERREQ__CU_REGTHR:
  741. register_thread(tid);
  742. break;
  743. case VG_USERREQ__CU_CLRCTR:
  744. clr_counter(tid);
  745. break;
  746. case VG_USERREQ__CU_GETCTR:
  747. *ret = get_counter(tid);
  748. break;
  749. case VG_USERREQ__CU_GETDIV:
  750. *ret = cu_divisor;
  751. break;
  752. default:
  753. VG_(message)(
  754. Vg_UserMsg,
  755. "Warning: unknown cputil client request code %llx\n",
  756. (ULong)arg[0]
  757. );
  758. return False;
  759. }
  760. return True;
  761. }
  762. /*------------------------------------------------------------*/
  763. /*--- Command line options ---*/
  764. /*------------------------------------------------------------*/
  765. /* Read one line. Return len, or -1 if problem. */
  766. static Int cu_readline(Int fd, HChar *buf, Int bufsiz)
  767. {
  768. Int r, n;
  769. HChar c;
  770. n = 0;
  771. do {
  772. r = VG_(read)(fd, &c, 1);
  773. if (r == -1) {
  774. return -1;
  775. }
  776. if (r == 0 || c == '\n') {
  777. buf[n] = 0;
  778. return n;
  779. }
  780. buf[n++] = c;
  781. if (n >= bufsiz -1) {
  782. buf[n] = 0;
  783. return -1;
  784. }
  785. } while (1);
  786. return 0;
  787. }
  788. /* Parse line of "<unsigned int><ws><string>". Guessing scanf is taboo. */
  789. static Int cu_parse_line(HChar *buf, UInt *ival, HChar *sval, Int slen)
  790. {
  791. Int ix, sx;
  792. UInt iv;
  793. ix = 0;
  794. /* Parse unsigned integer. */
  795. iv = 0;
  796. while ('0' <= buf[ix] && buf[ix] <= '9') {
  797. iv = (10*iv) + (buf[ix] - '0');
  798. ix = ix + 1;
  799. if (buf[ix] == 0) {
  800. /* end of buffer */
  801. return -1;
  802. }
  803. }
  804. if (ix == 0 ) {
  805. /* expecting unsigned integer */
  806. return -1;
  807. }
  808. *ival = iv;
  809. /* Skip whitespace. */
  810. if (buf[ix] != ' ' && buf[ix] != '\t') {
  811. /* expecting space */
  812. return -1;
  813. }
  814. while (buf[ix] == ' ' || buf[ix] == '\t') {
  815. ix++;
  816. if (buf[ix] == 0) {
  817. /* end of buffer */
  818. return -1;
  819. }
  820. }
  821. /* Parse string. */
  822. sx = 0;
  823. while (buf[ix] != ' ' && buf[ix] != '\t' && buf[ix] != 0) {
  824. if (sx >= slen - 1) {
  825. /* String buffer not long enough. */
  826. return -1;
  827. }
  828. sval[sx++] = buf[ix++];
  829. }
  830. sval[sx] = '\0';
  831. return 0;
  832. }
  833. static Bool cu_dump_op_table(const HChar *filename)
  834. {
  835. Int fd, i;
  836. HChar buf[80];
  837. SysRes sres;
  838. sres = VG_(open)(filename,
  839. VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
  840. VKI_S_IRUSR|VKI_S_IWUSR);
  841. if (sr_isError(sres)) {
  842. VG_(umsg)("error: can't open output file '%s'\n", filename);
  843. return False;
  844. } else {
  845. fd = sr_Res(sres);
  846. }
  847. VG_(sprintf)(buf, "# valgrind/cputil count table\n#\n");
  848. VG_(write)(fd, buf, VG_(strlen)(buf));
  849. VG_(sprintf)(buf, "%lu\tdivisor\n#\n", cu_divisor);
  850. VG_(write)(fd, buf, VG_(strlen)(buf));
  851. VG_(sprintf)(buf, "# load types\n");
  852. VG_(write)(fd, buf, VG_(strlen)(buf));
  853. for (i = 1; i < NUM_LD; i++) {
  854. VG_(sprintf)(buf, "%d\t%s\n", cu_ld_counts[i], cu_ld_names[i]);
  855. VG_(write)(fd, buf, VG_(strlen)(buf));
  856. }
  857. VG_(sprintf)(buf, "# other ops\n");
  858. VG_(write)(fd, buf, VG_(strlen)(buf));
  859. for (i = 1; i < NUM_OP; i++) {
  860. VG_(sprintf)(buf, "%d\t%s\n", cu_op_counts[i], cu_op_names[i]);
  861. VG_(write)(fd, buf, VG_(strlen)(buf));
  862. }
  863. VG_(close)(fd);
  864. return True;
  865. }
  866. static Bool cu_load_op_table(const HChar *filename)
  867. {
  868. Int fd, ln, ix, res;
  869. UInt cval;
  870. HChar buf[80], sval[40];
  871. SysRes sres;
  872. sres = VG_(open)(filename, VKI_O_RDONLY, VKI_S_IRUSR);
  873. if (sr_isError(sres)) {
  874. VG_(umsg)("error: can't open output file '%s'\n", filename);
  875. return False;
  876. } else {
  877. fd = sr_Res(sres);
  878. }
  879. ln = 0;
  880. res = cu_readline(fd, buf, 80); ln++;
  881. if (res < 0) return False;
  882. /* Read parameters. */
  883. cu_divisor = -1;
  884. while (1) {
  885. if (buf[0] == '#') { /* Allow #-comments. */
  886. res = cu_readline(fd, buf, 80); ln++;
  887. if (res < 0) return False;
  888. continue;
  889. }
  890. sval[39] = 0;
  891. res = cu_parse_line(buf, &cval, sval, 40);
  892. if (res < 0) {
  893. VG_(umsg)("parse error: line %d\n", ln);
  894. return False;
  895. }
  896. if (VG_(strcmp)("divisor", sval) == 0) {
  897. cu_divisor = cval;
  898. } else {
  899. break;
  900. }
  901. res = cu_readline(fd, buf, 80); ln++;
  902. if (res < 0) return False;
  903. }
  904. if (cu_divisor == -1) {
  905. VG_(umsg)("no divisor provided\n");
  906. return False;
  907. }
  908. /* Read load types. */
  909. ix = 1;
  910. while (ix < NUM_LD) {
  911. if (buf[0] == '#') { /* Allow #-comments. */
  912. res = cu_readline(fd, buf, 80); ln++;
  913. if (res < 0) return False;
  914. continue;
  915. }
  916. sval[39] = 0;
  917. res = cu_parse_line(buf, &cval, sval, 40);
  918. if (res < 0) {
  919. VG_(umsg)("parse error: line %d\n", ln);
  920. return False;
  921. }
  922. if (VG_(strcmp)(cu_ld_names[ix], sval) != 0) {
  923. VG_(umsg)("bad opname: line %d\n", ln);
  924. return False;
  925. }
  926. if (0 <= cval && cval <= 1999) { /* arbitrary max clock count */
  927. cu_ld_counts[ix] = cval;
  928. } else {
  929. VG_(umsg)("invalid clock count: %u, line %d\n", cval, ln);
  930. VG_(umsg)(" expect in: [0,1999]\n");
  931. return False;
  932. }
  933. ix++;
  934. res = cu_readline(fd, buf, 80); ln++;
  935. if (res < 0) return False;
  936. }
  937. ix = 1;
  938. while (ix < NUM_OP) {
  939. if (buf[0] == '#') { /* Allow #-comments. */
  940. res = cu_readline(fd, buf, 80); ln++;
  941. if (res < 0) return False;
  942. continue;
  943. }
  944. sval[39] = 0;
  945. res = cu_parse_line(buf, &cval, sval, 40);
  946. if (res < 0) {
  947. VG_(umsg)("parse error: line %d\n", ln);
  948. return False;
  949. }
  950. if (VG_(strcmp)(cu_op_names[ix], sval) != 0) {
  951. VG_(umsg)("bad opname: line %d\n", ln);
  952. return False;
  953. }
  954. if (0 <= cval && cval <= 1999) { /* arbitrary max clock count */
  955. cu_op_counts[ix] = cval;
  956. } else {
  957. VG_(umsg)("invalid clock count: %u, line %d\n", cval, ln);
  958. VG_(umsg)(" expect in: [0,1999]\n");
  959. return False;
  960. }
  961. ix++;
  962. res = cu_readline(fd, buf, 80); ln++;
  963. if (res < 0) return False;
  964. }
  965. VG_(close)(fd);
  966. return True;
  967. }
  968. static void cu_print_usage(void)
  969. {
  970. VG_(printf)(
  971. " --help help me\n"
  972. " --dump-op-table=file print op-count table to <file>\n"
  973. " --load-op-table=file load op-count table from <file>\n"
  974. );
  975. }
  976. static void cu_print_debug_usage(void)
  977. {
  978. VG_(printf)(
  979. " (none)\n"
  980. );
  981. }
  982. static Bool cu_process_cmd_line_option(const HChar* arg)
  983. {
  984. const HChar *xarg;
  985. if VG_STR_CLO(arg, "--dump-op-table", xarg) {
  986. return cu_dump_op_table(xarg);
  987. }
  988. else if VG_STR_CLO(arg, "--load-op-table", xarg) {
  989. return cu_load_op_table(xarg);
  990. }
  991. else
  992. return False;
  993. return True;
  994. }
  995. /*------------------------------------------------------------*/
  996. /*--- Other hooks ---*/
  997. /*------------------------------------------------------------*/
  998. static void cu_fini(Int exitcode)
  999. {
  1000. Int ix;
  1001. Bool counter_exhausted = False;
  1002. for (ix = 0; ix < CU_MAX_THR; ix++) {
  1003. if (cu_cntr[ix].tid == VG_INVALID_THREADID) {
  1004. break;
  1005. }
  1006. if (cu_cntr[ix].full) {
  1007. counter_exhausted = True;
  1008. }
  1009. }
  1010. if (counter_exhausted) {
  1011. VG_(message)(Vg_UserMsg, "cputil: counter exhausted\n");
  1012. }
  1013. }
  1014. static void cu_post_clo_init(void)
  1015. {
  1016. }
  1017. static void cu_pre_clo_init(void)
  1018. {
  1019. VG_(details_name) ("cputil");
  1020. VG_(details_version) (cu_version);
  1021. VG_(details_description) ("a CPU utilization profiler");
  1022. VG_(details_copyright_author)(
  1023. "Copyright (C) 2013,2016,2021 and GNU GPL'd, by Matt Wette.");
  1024. VG_(details_bug_reports_to) (VG_BUGS_TO);
  1025. //VG_(details_avg_translation_sizeB) ( 200 );
  1026. VG_(basic_tool_funcs) (cu_post_clo_init,
  1027. cu_instrument,
  1028. cu_fini);
  1029. VG_(needs_command_line_options)(cu_process_cmd_line_option,
  1030. cu_print_usage,
  1031. cu_print_debug_usage);
  1032. VG_(needs_client_requests) (cu_handle_client_request);
  1033. /* Initialize default counter */
  1034. cu_cntr[0].tid = VG_INVALID_THREADID;
  1035. cu_cntr[0].cnt = 0;
  1036. cu_cntr[0].full = False;
  1037. }
  1038. VG_DETERMINE_INTERFACE_VERSION(cu_pre_clo_init)
  1039. /*--------------------------------------------------------------------*/
  1040. /*--- end cu_main.c ---*/
  1041. /*--------------------------------------------------------------------*/
  1042. /* Local Variables: */
  1043. /* c-basic-offset: 3 */
  1044. /* eval: (c-set-offset 'case-label '+) */
  1045. /* End: */