# cpistack_data.py

import collections, sniper_lib, sniper_config


class CpiData:

  def __init__(self, jobid = '', resultsdir = '', config = None, stats = None, data = None, partial = None):
    if data:
      data_raw = data
    else:
      data_raw = sniper_lib.get_results(jobid = jobid, resultsdir = resultsdir, config = config, stats = stats, partial = partial)
    self.stats = data_raw['results']
    self.config = data_raw['config']
    self.parse()
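
  # parse() turns the raw simulation statistics into self.data: a per-core
  # mapping from CPI-stack component name (e.g. 'Base', 'SyncPthreadMutex',
  # 'DataCacheL2') to the number of core cycles attributed to that component.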
  def parse(self):
    ncores = int(self.config['general/total_cores'])
    instrs = self.stats['performance_model.instruction_count'] if sum(self.stats['performance_model.instruction_count']) else self.stats['core.instructions']
    try:
      times = self.stats['performance_model.elapsed_time']
      cycles_scale = self.stats['fs_to_cycles_cores']
    except KeyError:
      # On error, assume that we are using the pre-DVFS version
      times = self.stats['performance_model.cycle_count']
      cycles_scale = [ 1. for idx in range(ncores) ]
    time0_begin = self.stats['global.time_begin']
    time0_end = self.stats['global.time_end']
    times = [ self.stats['performance_model.elapsed_time_end'][core] - time0_begin for core in range(ncores) ]
    # TODO: The below is needed for sampling. We're currently set up to work properly with the one-IPC model used in combination with --cache-only
    #if self.stats.get('fastforward_performance_model.fastforwarded_time', [0])[0]:
    #  fastforward_scale = times[0] / (times[0] - self.stats['fastforward_performance_model.fastforwarded_time'][0])
    #  fastforward_extrapolate = True
    #  times = [ t-f for t, f in zip(times, self.stats['fastforward_performance_model.fastforwarded_time']) ]
    #else:
    #  fastforward_scale = 1.
    #  fastforward_extrapolate = False
    if 'performance_model.cpiFastforwardTime' in self.stats:
      del self.stats['performance_model.cpiFastforwardTime']
    fastforward_scale = 1.
    fastforward_extrapolate = False
    data = collections.defaultdict(lambda: collections.defaultdict(long))
    for key, values in self.stats.items():
      if '.cpi' in key:
        if key.startswith('thread.'):
          # Ignore per-thread statistics
          continue
        if key.startswith('fastforward_timer.') and fastforward_extrapolate:
          continue
        key = key.split('.cpi')[1]
        for core in range(ncores):
          data[core][key] += values[core] * cycles_scale[core]
    if not data:
      raise ValueError('No .cpi data found, simulation did not use the interval core model')
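    # At this point data[core] maps each CPI component name to the time spent
    # in that component, converted to core cycles via cycles_scale (femtoseconds
    # to cycles on DVFS-enabled versions; pre-DVFS stats are already in cycles,
    # so there the scale factor is 1).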
    # Split up cpiBase into 1/issue and path dependencies
    for core in range(ncores):
      if data[core].get('SyncMemAccess', 0) == data[core].get('SyncPthreadBarrier', 0):
        # Work around a bug in iGraphite where SyncMemAccess was wrongly copied from SyncPthreadBarrier.
        # Since SyncMemAccess usually isn't very big anyway, setting it to zero should be accurate enough.
        # For simulations with a fixed version of iGraphite, the chances of SyncMemAccess being identical to
        # SyncPthreadBarrier, down to the last femtosecond, are slim, so this code shouldn't trigger.
        data[core]['SyncMemAccess'] = 0
      if data[core].get('StartTime') is None and 'performance_model.idle_elapsed_time' in self.stats:
        # Fix a bug whereby the start time was not being reported in the CPI stacks correctly
        data[core]['StartTime'] = cycles_scale[core] * self.stats['performance_model.idle_elapsed_time'][core] - \
                                  data[core]['SyncFutex'] - data[core]['SyncPthreadMutex'] - \
                                  data[core]['SyncPthreadCond'] - data[core]['SyncPthreadBarrier'] - \
                                  data[core]['Recv']
      # Critical path accounting
      cpContrMap = {
        # critical path components
        'interval_timer.cpContr_generic': 'PathInt',
        'interval_timer.cpContr_store': 'PathStore',
        'interval_timer.cpContr_load_other': 'PathLoadX',
        'interval_timer.cpContr_branch': 'PathBranch',
        'interval_timer.cpContr_load_l1': 'DataCacheL1',
        'interval_timer.cpContr_load_l2': 'DataCacheL2',
        'interval_timer.cpContr_load_l3': 'DataCacheL3',
        'interval_timer.cpContr_fp_addsub': 'PathFP',
        'interval_timer.cpContr_fp_muldiv': 'PathFP',
        # issue ports
        'interval_timer.cpContr_port0': 'PathP0',
        'interval_timer.cpContr_port1': 'PathP1',
        'interval_timer.cpContr_port2': 'PathP2',
        'interval_timer.cpContr_port34': 'PathP34',
        'interval_timer.cpContr_port5': 'PathP5',
        'interval_timer.cpContr_port05': 'PathP05',
        'interval_timer.cpContr_port015': 'PathP015',
      }
      for k in self.stats:
        if k.startswith('interval_timer.cpContr_'):
          if k not in cpContrMap.keys():
            print 'Missing in cpContrMap:', k
      # Keep 1/width as base CPI component, break down the remainder according to critical path contributors
      BaseBest = instrs[core] / float(sniper_config.get_config(self.config, 'perf_model/core/interval_timer/dispatch_width', core))
      BaseAct = data[core]['Base']
      BaseCp = BaseAct - BaseBest
      scale = BaseCp / (BaseAct or 1)
      for cpName, cpiName in cpContrMap.items():
        val = float(self.stats.get(cpName, [0]*ncores)[core]) / 1e6
        data[core]['Base'] -= val * scale
        data[core][cpiName] = data[core].get(cpiName, 0) + val * scale
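      # Worked example with hypothetical numbers: with dispatch_width = 4 and
      # instrs[core] = 1000, BaseBest = 250 cycles. If BaseAct = 400, then
      # BaseCp = 150 and scale = 0.375, so a critical-path contributor worth
      # 80 cycles moves 80 * 0.375 = 30 cycles out of 'Base' into its Path component.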
      # Issue width
      for key, values in self.stats.items():
        if key.startswith('interval_timer.detailed-cpiBase-'):
          if 'DispatchWidth' in key:
            if 'DispatchRate' not in key: # We already accounted for DispatchRate above, don't do it twice
              data[core]['Base'] -= values[core]
              data[core]['Issue'] = data[core].get('Issue', 0) + values[core]
      # Fix up large cpiSync fractions that started before but ended inside our interval
      time0_me = 'performance_model.elapsed_time_begin' in self.stats and self.stats['performance_model.elapsed_time_begin'][core] or 0
      if time0_me < time0_begin:
        time0_extra = time0_begin - time0_me
        # Number of cycles that weren't accounted for when starting this interval
        cycles_extra = time0_extra * cycles_scale[core]
        # Components that could be the cause of cycles_extra. It should be just one, but if there are many, we'll have to guess
        sync_components = dict([ (key, value) for key, value in data[core].items() if (key.startswith('Sync') or key == 'StartTime') and value > cycles_extra ])
        sync_total = sum(sync_components.values())
        for key, value in sync_components.items():
          data[core][key] -= cycles_extra*value/float(sync_total)
      data[core]['Imbalance'] = cycles_scale[core] * max(times) - sum(data[core].values())
    self.data = data
    self.ncores = ncores
    self.cores = range(ncores)
    self.instrs = instrs
    self.times = times
    self.cycles_scale = cycles_scale
    self.fastforward_scale = fastforward_scale

  def get_compfrac(self):
    max_time = self.cycles_scale[0] * max(self.times)
    return dict([ (
      core,
      1 - (self.data[core].get('StartTime', 0) + self.data[core].get('Imbalance', 0) + self.data[core].get('SyncPthreadCond', 0) + \
           self.data[core].get('SyncPthreadBarrier', 0) + self.data[core].get('SyncJoin', 0) + self.data[core].get('Recv', 0)) / (float(max_time) or 1.)
    ) for core in self.data.keys() ])
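
  # get_compfrac() returns, per core, the fraction of time spent computing,
  # i.e. not waiting at startup, barriers, condition variables, joins or
  # receives, and not lost to load imbalance. filter() below uses it to drop
  # mostly-idle cores when core_mincomp is set.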
  def filter(self, cores_list = None, core_mincomp = 0):
    if not cores_list:
      cores_list = self.cores
    if core_mincomp:
      compfrac = self.get_compfrac()
      cores_list = [ core for core in cores_list if compfrac[core] >= core_mincomp ]
    self.data = dict([ (core, self.data[core]) for core in cores_list ])
    self.instrs = dict([ (core, self.instrs[core]) for core in cores_list ])
    self.ncores = len(cores_list)
    self.cores = cores_list

  def aggregate(self):
    allkeys = self.data[self.cores[0]].keys()
    self.data = { 0: dict([ (key, sum([ self.data[core][key] for core in self.cores ]) / len(self.cores)) for key in allkeys ]) }
    self.instrs = { 0: sum(self.instrs[core] for core in self.cores) / len(self.cores) }
    self.ncores = 1
    self.cores = [0]
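

# A minimal usage sketch, assuming a Sniper results directory is passed on the
# command line (the core_mincomp threshold of 0.5 is an illustrative choice,
# not something this module prescribes):
if __name__ == '__main__':
  import sys
  cpidata = CpiData(resultsdir = sys.argv[1] if len(sys.argv) > 1 else '.')
  cpidata.filter(core_mincomp = .5)  # drop cores that were mostly idle
  cpidata.aggregate()                # average the remaining cores into one stack
  for key, value in sorted(cpidata.data[0].items()):
    print key, value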