# nehalem.cfg
# Common config file for Nehalem core
#
# NOTE(review): this file describes itself as a "common" core config and its
# [perf_model/l3_cache] section sets no size or associativity; presumably a
# machine-level config that includes this file supplies them -- confirm.

[general]
enable_icache_modeling = true

[perf_model/core]
logical_cpus = 1 # number of SMT threads per core
type = interval
core_model = nehalem

[perf_model/core/interval_timer]
dispatch_width = 4
window_size = 128
num_outstanding_loadstores = 10

[perf_model/sync]
reschedule_cost = 1000

[caching_protocol]
type = parametric_dram_directory_msi

[perf_model/branch_predictor]
type = pentium_m
mispredict_penalty = 8 # Reflects just the front-end portion (approx) of the penalty for Interval Simulation

[perf_model/tlb]
penalty = 30 # Page walk penalty in cycles

[perf_model/itlb]
size = 128 # Number of I-TLB entries
associativity = 4 # I-TLB associativity

[perf_model/dtlb]
size = 64 # Number of D-TLB entries
associativity = 4 # D-TLB associativity

[perf_model/stlb]
size = 512 # Number of second-level TLB entries
associativity = 4 # S-TLB associativity

# Three cache levels are modeled: L1 (split I/D), L2 and L3, configured below.
# NOTE(review): cache_size values are presumably in KB and the *_time values in
# cycles -- confirm against the simulator documentation.
[perf_model/cache]
levels = 3

[perf_model/l1_icache]
perfect = false
cache_size = 32
associativity = 4
address_hash = mask
replacement_policy = lru
data_access_time = 4
tags_access_time = 1
perf_model_type = parallel
writethrough = 0
shared_cores = 1

# Same settings as the L1 I-cache except for the higher associativity (8 vs 4).
[perf_model/l1_dcache]
perfect = false
cache_size = 32
associativity = 8
address_hash = mask
replacement_policy = lru
data_access_time = 4
tags_access_time = 1
perf_model_type = parallel
writethrough = 0
shared_cores = 1

[perf_model/l2_cache]
perfect = false
cache_size = 256
associativity = 8
address_hash = mask
replacement_policy = lru
data_access_time = 8 # 8.something according to membench, -1 cycle L1 tag access time
# http://www.realworldtech.com/page.cfm?ArticleID=RWT040208182719&p=7
tags_access_time = 3
# Total neighbor L1/L2 access time is around 40/70 cycles (60-70 when it's coming out of L1)
writeback_time = 50 # L3 hit time will be added
perf_model_type = parallel
writethrough = 0
shared_cores = 1

# Only the L3 settings common to all Nehalem-based configurations are given here.
[perf_model/l3_cache]
cache_block_size = 64
address_hash = mask
dvfs_domain = global # L1 and L2 run at core frequency (default), L3 is system frequency
prefetcher = none
writeback_time = 0

[clock_skew_minimization]
scheme = barrier

# Settings for the "barrier" scheme selected above.
[clock_skew_minimization/barrier]
quantum = 100

[dvfs]
transition_latency = 2000 # In ns, "under 2 microseconds" according to http://download.intel.com/design/intarch/papers/323671.pdf (page 8)

[dvfs/simple]
cores_per_socket = 1

[power]
vdd = 1.2 # Volts
technology_node = 45 # nm