- # Common config file for the Nehalem core
- [general]
- enable_icache_modeling = true
- [perf_model/core]
- logical_cpus = 1 # number of SMT threads per core
- type = interval # presumably selects the model tuned by [perf_model/core/interval_timer] - TODO confirm
- core_model = nehalem
- [perf_model/core/interval_timer]
- dispatch_width = 4 # presumably instructions dispatched per cycle - TODO confirm
- window_size = 128 # presumably instruction-window entries - TODO confirm
- num_outstanding_loadstores = 10
- [perf_model/sync]
- reschedule_cost = 1000 # units are not stated in this file - TODO confirm
- [caching_protocol]
- type = parametric_dram_directory_msi
- [perf_model/branch_predictor]
- type = pentium_m
- mispredict_penalty=8 # Covers only the (approximate) front-end portion of the misprediction penalty, for interval simulation; presumably in cycles - TODO confirm
- [perf_model/tlb]
- penalty = 30 # Page walk penalty in cycles
- [perf_model/itlb]
- size = 128 # Number of I-TLB entries
- associativity = 4 # I-TLB associativity
- [perf_model/dtlb]
- size = 64 # Number of D-TLB entries
- associativity = 4 # D-TLB associativity
- [perf_model/stlb]
- size = 512 # Number of second-level TLB (S-TLB) entries
- associativity = 4 # S-TLB (second-level TLB) associativity
- [perf_model/cache]
- levels = 3 # cache sections present in this file: l1_icache, l1_dcache, l2_cache, l3_cache
- [perf_model/l1_icache]
- perfect = false
- cache_size = 32 # units are not stated in this file; presumably KB - TODO confirm
- associativity = 4
- address_hash = mask
- replacement_policy = lru
- data_access_time = 4 # presumably in cycles - TODO confirm
- tags_access_time = 1 # presumably in cycles - TODO confirm
- perf_model_type = parallel
- writethrough = 0
- shared_cores = 1 # presumably the number of cores sharing this cache - TODO confirm
- [perf_model/l1_dcache]
- perfect = false
- cache_size = 32 # units are not stated in this file; presumably KB - TODO confirm
- associativity = 8 # the only value here that differs from [perf_model/l1_icache], which uses 4
- address_hash = mask
- replacement_policy = lru
- data_access_time = 4 # presumably in cycles - TODO confirm
- tags_access_time = 1 # presumably in cycles - TODO confirm
- perf_model_type = parallel
- writethrough = 0
- shared_cores = 1 # presumably the number of cores sharing this cache - TODO confirm
- [perf_model/l2_cache]
- perfect = false
- cache_size = 256 # units are not stated in this file; presumably KB - TODO confirm
- associativity = 8
- address_hash = mask
- replacement_policy = lru
- data_access_time = 8 # 8.something cycles according to membench, minus 1 cycle for the L1 tag access time
- # http://www.realworldtech.com/page.cfm?ArticleID=RWT040208182719&p=7
- tags_access_time = 3
- # Total neighbor L1/L2 access time is around 40/70 cycles (60-70 when it's coming out of L1)
- writeback_time = 50 # L3 hit time will be added
- perf_model_type = parallel
- writethrough = 0
- shared_cores = 1 # presumably the number of cores sharing this cache - TODO confirm
- [perf_model/l3_cache]
- cache_block_size = 64 # presumably in bytes - TODO confirm
- address_hash = mask
- dvfs_domain = global # L1 and L2 run at core frequency (default), L3 is system frequency
- prefetcher = none
- writeback_time = 0 # note: no cache_size, associativity or access times are set for L3 in this file
- [clock_skew_minimization]
- scheme = barrier # presumably configured by [clock_skew_minimization/barrier] - TODO confirm
- [clock_skew_minimization/barrier]
- quantum = 100 # units are not stated in this file - TODO confirm
- [dvfs]
- transition_latency = 2000 # In ns (2000 ns = 2 microseconds); "under 2 microseconds" according to http://download.intel.com/design/intarch/papers/323671.pdf (page 8)
- [dvfs/simple]
- cores_per_socket = 1
- [power]
- vdd = 1.2 # Volts
- technology_node = 45 # nm
|