123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- # Configuration file for Xeon X5550 Gainestown
- # See http://en.wikipedia.org/wiki/Gainestown_(microprocessor)#Gainestown
- # and http://ark.intel.com/products/37106
- #include nehalem
- # L2 cache: choose which prefetcher is attached to this cache level.
- [perf_model/l2_cache]
- # `ghb` is tuned in [perf_model/l2_cache/prefetcher/ghb] further down.
- prefetcher = ghb
- # L2 prefetcher options that sit above the per-model [.../prefetcher/ghb] section.
- # With the value below, hits on prefetched lines also trigger prefetches (see inline note).
- [perf_model/l2_cache/prefetcher]
- prefetch_on_prefetch_hit = true # Do prefetches only on miss (false), or also on hits to lines brought in by the prefetcher (true)
- # Parameters for the `ghb` prefetcher selected by `prefetcher = ghb` in [perf_model/l2_cache].
- # NOTE(review): presumably a global-history-buffer prefetcher; the meaning and units of
- # width/depth are not stated in this file - confirm against the prefetcher implementation.
- [perf_model/l2_cache/prefetcher/ghb]
- width = 2
- depth = 2
- # Both sizes are 512 (presumably entry counts - TODO confirm).
- ghb_size = 512
- ghb_table_size = 512
- # Core clock. The DRAM latency note in [perf_model/dram] quotes "@ 2.66 GHz",
- # so the unit here is presumably GHz - confirm.
- [perf_model/core]
- frequency = 2.66
- # L3 cache, shared between cores (see shared_cores below).
- # NOTE(review): the file header names the Xeon X5550; verify that cache_size and
- # shared_cores match the intended target (see the ark.intel.com link) - TODO confirm.
- [perf_model/l3_cache]
- perfect = false
- # Units for the two sizes are not stated here (presumably bytes and KB) - confirm.
- cache_block_size = 64
- cache_size = 4096
- associativity = 16
- address_hash = mask
- # Replacement policy; presumably tuned by [perf_model/l3_cache/rrip] below - confirm.
- replacement_policy = rrip_pacman
- drrip = true
- # Access times are given in cycles according to the inline note.
- data_access_time = 30 # 35 cycles total according to membench, +L1+L2 tag times
- tags_access_time = 10
- perf_model_type = parallel
- # NOTE(review): written as 0 while other flags in this file use true/false;
- # presumably "disabled" - confirm the parser reads both spellings the same way.
- writethrough = 0
- # Number of cores sharing this cache.
- shared_cores = 8
- # Options under l3_cache/rrip; presumably consumed by the `rrip_pacman` policy
- # selected in [perf_model/l3_cache] - confirm.
- [perf_model/l3_cache/rrip]
- pacman_m = true
- # Comma-separated list of four values; what each position refers to is not stated
- # in this file - NOTE(review): confirm against the policy implementation.
- bits = 2,2,2,2
- # Directory in front of DRAM: capacity, associativity and directory organisation.
- [perf_model/dram_directory]
- # total_entries = number of entries per directory controller.
- # 1048576 = 2^20 entries.
- total_entries = 1048576
- associativity = 16
- # NOTE(review): presumably a full sharer map per entry - confirm.
- directory_type = full_map
- # DRAM controllers: count and placement, access latency, bandwidth and DIMM layout.
- [perf_model/dram]
- # -1 means that we have a number of distributed DRAM controllers (4 in this case)
- num_controllers = -1
- # NOTE(review): the "4 in this case" above presumably follows from this value together
- # with the core count, which is not set in this file - confirm.
- controllers_interleaving = 4
- # DRAM access latency in nanoseconds. Should not include L1-LLC tag access time, directory access time (14 cycles = 5.2 ns),
- # or network time [(cache line size + 2*{overhead=40}) / network bandwidth = 18 ns]
- # Membench says 175 cycles @ 2.66 GHz = 66 ns total
- # 66 - 5.2 - 18 is about 43 ns, so 45 is not the plain difference.
- # NOTE(review): confirm how the L1-LLC tag access time enters this figure.
- latency = 45
- per_controller_bandwidth = 7.6 # In GB/s, as measured by core_validation-dram
- # DIMM organisation per controller.
- chips_per_dimm = 8
- dimms_per_controller = 4
- # Both memory network models are set to `bus`; its parameters are in [network/bus].
- [network]
- memory_model_1 = bus
- memory_model_2 = bus
- # Parameters for the `bus` model selected in [network]. Per the inline notes it
- # represents the QPI links, with on-chip traffic to the local controller excluded.
- [network/bus]
- bandwidth = 25.6 # in GB/s. Actually, it's 12.8 GB/s per direction and per connected chip pair
- ignore_local_traffic = true # Memory controllers are on-chip, so traffic from core0 to dram0 does not use the QPI links
|