0017-haswell-NRI-Add-pre-training-steps.patch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. From 42e43eb210bbb172af8e5ad064326c4570be8654 Mon Sep 17 00:00:00 2001
  2. From: Angel Pons <th3fanbus@gmail.com>
  3. Date: Sat, 7 May 2022 23:12:18 +0200
  4. Subject: [PATCH 17/26] haswell NRI: Add pre-training steps
  5. Implement pre-training steps, which consist of enabling ECC I/O and
  6. filling the WDB (Write Data Buffer, stores test patterns) through a
  7. magic LDAT port.
  8. Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1
  9. Signed-off-by: Angel Pons <th3fanbus@gmail.com>
  10. ---
  11. .../intel/haswell/native_raminit/Makefile.inc | 1 +
  12. .../haswell/native_raminit/raminit_main.c | 34 ++++
  13. .../haswell/native_raminit/raminit_native.h | 24 +++
  14. .../haswell/native_raminit/reg_structs.h | 45 +++++
  15. .../intel/haswell/native_raminit/setup_wdb.c | 159 ++++++++++++++++++
  16. .../intel/haswell/registers/mchbar.h | 9 +
  17. 6 files changed, 272 insertions(+)
  18. create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c
  19. diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
  20. index e9212df9e6..8d7d4e4db0 100644
  21. --- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
  22. +++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
  23. @@ -10,5 +10,6 @@ romstage-y += memory_map.c
  24. romstage-y += raminit_main.c
  25. romstage-y += raminit_native.c
  26. romstage-y += reut.c
  27. +romstage-y += setup_wdb.c
  28. romstage-y += spd_bitmunching.c
  29. romstage-y += timings_refresh.c
  30. diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  31. index 73ff180b8c..5e4674957d 100644
  32. --- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  33. +++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  34. @@ -13,6 +13,39 @@
  35. #include "raminit_native.h"
  36. +static enum raminit_status pre_training(struct sysinfo *ctrl)
  37. +{
  38. + /* Skip on S3 resume */
  39. + if (ctrl->bootmode == BOOTMODE_S3)
  40. + return RAMINIT_STATUS_SUCCESS;
  41. +
  42. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  43. + for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
  44. + if (!rank_in_ch(ctrl, slot + slot, channel))
  45. + continue;
  46. +
  47. + printk(RAM_DEBUG, "C%uS%u:\n", channel, slot);
  48. + printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]);
  49. + printk(RAM_DEBUG, "\tMR1: 0x%04x\n", ctrl->mr1[channel][slot]);
  50. + printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]);
  51. + printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]);
  52. + printk(RAM_DEBUG, "\n");
  53. + }
  54. + if (ctrl->is_ecc) {
  55. + union mad_dimm_reg mad_dimm = {
  56. + .raw = mchbar_read32(MAD_DIMM(channel)),
  57. + };
  58. + /* Enable ECC I/O */
  59. + mad_dimm.ecc_mode = 1;
  60. + mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
  61. + /* Wait 4 usec after enabling the ECC I/O, needed by HW */
  62. + udelay(4);
  63. + }
  64. + }
  65. + setup_wdb(ctrl);
  66. + return RAMINIT_STATUS_SUCCESS;
  67. +}
  68. +
  69. struct task_entry {
  70. enum raminit_status (*task)(struct sysinfo *);
  71. bool is_enabled;
  72. @@ -26,6 +59,7 @@ static const struct task_entry cold_boot[] = {
  73. { configure_mc, true, "CONFMC", },
  74. { configure_memory_map, true, "MEMMAP", },
  75. { do_jedec_init, true, "JEDECINIT", },
  76. + { pre_training, true, "PRETRAIN", },
  77. };
  78. /* Return a generic stepping value to make stepping checks simpler */
  79. diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  80. index e3cf4254a0..f29c2ec366 100644
  81. --- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  82. +++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  83. @@ -35,6 +35,13 @@
  84. #define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
  /* Mask with bit 0 of every byte set, i.e. every 8th bit of a 32-bit word */
  #define BASIC_VA_PAT_SPREAD_8    0x01010101

  /* Number of 64-bit chunks written per WDB cacheline */
  #define WDB_CACHE_LINE_SIZE      8

  /* Number of WDB cacheline mux seed registers (RD and WR each) per channel */
  #define NUM_WDB_CL_MUX_SEEDS     3
  /* Number of CADB mux seeds */
  #define NUM_CADB_MUX_SEEDS       3
  91. +
  92. /* ZQ calibration types */
  93. enum {
  94. ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
  95. @@ -318,6 +325,23 @@ void reut_issue_mrs_all(
  96. enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
  /*
   * Fill WDB cacheline `start` with a fixed pattern. Bit `b` of each chunk is
   * taken from patterns[pat_mask[b % spread]], so `spread` must be non-zero.
   */
  void write_wdb_fixed_pat(const struct sysinfo *ctrl, const uint8_t patterns[],
                           const uint8_t pat_mask[], uint8_t spread, uint16_t start);

  /*
   * Fill `vic_rot` consecutive WDB cachelines, beginning at `start`, with a
   * victim/aggressor pattern. Both masks are rotated left by one bit after
   * each cacheline.
   */
  void write_wdb_va_pat(const struct sysinfo *ctrl, uint32_t agg_mask,
                        uint32_t vic_mask, uint8_t vic_rot, uint16_t start);

  /* Program the WDB cacheline mux seeds; `cleanup` selects the cleanup seed set */
  void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);

  /* Fill the WDB with the default VA patterns and program its LFSR/LMN settings */
  void setup_wdb(const struct sysinfo *ctrl);
  113. +
  114. uint8_t get_rx_bias(const struct sysinfo *ctrl);
  115. uint8_t get_tCWL(uint32_t mem_clock_mhz);
  116. diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  117. index 9929f617fe..7aa8d8c8b2 100644
  118. --- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  119. +++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  120. @@ -335,6 +335,18 @@ union mcscheds_cbit_reg {
  121. uint32_t raw;
  122. };
  /*
   * Bit layout of the 32-bit value written to REUT_ch_PAT_WDB_CL_MUX_LMN.
   * `raw` aliases the whole register.
   */
  union reut_pat_cl_mux_lmn_reg {
      struct __packed {
          uint32_t l_data_select : 1; /* [0]     */
          uint32_t en_sweep_freq : 1; /* [1]     */
          uint32_t               : 6; /* [7:2]   unnamed */
          uint32_t l_counter     : 8; /* [15:8]  */
          uint32_t m_counter     : 8; /* [23:16] */
          uint32_t n_counter     : 8; /* [31:24] */
      };
      uint32_t raw;
  };
  134. +
  135. union reut_pat_cadb_prog_reg {
  136. struct __packed {
  137. uint32_t addr : 16; // Bits 15:0
  138. @@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg {
  139. uint32_t raw;
  140. };
  /*
   * Bit layout of the 32-bit value written to QCLK_ch_LDAT_PDAT.
   * `raw` aliases the whole register.
   */
  union ldat_pdat_reg {
      struct __packed {
          uint32_t fast_addr : 12; /* [11:0]  */
          uint32_t           :  4; /* [15:12] unnamed */
          uint32_t addr_en   :  1; /* [16]    */
          uint32_t seq_en    :  1; /* [17]    */
          uint32_t pol_0     :  1; /* [18]    */
          uint32_t pol_1     :  1; /* [19]    */
          uint32_t cmd_a     :  4; /* [23:20] */
          uint32_t cmd_b     :  4; /* [27:24] */
          uint32_t cmd_c     :  4; /* [31:28] */
      };
      uint32_t raw;
  };
  155. +
  /*
   * Bit layout of the 32-bit value written to QCLK_ch_LDAT_SDAT.
   * `raw` aliases the whole register.
   */
  union ldat_sdat_reg {
      struct __packed {
          uint32_t bank_sel   : 4; /* [3:0]   */
          uint32_t            : 1; /* [4]     unnamed */
          uint32_t array_sel  : 5; /* [9:5]   */
          uint32_t cmp        : 1; /* [10]    */
          uint32_t replicate  : 1; /* [11]    */
          uint32_t dword      : 4; /* [15:12] */
          uint32_t mode       : 2; /* [17:16] */
          uint32_t mpmap      : 6; /* [23:18] */
          uint32_t mpb_offset : 4; /* [27:24] */
          uint32_t stage_en   : 1; /* [28]    */
          uint32_t shadow     : 2; /* [30:29] */
          uint32_t            : 1; /* [31]    unnamed */
      };
      uint32_t raw;
  };
  173. +
  174. union mcscheds_dft_misc_reg {
  175. struct __packed {
  176. uint32_t wdar : 1; // Bits 0:0
  177. diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
  178. new file mode 100644
  179. index 0000000000..ec37c48415
  180. --- /dev/null
  181. +++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
  182. @@ -0,0 +1,159 @@
  183. +/* SPDX-License-Identifier: GPL-2.0-or-later */
  184. +
  185. +#include <northbridge/intel/haswell/haswell.h>
  186. +#include <types.h>
  187. +
  188. +#include "raminit_native.h"
  189. +
  190. +static void ldat_write_cacheline(
  191. + const struct sysinfo *const ctrl,
  192. + const uint8_t chunk,
  193. + const uint16_t start,
  194. + const uint64_t data)
  195. +{
  196. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  197. + if (!does_ch_exist(ctrl, channel))
  198. + continue;
  199. +
  200. + /*
  201. + * Do not do a 64-bit write here. The register is not aligned
  202. + * to a 64-bit boundary, which could potentially cause issues.
  203. + */
  204. + mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX);
  205. + mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32);
  206. + /*
  207. + * Set REPLICATE = 0 as you don't want to replicate the data.
  208. + * Set BANK_SEL to the chunk you want to write the 64 bits to.
  209. + * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1.
  210. + */
  211. + const union ldat_sdat_reg ldat_sdat = {
  212. + .bank_sel = chunk,
  213. + .mode = 1,
  214. + };
  215. + mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw);
  216. + /*
  217. + * Finally, write the PDAT register indicating which cacheline
  218. + * of the WDB you want to write to by setting FAST_ADDR field
  219. + * to one of the 64 cache lines. Also set CMD_B in the PDAT
  220. + * register to 4'b1000, indicating that this is a LDAT write.
  221. + */
  222. + const union ldat_pdat_reg ldat_pdat = {
  223. + .fast_addr = MIN(start, 0xfff),
  224. + .cmd_b = 8,
  225. + };
  226. + mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw);
  227. + }
  228. +}
  229. +
  230. +static void clear_ldat_mode(const struct sysinfo *const ctrl)
  231. +{
  232. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
  233. + mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0);
  234. +}
  235. +
  236. +void write_wdb_fixed_pat(
  237. + const struct sysinfo *const ctrl,
  238. + const uint8_t patterns[],
  239. + const uint8_t pat_mask[],
  240. + const uint8_t spread,
  241. + const uint16_t start)
  242. +{
  243. + for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
  244. + uint64_t data = 0;
  245. + for (uint8_t b = 0; b < 64; b++) {
  246. + const uint8_t beff = b % spread;
  247. + const uint8_t burst = patterns[pat_mask[beff]];
  248. + if (burst & BIT(chunk))
  249. + data |= 1ULL << b;
  250. + }
  251. + ldat_write_cacheline(ctrl, chunk, start, data);
  252. + }
  253. + clear_ldat_mode(ctrl);
  254. +}
  255. +
  /* Rotate a 32-bit value left by one bit: bit 31 wraps around into bit 0 */
  static inline uint32_t rol_u32(const uint32_t val)
  {
      const uint32_t wrapped = val >> 31;

      return (val << 1) | wrapped;
  }
  260. +
  261. +void write_wdb_va_pat(
  262. + const struct sysinfo *const ctrl,
  263. + const uint32_t agg_mask,
  264. + const uint32_t vic_mask,
  265. + const uint8_t vic_rot,
  266. + const uint16_t start)
  267. +{
  268. + static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0};
  269. + uint32_t v_mask = vic_mask;
  270. + uint32_t a_mask = agg_mask;
  271. + for (uint8_t v = 0; v < vic_rot; v++) {
  272. + uint8_t compressed[32] = {0};
  273. + /* Iterate through all 32 bits and create a compressed version of cacheline */
  274. + for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) {
  275. + const uint8_t vic = !!(v_mask & BIT(b));
  276. + const uint8_t agg = !!(a_mask & BIT(b));
  277. + const uint8_t index = !vic << 1 | agg << 0;
  278. + compressed[b] = va_mask_to_compressed[index];
  279. + }
  280. + for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
  281. + uint32_t data = 0;
  282. + for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++)
  283. + data |= !!(compressed[b] & BIT(chunk)) << b;
  284. +
  285. + const uint64_t data64 = (uint64_t)data << 32 | data;
  286. + ldat_write_cacheline(ctrl, chunk, start + v, data64);
  287. + }
  288. + v_mask = rol_u32(v_mask);
  289. + a_mask = rol_u32(a_mask);
  290. + }
  291. + clear_ldat_mode(ctrl);
  292. +}
  293. +
  294. +void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup)
  295. +{
  296. + /* Cleanup LFSR seeds are sequential */
  297. + const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 };
  298. + const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e };
  299. + const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds;
  300. +
  301. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  302. + if (!does_ch_exist(ctrl, channel))
  303. + continue;
  304. +
  305. + for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) {
  306. + mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]);
  307. + mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]);
  308. + }
  309. + }
  310. +}
  311. +
  312. +void setup_wdb(const struct sysinfo *ctrl)
  313. +{
  314. + const uint32_t amask[9] = {
  315. + 0x86186186, 0x18618618, 0x30c30c30,
  316. + 0xa28a28a2, 0x8a28a28a, 0x14514514,
  317. + 0x28a28a28, 0x92492492, 0x24924924,
  318. + };
  319. + const uint32_t vmask = 0x41041041;
  320. +
  321. + /* Fill first 8 entries with simple 2-LFSR VA pattern */
  322. + write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
  323. +
  324. + /* Fill next 54 entries with 3-LFSR VA pattern */
  325. + for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++)
  326. + write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6);
  327. +
  328. + program_wdb_lfsr(ctrl, false);
  329. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  330. + if (!does_ch_exist(ctrl, channel))
  331. + continue;
  332. +
  333. + const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = {
  334. + .en_sweep_freq = 1,
  335. + .l_counter = 1,
  336. + .m_counter = 1,
  337. + .n_counter = 10,
  338. + };
  339. + mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw);
  340. + }
  341. +}
  342. diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
  343. index 4fc78a7f43..f8408e51a0 100644
  344. --- a/src/northbridge/intel/haswell/registers/mchbar.h
  345. +++ b/src/northbridge/intel/haswell/registers/mchbar.h
  346. @@ -94,6 +94,11 @@
  347. #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
  348. #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
  /* Per-channel WDB cacheline mux seed registers, WR side; x in 0 .. 2 */
  #define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x)    _MCMAIN_C_X(0x4048, ch, x)
  /* Per-channel WDB cacheline mux seed registers, RD side; x in 0 .. 2 */
  #define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x)    _MCMAIN_C_X(0x4054, ch, x)

  /* Per-channel WDB cacheline mux LMN register (see union reut_pat_cl_mux_lmn_reg) */
  #define REUT_ch_PAT_WDB_CL_MUX_LMN(ch)        _MCMAIN_C(0x4078, ch)
  353. +
  354. #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
  355. #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
  356. @@ -110,6 +115,10 @@
  357. #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
  358. #define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
  /* Per-channel LDAT registers; layouts in union ldat_pdat_reg / union ldat_sdat_reg */
  #define QCLK_ch_LDAT_PDAT(ch)             _MCMAIN_C(0x42d0, ch)
  #define QCLK_ch_LDAT_SDAT(ch)             _MCMAIN_C(0x42d4, ch)
  /* Per-channel LDAT data input, as 32-bit words; x in 0 .. 1 */
  #define QCLK_ch_LDAT_DATA_IN_x(ch, x)     _MCMAIN_C_X(0x42dc, ch, x)
  362. +
  363. #define REUT_GLOBAL_ERR 0x4804
  364. #define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
  365. --
  366. 2.39.2