0011-haswell-NRI-Add-RcvEn-training.patch 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709
  1. From 4254a9ff03658d7a6f1a4e32cfe4c65dbfc072f8 Mon Sep 17 00:00:00 2001
  2. From: Angel Pons <th3fanbus@gmail.com>
  3. Date: Sun, 8 May 2022 00:05:41 +0200
  4. Subject: [PATCH 11/20] haswell NRI: Add RcvEn training
  5. Implement the RcvEn (Receive Enable) calibration procedure.
  6. Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888
  7. Signed-off-by: Angel Pons <th3fanbus@gmail.com>
  8. ---
  9. .../intel/haswell/native_raminit/Makefile.mk | 1 +
  10. .../haswell/native_raminit/raminit_main.c | 1 +
  11. .../haswell/native_raminit/raminit_native.h | 14 +
  12. .../haswell/native_raminit/reg_structs.h | 13 +
  13. .../native_raminit/train_receive_enable.c | 561 ++++++++++++++++++
  14. .../intel/haswell/registers/mchbar.h | 3 +
  15. 6 files changed, 593 insertions(+)
  16. create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
  17. diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
  18. index ebe9e9b762..e2fbfb4211 100644
  19. --- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
  20. +++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
  21. @@ -16,3 +16,4 @@ romstage-y += setup_wdb.c
  22. romstage-y += spd_bitmunching.c
  23. romstage-y += testing_io.c
  24. romstage-y += timings_refresh.c
  25. +romstage-y += train_receive_enable.c
  26. diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  27. index 5e4674957d..7d444659c3 100644
  28. --- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  29. +++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
  30. @@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
  31. { configure_memory_map, true, "MEMMAP", },
  32. { do_jedec_init, true, "JEDECINIT", },
  33. { pre_training, true, "PRETRAIN", },
  34. + { train_receive_enable, true, "RCVET", },
  35. };
  36. /* Return a generic stepping value to make stepping checks simpler */
  37. diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  38. index 906b3143b9..b4e8c7de5a 100644
  39. --- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  40. +++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
  41. @@ -42,6 +42,9 @@
  42. #define NUM_WDB_CL_MUX_SEEDS 3
  43. #define NUM_CADB_MUX_SEEDS 3
  44. +/* Specified in PI ticks. 64 PI ticks == 1 qclk */
  45. +#define tDQSCK_DRIFT 64
  46. +
  47. /* ZQ calibration types */
  48. enum {
  49. ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
  50. @@ -188,6 +191,7 @@ enum raminit_status {
  51. RAMINIT_STATUS_MPLL_INIT_FAILURE,
  52. RAMINIT_STATUS_POLL_TIMEOUT,
  53. RAMINIT_STATUS_REUT_ERROR,
  54. + RAMINIT_STATUS_RCVEN_FAILURE,
  55. RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
  56. };
  57. @@ -270,6 +274,10 @@ struct sysinfo {
  58. union ddr_data_vref_adjust_reg dimm_vref;
  59. + uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS];
  60. + uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS];
  61. + uint32_t rt_io_comp[NUM_CHANNELS];
  62. +
  63. uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
  64. uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
  65. @@ -344,6 +352,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
  66. memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
  67. }
  68. +static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte)
  69. +{ /* Reads the DDR_DATA_TRAIN_FEEDBACK MCHBAR register of one byte lane on one channel. */
  70. + return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte)); /* raw 32-bit value; callers mask it */
  71. +}
  72. +
  73. /* Number of ticks to wait in units of 69.841279 ns (citation needed) */
  74. static inline void tick_delay(const uint32_t delay)
  75. {
  76. @@ -399,6 +412,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
  77. enum raminit_status configure_mc(struct sysinfo *ctrl);
  78. enum raminit_status configure_memory_map(struct sysinfo *ctrl);
  79. enum raminit_status do_jedec_init(struct sysinfo *ctrl);
  80. +enum raminit_status train_receive_enable(struct sysinfo *ctrl);
  81. void configure_timings(struct sysinfo *ctrl);
  82. void configure_refresh(struct sysinfo *ctrl);
  83. diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  84. index b943259b91..b099f4bb82 100644
  85. --- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  86. +++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
  87. @@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg {
  88. uint32_t raw;
  89. };
  90. +union sc_io_latency_reg { /* Bitfield view of the 32-bit value read from/written to SC_IO_LATENCY_ch(); `raw` is the whole word */
  91. + struct __packed {
  92. + uint32_t iolat_rank0 : 4; // Bits 3:0 - IO latency, rank 0
  93. + uint32_t iolat_rank1 : 4; // Bits 7:4 - IO latency, rank 1
  94. + uint32_t iolat_rank2 : 4; // Bits 11:8 - IO latency, rank 2
  95. + uint32_t iolat_rank3 : 4; // Bits 15:12 - IO latency, rank 3
  96. + uint32_t rt_iocomp : 6; // Bits 21:16 - 6-bit field, callers range-check it against 0x3f
  97. + uint32_t : 9; // Bits 30:22 - unnamed padding
  98. + uint32_t dis_rt_clk_gate : 1; // Bits 31:31
  99. + };
  100. + uint32_t raw;
  101. +};
  102. +
  103. union mcscheds_cbit_reg {
  104. struct __packed {
  105. uint32_t dis_opp_cas : 1; // Bits 0:0
  106. diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
  107. new file mode 100644
  108. index 0000000000..576c6bc21e
  109. --- /dev/null
  110. +++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
  111. @@ -0,0 +1,561 @@
  112. +/* SPDX-License-Identifier: GPL-2.0-or-later */
  113. +
  114. +#include <console/console.h>
  115. +#include <northbridge/intel/haswell/haswell.h>
  116. +#include <types.h>
  117. +
  118. +#include "raminit_native.h"
  119. +#include "ranges.h"
  120. +
  121. +#define RCVEN_PLOT RAM_DEBUG /* printk() level used for the per-byte plot output in this file */
  122. +
  123. +static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel)
  124. +{ /* Recomputes per-rank IO latency for `channel`, shifts ctrl->rcven to match and rewrites SC_IO_LATENCY. Fails with RAMINIT_STATUS_RCVEN_FAILURE if rt_iocomp would leave 0..0x3f. */
  125. + int16_t max_rcven = -4096; /* start value for the maximum search; replaced by the first real sample */
  126. + int16_t min_rcven = 4096; /* start value for the minimum search */
  127. + int16_t max_rcven_rank[NUM_SLOTRANKS];
  128. + int16_t min_rcven_rank[NUM_SLOTRANKS];
  129. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  130. + max_rcven_rank[rank] = max_rcven;
  131. + min_rcven_rank[rank] = min_rcven;
  132. + }
  133. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  134. + if (!rank_in_ch(ctrl, rank, channel))
  135. + continue;
  136. +
  137. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  138. + int16_t new_rcven = ctrl->rcven[channel][rank][byte];
  139. + new_rcven -= ctrl->io_latency[channel][rank] * 64; /* subtract the current IO latency contribution, 64 per step */
  140. + if (max_rcven_rank[rank] < new_rcven)
  141. + max_rcven_rank[rank] = new_rcven;
  142. +
  143. + if (min_rcven_rank[rank] > new_rcven)
  144. + min_rcven_rank[rank] = new_rcven;
  145. + }
  146. + if (max_rcven < max_rcven_rank[rank]) /* fold the per-rank extremes into the channel-wide ones */
  147. + max_rcven = max_rcven_rank[rank];
  148. +
  149. + if (min_rcven > min_rcven_rank[rank])
  150. + min_rcven = min_rcven_rank[rank];
  151. + }
  152. +
  153. + /*
  154. + * Determine how far we are from the ideal center point for RcvEn timing.
  155. + * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have
  156. + * for IO latency. command training will reduce this by 64, so plan for
  157. + * that now in the ideal value. Round to closest integer.
  158. + */
  159. + const int16_t rre_pi_ideal = 256 + 64;
  160. + const int16_t pi_reserve = 64; /* margin kept from both ends of the 0..511 range checked below */
  161. + const int16_t rcven_center = (max_rcven + min_rcven) / 2;
  162. + const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64);
  163. +
  164. + int8_t io_g_offset = 0; /* channel-wide offset, later moved from the per-rank fields into rt_iocomp */
  165. + int8_t io_lat[NUM_SLOTRANKS] = { 0 };
  166. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  167. + if (!rank_in_ch(ctrl, rank, channel))
  168. + continue;
  169. +
  170. + io_lat[rank] = iolat_target;
  171. +
  172. + /* Check for RcvEn underflow/overflow */
  173. + const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank];
  174. + if (rcven_lower < pi_reserve)
  175. + io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64);
  176. +
  177. + const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank];
  178. + if (rcven_upper > 511 - pi_reserve)
  179. + io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64);
  180. +
  181. + /* Check for IO latency over/underflow */
  182. + if (io_lat[rank] - io_g_offset > 14) /* keep io_lat - io_g_offset within 1..14 for this rank */
  183. + io_g_offset = io_lat[rank] - 14;
  184. +
  185. + if (io_lat[rank] - io_g_offset < 1)
  186. + io_g_offset = io_lat[rank] - 1;
  187. +
  188. + const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank]; /* IO latency steps this rank moved by */
  189. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  190. + ctrl->rcven[channel][rank][byte] += 64 * cycle_offset;
  191. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); /* presumably re-programs the lane from ctrl->rcven - confirm in update_rxt() */
  192. + }
  193. + }
  194. +
  195. + /* Calculate new IO comp latency */
  196. + union sc_io_latency_reg sc_io_lat = {
  197. + .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
  198. + };
  199. +
  200. + /* Check if we are underflowing or overflowing this field */
  201. + if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) {
  202. + printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__);
  203. + printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
  204. + printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
  205. + return RAMINIT_STATUS_RCVEN_FAILURE;
  206. + }
  207. + if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) {
  208. + printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__);
  209. + printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
  210. + printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
  211. + return RAMINIT_STATUS_RCVEN_FAILURE;
  212. + }
  213. + sc_io_lat.rt_iocomp += io_g_offset; /* the offset removed from each rank below is added here instead */
  214. + ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp;
  215. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  216. + if (ctrl->rankmap[channel] & BIT(rank)) /* ranks not in rankmap keep their previous ctrl->io_latency value */
  217. + ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset;
  218. +
  219. + const uint8_t shift = rank * 4; /* each rank owns a 4-bit field */
  220. + sc_io_lat.raw &= ~(0xf << shift);
  221. + sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift;
  222. + }
  223. + mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw);
  224. + return RAMINIT_STATUS_SUCCESS;
  225. +}
  226. +
  227. +#define RL_START (256 + 24) /* first RcvEn delay tried by the sweep in train_receive_enable() */
  228. +#define RL_STOP (384 + 24) /* sweep stops before this value; also the highest accepted region center */
  229. +#define RL_STEP 8 /* sweep increment */
  230. +
  231. +#define RE_NUM_SAMPLES 6 /* RE_NUM_SAMPLES + 1 is passed to setup_io_test(); sample_dqs() thresholds at BIT(RE_NUM_SAMPLES - 1) */
  232. +
  233. +static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth)
  234. +{ /* Accepts a high region only if center <= RL_STOP and 32 < lwidth < 96; logs the reason and returns RAMINIT_STATUS_RCVEN_FAILURE otherwise. */
  235. + if (center > RL_STOP) {
  236. + /* Check if center of high was found where it should be */
  237. + printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center);
  238. + return RAMINIT_STATUS_RCVEN_FAILURE;
  239. + }
  240. + if (lwidth <= 32) {
  241. + /* Check if width is large enough */
  242. + printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth);
  243. + return RAMINIT_STATUS_RCVEN_FAILURE;
  244. + }
  245. + if (lwidth >= 96) {
  246. + /* Since we're calibrating a phase, a too large region is a problem */
  247. + printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth);
  248. + return RAMINIT_STATUS_RCVEN_FAILURE;
  249. + }
  250. + return RAMINIT_STATUS_SUCCESS;
  251. +}
  252. +
  253. +static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
  254. +{ /* Copies ctrl->io_latency[channel][rank] into this rank's 4-bit field of SC_IO_LATENCY. */
  255. + const uint8_t shift = rank * 4; /* bit offset of the rank's field */
  256. + const uint8_t iolat = ctrl->io_latency[channel][rank];
  257. + mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift); /* presumably a read-modify-write that leaves the other fields alone - confirm */
  258. +}
  259. +
  260. +static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay)
  261. +{ /* Passes rl_delay as the RXT_RCVEN value to update_rxt() for every byte lane of `rank`, on each channel that has the rank. */
  262. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  263. + if (!rank_in_ch(ctrl, rank, channel))
  264. + continue;
  265. +
  266. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
  267. + update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay);
  268. + }
  269. +}
  270. +
  271. +static bool sample_dqs(const uint8_t channel, const uint8_t byte)
  272. +{ /* True when the low 9 bits of the lane's training feedback are >= BIT(RE_NUM_SAMPLES - 1). NOTE(review): presumably a count of high samples - confirm against register docs. */
  273. + return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1);
  274. +}
  275. +
  276. +enum raminit_status train_receive_enable(struct sysinfo *ctrl)
  277. +{ /* Receive Enable training, rank by rank (steps 1-6 as logged below), then per-channel IO latency sync (step 7). Returns RAMINIT_STATUS_SUCCESS or RAMINIT_STATUS_RCVEN_FAILURE; DQ control registers are restored from ctrl on both paths. */
  278. + const struct reut_box reut_addr = {
  279. + .col = {
  280. + .start = 0,
  281. + .stop = 1023,
  282. + .inc_rate = 0,
  283. + .inc_val = 1,
  284. + },
  285. + };
  286. + const struct wdb_pat wdb_pattern = {
  287. + .start_ptr = 0,
  288. + .stop_ptr = 9,
  289. + .inc_rate = 32,
  290. + .dq_pattern = BASIC_VA,
  291. + };
  292. +
  293. + const uint16_t bytemask = BIT(ctrl->lanes) - 1; /* one bit per byte lane */
  294. + const uint8_t fine_step = 1;
  295. +
  296. + const uint8_t rt_delta = is_hsw_ult() ? 4 : 2;
  297. + const uint8_t rt_io_comp = 21 + rt_delta;
  298. + const uint8_t rt_latency = 16 + rt_delta;
  299. + setup_io_test(
  300. + ctrl,
  301. + ctrl->chanmap,
  302. + PAT_RD,
  303. + 2,
  304. + RE_NUM_SAMPLES + 1,
  305. + &reut_addr,
  306. + 0,
  307. + &wdb_pattern,
  308. + 0,
  309. + 8);
  310. +
  311. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  312. + if (!does_ch_exist(ctrl, channel))
  313. + continue;
  314. +
  315. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  316. + union ddr_data_control_2_reg data_control_2 = {
  317. + .raw = ctrl->dq_control_2[channel][byte],
  318. + };
  319. + data_control_2.force_rx_on = 1;
  320. + mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
  321. + }
  322. + union ddr_data_control_0_reg data_control_0 = {
  323. + .raw = ctrl->dq_control_0[channel],
  324. + };
  325. + if (ctrl->lpddr) {
  326. + /**
  327. + * W/A for b4618574 - @todo: remove for HSW ULT C0
  328. + * Can't have force_odt_on together with leaker, disable LPDDR
  329. + * mode during this training step. lpddr_mode is restored
  330. + * at the end of this function from the host structure.
  331. + */
  332. + data_control_0.lpddr_mode = 0;
  333. + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
  334. + }
  335. + data_control_0.force_odt_on = 1;
  336. + data_control_0.rl_training_mode = 1;
  337. + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
  338. + mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) {
  339. + .rt_iocomp = rt_io_comp, /* every other field, including all per-rank IO latencies, is written as 0 */
  340. + }.raw);
  341. + }
  342. + enum raminit_status status = RAMINIT_STATUS_SUCCESS;
  343. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  344. + if (!does_rank_exist(ctrl, rank))
  345. + continue;
  346. +
  347. + /*
  348. + * Set initial roundtrip latency values. Assume -4 QCLK for worst board
  349. + * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus:
  350. + *
  351. + * DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2
  352. + * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max
  353. + *
  354. + * N-mode is 3 during training mode. Both channels use the same timings.
  355. + */
  356. + /** TODO: differs for LPDDR **/
  357. + const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA;
  358. + const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f); /* clamped to 0x3f */
  359. +
  360. + uint8_t chanmask = 0;
  361. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  362. + chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
  363. + if (!rank_in_ch(ctrl, rank, channel))
  364. + continue;
  365. +
  366. + ctrl->io_latency[channel][rank] = 0;
  367. + mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency);
  368. + ctrl->rt_latency[channel][rank] = initial_rt_latency;
  369. + }
  370. +
  371. + printk(BIOS_DEBUG, "Rank %u\n", rank);
  372. + printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n");
  373. + printk(RCVEN_PLOT, "Byte");
  374. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  375. + if (!rank_in_ch(ctrl, rank, channel))
  376. + continue;
  377. +
  378. + printk(RCVEN_PLOT, "\t");
  379. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
  380. + printk(RCVEN_PLOT, "%u ", byte);
  381. + }
  382. + printk(RCVEN_PLOT, "\nRcvEn\n");
  383. + struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
  384. + for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) { /* coarse sweep, one test run per delay value */
  385. + printk(RCVEN_PLOT, " % 3d", rl_delay);
  386. + program_rl_delays(ctrl, rank, rl_delay);
  387. + run_io_test(ctrl, chanmask, BASIC_VA, true);
  388. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  389. + if (!rank_in_ch(ctrl, rank, channel))
  390. + continue;
  391. +
  392. + printk(RCVEN_PLOT, "\t");
  393. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  394. + const bool high = sample_dqs(channel, byte);
  395. + printk(RCVEN_PLOT, high ? ". " : "# ");
  396. + phase_record_pass(
  397. + &region_data[channel][byte],
  398. + high,
  399. + rl_delay,
  400. + RL_START,
  401. + RL_STEP);
  402. + }
  403. + }
  404. + printk(RCVEN_PLOT, "\n");
  405. + }
  406. + printk(RCVEN_PLOT, "\n");
  407. + printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n");
  408. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  409. + if (!rank_in_ch(ctrl, rank, channel))
  410. + continue;
  411. +
  412. + printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n",
  413. + channel, rank);
  414. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  415. + struct phase_train_data *const curr_data =
  416. + &region_data[channel][byte];
  417. + phase_append_current_to_initial(curr_data, RL_START, RL_STEP);
  418. + const int32_t lwidth = range_width(curr_data->largest);
  419. + const int32_t center = range_center(curr_data->largest);
  420. + printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\n",
  421. + byte,
  422. + curr_data->largest.start,
  423. + curr_data->largest.end,
  424. + lwidth,
  425. + center);
  426. +
  427. + status = verify_high_region(center, lwidth);
  428. + if (status) {
  429. + printk(BIOS_ERR,
  430. + "RcvEn problems on channel %u, byte %u\n",
  431. + channel, byte);
  432. + goto clean_up;
  433. + }
  434. + ctrl->rcven[channel][rank][byte] = center;
  435. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
  436. + }
  437. + printk(BIOS_DEBUG, "\n");
  438. + }
  439. +
  440. + printk(BIOS_DEBUG, "Step 3: Quarter preamble - Walk backwards\n");
  441. + printk(RCVEN_PLOT, "Byte");
  442. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  443. + if (!rank_in_ch(ctrl, rank, channel))
  444. + continue;
  445. +
  446. + printk(RCVEN_PLOT, "\t");
  447. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
  448. + printk(RCVEN_PLOT, "%u ", byte);
  449. + }
  450. + printk(RCVEN_PLOT, "\nIOLAT\n");
  451. + bool done = false;
  452. + while (!done) { /* repeats until no byte on any channel samples high */
  453. + run_io_test(ctrl, chanmask, BASIC_VA, true);
  454. + done = true;
  455. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  456. + if (!rank_in_ch(ctrl, rank, channel))
  457. + continue;
  458. +
  459. + printk(RCVEN_PLOT, " %2u\t", ctrl->io_latency[channel][rank]);
  460. + uint16_t highs = 0;
  461. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  462. + const bool high = sample_dqs(channel, byte);
  463. + printk(RCVEN_PLOT, high ? "H " : "L ");
  464. + if (high)
  465. + highs |= BIT(byte);
  466. + }
  467. + if (!highs)
  468. + continue;
  469. +
  470. + done = false;
  471. +
  472. + /* If all bytes sample high, adjust timing globally */
  473. + if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) {
  474. + ctrl->io_latency[channel][rank] += 2;
  475. + ctrl->io_latency[channel][rank] %= 16;
  476. + program_io_latency(ctrl, channel, rank);
  477. + continue;
  478. + }
  479. +
  480. + /* Otherwise, adjust individual bytes */
  481. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  482. + if (!(highs & BIT(byte)))
  483. + continue;
  484. +
  485. + if (ctrl->rcven[channel][rank][byte] < 128) {
  486. + printk(BIOS_ERR,
  487. + "RcvEn underflow: walking backwards\n");
  488. + printk(BIOS_ERR,
  489. + "For channel %u, rank %u, byte %u\n",
  490. + channel, rank, byte);
  491. + status = RAMINIT_STATUS_RCVEN_FAILURE;
  492. + goto clean_up;
  493. + }
  494. + ctrl->rcven[channel][rank][byte] -= 128;
  495. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
  496. + }
  497. + }
  498. + printk(RCVEN_PLOT, "\n");
  499. + }
  500. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  501. + if (!rank_in_ch(ctrl, rank, channel))
  502. + continue;
  503. +
  504. + printk(BIOS_DEBUG, "\nC%u: Preamble\n", channel);
  505. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  506. + printk(BIOS_DEBUG,
  507. + " B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]);
  508. + }
  509. + }
  510. + printk(BIOS_DEBUG, "\n");
  511. +
  512. + printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n");
  513. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  514. + if (!rank_in_ch(ctrl, rank, channel))
  515. + continue;
  516. +
  517. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  518. + ctrl->rcven[channel][rank][byte] += 64;
  519. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
  520. + }
  521. + }
  522. + printk(BIOS_DEBUG, "\n");
  523. +
  524. + printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n");
  525. + printk(RCVEN_PLOT, "Byte");
  526. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  527. + if (!rank_in_ch(ctrl, rank, channel))
  528. + continue;
  529. +
  530. + printk(RCVEN_PLOT, "\t");
  531. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
  532. + printk(RCVEN_PLOT, "%u ", byte);
  533. + }
  534. + printk(RCVEN_PLOT, "\n inc\n");
  535. + uint16_t ch_result[NUM_CHANNELS] = { 0 }; /* per channel: bit set once that byte has sampled high */
  536. + uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 };
  537. + for (uint8_t inc = 0; inc < 64; inc += fine_step) {
  538. + printk(RCVEN_PLOT, " %2u\t", inc);
  539. + run_io_test(ctrl, chanmask, BASIC_VA, true);
  540. + done = true;
  541. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  542. + if (!rank_in_ch(ctrl, rank, channel))
  543. + continue;
  544. +
  545. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  546. + if (ch_result[channel] & BIT(byte)) {
  547. + /* Skip bytes that are already done */
  548. + printk(RCVEN_PLOT, ". ");
  549. + continue;
  550. + }
  551. + const bool pass = sample_dqs(channel, byte);
  552. + printk(RCVEN_PLOT, pass ? ". " : "# ");
  553. + if (pass) {
  554. + ch_result[channel] |= BIT(byte);
  555. + continue;
  556. + }
  557. + ctrl->rcven[channel][rank][byte] += fine_step;
  558. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
  559. + inc_preamble[channel][byte] = inc; /* last increment at which this byte still sampled low */
  560. + }
  561. + printk(RCVEN_PLOT, "\t");
  562. + if (ch_result[channel] != bytemask)
  563. + done = false;
  564. + }
  565. + printk(RCVEN_PLOT, "\n");
  566. + if (done)
  567. + break;
  568. + }
  569. + printk(BIOS_DEBUG, "\n");
  570. + if (!done) {
  571. + printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n");
  572. + printk(BIOS_ERR, "The final RcvEn results are as follows:\n");
  573. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  574. + if (!rank_in_ch(ctrl, rank, channel))
  575. + continue;
  576. +
  577. + printk(BIOS_ERR, "Channel %u Rank %u: preamble\n",
  578. + channel, rank);
  579. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  580. + printk(BIOS_ERR, " Byte %u: %u%s\n", byte,
  581. + ctrl->rcven[channel][rank][byte],
  582. + (ch_result[channel] ^ bytemask) & BIT(byte) /* non-zero: this byte never found the edge, so flag it */
  583. + ? " *** Check this byte! ***"
  584. + : "");
  585. + }
  586. + }
  587. + status = RAMINIT_STATUS_RCVEN_FAILURE;
  588. + goto clean_up;
  589. + }
  590. +
  591. + printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n");
  592. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  593. + if (!rank_in_ch(ctrl, rank, channel))
  594. + continue;
  595. +
  596. + printk(BIOS_DEBUG, "C%u: Preamble increment\n", channel);
  597. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  598. + /*
  599. + * For Traditional, pull in RcvEn by 64. For ULT, take the DQS
  600. + * drift into account to the specified guardband: tDQSCK_DRIFT.
  601. + */
  602. + ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT;
  603. + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
  604. + printk(BIOS_DEBUG, " B%u: %u %u\n", byte,
  605. + ctrl->rcven[channel][rank][byte],
  606. + inc_preamble[channel][byte]);
  607. + }
  608. + printk(BIOS_DEBUG, "\n");
  609. + }
  610. + printk(BIOS_DEBUG, "\n");
  611. + }
  612. +
  613. +clean_up: /* reached on success and on failure: put the DQ control registers back to the values cached in ctrl */
  614. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  615. + if (!does_ch_exist(ctrl, channel))
  616. + continue;
  617. +
  618. + if (ctrl->lpddr) {
  619. + /**
  620. + * W/A for b4618574 - @todo: remove for HSW ULT C0
  621. + * Can't have force_odt_on together with leaker, disable LPDDR mode for
  622. + * this training step. This write will disable force_odt_on while still
  623. + * keeping LPDDR mode disabled. Second write will restore LPDDR mode.
  624. + */
  625. + union ddr_data_control_0_reg data_control_0 = {
  626. + .raw = ctrl->dq_control_0[channel],
  627. + };
  628. + data_control_0.lpddr_mode = 0;
  629. + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
  630. + }
  631. + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
  632. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  633. + mchbar_write32(DQ_CONTROL_2(channel, byte),
  634. + ctrl->dq_control_2[channel][byte]);
  635. + }
  636. + }
  637. + io_reset();
  638. + if (status) /* a per-rank step failed: skip step 7 */
  639. + return status;
  640. +
  641. + printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n");
  642. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  643. + if (!does_ch_exist(ctrl, channel))
  644. + continue;
  645. +
  646. + status = change_rcven_timing(ctrl, channel);
  647. + if (status)
  648. + return status;
  649. + }
  650. + printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n");
  651. + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
  652. + if (!does_ch_exist(ctrl, channel))
  653. + continue;
  654. +
  655. + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
  656. + if (!rank_in_ch(ctrl, rank, channel))
  657. + continue;
  658. +
  659. + const union sc_io_latency_reg sc_io_latency = {
  660. + .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
  661. + };
  662. + printk(BIOS_DEBUG, " C%u.R%u: IOLAT = %u rt_iocomp = %u\n", channel,
  663. + rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp);
  664. + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
  665. + printk(BIOS_DEBUG, " B%u: %u\n", byte,
  666. + ctrl->rcven[channel][rank][byte]);
  667. + }
  668. + printk(BIOS_DEBUG, "\n");
  669. + }
  670. + }
  671. + return status;
  672. +}
  673. diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
  674. index a81559bb1e..9172d4f2b0 100644
  675. --- a/src/northbridge/intel/haswell/registers/mchbar.h
  676. +++ b/src/northbridge/intel/haswell/registers/mchbar.h
  677. @@ -18,6 +18,8 @@
  678. #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
  679. #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
  680. +#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
  681. +
  682. #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
  683. #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
  684. @@ -100,6 +102,7 @@
  685. #define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
  686. #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
  687. #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
  688. +#define SC_IO_LATENCY_ch(ch) _MCMAIN_C(0x4028, ch)
  689. #define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
  690. --
  691. 2.39.2