123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477 |
- From be58501141aa97aa544b670e566cd6cf6797c18e Mon Sep 17 00:00:00 2001
- From: Angel Pons <th3fanbus@gmail.com>
- Date: Wed, 17 Apr 2024 13:20:32 +0200
- Subject: [PATCH 17/20] haswell NRI: Do sense amplifier offset training
- Quoting Wikipedia:
- A sense amplifier is a circuit that is used to amplify and detect
- small signals in electronic systems. It is commonly used in memory
- circuits, such as dynamic random access memory (DRAM), to read and
- amplify the weak signals stored in memory cells.
- In this case, we're calibrating the sense amplifiers in the memory
- controller. This training procedure uses a magic "sense amp offset
- cancel" mode of the DDRIO to observe the sampled logic levels, and
- sweeps Vref to find the low-high transition for each bit lane. The
- procedure consists of two stages: the first stage centers per-byte
- Vref (to ensure per-bit Vref offsets are as small as possible) and
- the second stage centers per-bit Vref.
- Because this procedure uses the "sense amp offset cancel" mode, it
- does not rely on DRAM being trained. It is assumed that the memory
- controller simply makes sense amp output levels observable via the
- `DDR_DATA_TRAIN_FEEDBACK` register and that the memory bus is idle
- during this training step (so the lane voltage is Vdd / 2).
- Note: This procedure will need to be adapted for Broadwell because
- it has per-rank per-bit RxVref registers, whereas Haswell only has
- a single per-bit RxVref register for all ranks.
- Change-Id: Ia07db68763f90e9701c8a376e01279ada8dbbe07
- Signed-off-by: Angel Pons <th3fanbus@gmail.com>
- ---
- .../intel/haswell/native_raminit/Makefile.mk | 1 +
- .../haswell/native_raminit/raminit_main.c | 1 +
- .../haswell/native_raminit/raminit_native.h | 12 +
- .../native_raminit/train_sense_amp_offset.c | 341 ++++++++++++++++++
- .../intel/haswell/registers/mchbar.h | 2 +
- 5 files changed, 357 insertions(+)
- create mode 100644 src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
- diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
- index 8fdd17c542..4bd668a2d6 100644
- --- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
- +++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
- @@ -21,3 +21,4 @@ romstage-y += timings_refresh.c
- romstage-y += train_jedec_write_leveling.c
- romstage-y += train_read_mpr.c
- romstage-y += train_receive_enable.c
- +romstage-y += train_sense_amp_offset.c
- diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
- index 056dde1adc..ce637e2d03 100644
- --- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
- +++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
- @@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
- { configure_memory_map, true, "MEMMAP", },
- { do_jedec_init, true, "JEDECINIT", },
- { pre_training, true, "PRETRAIN", },
- + { train_sense_amp_offset, true, "SOT", },
- { train_receive_enable, true, "RCVET", },
- { train_read_mpr, true, "RDMPRT", },
- { train_jedec_write_leveling, true, "JWRL", },
- diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
- index 0750904aec..95ccd0a8b3 100644
- --- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
- +++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
- @@ -22,6 +22,8 @@
- #define NUM_LANES 9
- #define NUM_LANES_NO_ECC 8
-
- +#define NUM_BITS 8
- +
- #define COMP_INT 10
-
- /* Always use 12 legs for emphasis (not trained) */
- @@ -218,6 +220,7 @@ enum raminit_status {
- RAMINIT_STATUS_MPLL_INIT_FAILURE,
- RAMINIT_STATUS_POLL_TIMEOUT,
- RAMINIT_STATUS_REUT_ERROR,
- + RAMINIT_STATUS_SAMP_OFFSET_FAILURE,
- RAMINIT_STATUS_RCVEN_FAILURE,
- RAMINIT_STATUS_RMPR_FAILURE,
- RAMINIT_STATUS_JWRL_FAILURE,
- @@ -243,6 +246,12 @@ struct raminit_dimm_info {
- bool valid;
- };
-
- +struct vref_margin {
- + uint8_t low;
- + uint8_t center;
- + uint8_t high;
- +};
- +
- struct sysinfo {
- enum raminit_boot_mode bootmode;
- enum generic_stepping stepping;
- @@ -330,6 +339,8 @@ struct sysinfo {
- uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
- int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
-
- + struct vref_margin rxdqvrefpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
- +
- uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
- uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
- uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
- @@ -452,6 +463,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
- enum raminit_status configure_mc(struct sysinfo *ctrl);
- enum raminit_status configure_memory_map(struct sysinfo *ctrl);
- enum raminit_status do_jedec_init(struct sysinfo *ctrl);
- +enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl);
- enum raminit_status train_receive_enable(struct sysinfo *ctrl);
- enum raminit_status train_read_mpr(struct sysinfo *ctrl);
- enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
- diff --git a/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
- new file mode 100644
- index 0000000000..d4f199fefb
- --- /dev/null
- +++ b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
- @@ -0,0 +1,341 @@
- +/* SPDX-License-Identifier: GPL-2.0-or-later */
- +
- +#include <assert.h>
- +#include <commonlib/bsd/clamp.h>
- +#include <console/console.h>
- +#include <delay.h>
- +#include <lib.h>
- +#include <types.h>
- +
- +#include "raminit_native.h"
- +
- +#define VREF_OFFSET_PLOT RAM_DEBUG
- +#define SAMP_OFFSET_PLOT RAM_DEBUG
- +
- +struct vref_train_data {
- + int8_t best_sum;
- + int8_t best_vref;
- + int8_t sum_bits;
- + uint8_t high_mask;
- + uint8_t low_mask;
- +};
- +
- +static enum raminit_status train_vref_offset(struct sysinfo *ctrl)
- +{
- + const int8_t vref_start = -15;
- + const int8_t vref_stop = 15;
- + const struct vref_train_data initial_vref_values = {
- + .best_sum = -NUM_LANES,
- + .best_vref = 0,
- + .high_mask = 0,
- + .low_mask = 0xff,
- + };
- + struct vref_train_data vref_data[NUM_CHANNELS][NUM_LANES];
- +
- + printk(VREF_OFFSET_PLOT, "Plot of sum_bits across Vref settings\nChannel");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + printk(VREF_OFFSET_PLOT, "\t%u\t\t", channel);
- + }
- +
- + printk(VREF_OFFSET_PLOT, "\nByte");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + printk(VREF_OFFSET_PLOT, "\t");
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + printk(VREF_OFFSET_PLOT, "%u ", byte);
- + vref_data[channel][byte] = initial_vref_values;
- + union ddr_data_control_2_reg data_control_2 = {
- + .raw = ctrl->dq_control_2[channel][byte],
- + };
- + data_control_2.force_bias_on = 1;
- + data_control_2.force_rx_on = 1;
- + mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
- + }
- + }
- +
- + /* Sweep through Vref settings and find point SampOffset of +/- 7 passes */
- + printk(VREF_OFFSET_PLOT, "\n1/2 Vref");
- + for (int8_t vref = vref_start; vref <= vref_stop; vref++) {
- + printk(VREF_OFFSET_PLOT, "\n% 3d", vref);
- +
- + /*
- + * To perform this test, enable offset cancel mode and enable ODT.
- + * Check results and update variables. Ideal result is all zeroes.
- + * Clear offset cancel mode at end of test to write RX_OFFSET_VDQ.
- + */
- + change_1d_margin_multicast(ctrl, RdV, vref, 0, false, REG_FILE_USE_RANK);
- +
- + /* Program settings for Vref and SampOffset = 7 (8 + 7) */
- + mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0xffffffff);
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + /* Propagate delay values (without a read command) */
- + union ddr_data_control_0_reg data_control_0 = {
- + .raw = ctrl->dq_control_0[channel],
- + };
- + data_control_0.read_rf_rd = 1;
- + data_control_0.read_rf_wr = 0;
- + data_control_0.read_rf_rank = 0;
- + data_control_0.force_odt_on = 1;
- + data_control_0.samp_train_mode = 1;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + udelay(1);
- + data_control_0.samp_train_mode = 0;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + const uint8_t feedback = get_data_train_feedback(channel, byte);
- + struct vref_train_data *curr_data = &vref_data[channel][byte];
- + curr_data->low_mask &= feedback;
- + curr_data->sum_bits = -popcnt(feedback);
- + }
- + }
- +
- + /* Program settings for Vref and SampOffset = -7 (8 - 7) */
- + mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x11111111);
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + /* Propagate delay values (without a read command) */
- + union ddr_data_control_0_reg data_control_0 = {
- + .raw = ctrl->dq_control_0[channel],
- + };
- + data_control_0.read_rf_rd = 1;
- + data_control_0.read_rf_wr = 0;
- + data_control_0.read_rf_rank = 0;
- + data_control_0.force_odt_on = 1;
- + data_control_0.samp_train_mode = 1;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + udelay(1);
- + data_control_0.samp_train_mode = 0;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + printk(VREF_OFFSET_PLOT, "\t");
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + const uint8_t feedback = get_data_train_feedback(channel, byte);
- + struct vref_train_data *curr_data = &vref_data[channel][byte];
- + curr_data->high_mask |= feedback;
- + curr_data->sum_bits += popcnt(feedback);
- + printk(VREF_OFFSET_PLOT, "%d ", curr_data->sum_bits);
- + if (curr_data->sum_bits > curr_data->best_sum) {
- + curr_data->best_sum = curr_data->sum_bits;
- + curr_data->best_vref = vref;
- + ctrl->rxvref[channel][0][byte] = vref;
- + } else if (curr_data->sum_bits == curr_data->best_sum) {
- + curr_data->best_vref = vref;
- + }
- + }
- + }
- + }
- + printk(BIOS_DEBUG, "\n\nHi-Lo (XOR):");
- + enum raminit_status status = RAMINIT_STATUS_SUCCESS;
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + printk(BIOS_DEBUG, "\n C%u:", channel);
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + struct vref_train_data *const curr_data = &vref_data[channel][byte];
- + const uint8_t bit_xor = curr_data->high_mask ^ curr_data->low_mask;
- + printk(BIOS_DEBUG, "\t0x%02x", bit_xor);
- + if (bit_xor == 0xff)
- + continue;
- +
- + /* Report an error if any bit did not change */
- + status = RAMINIT_STATUS_SAMP_OFFSET_FAILURE;
- + }
- + }
- + if (status)
- + printk(BIOS_ERR, "\nUnexpected bit error in Vref offset training\n");
- +
- + printk(BIOS_DEBUG, "\n\nRdVref:");
- + change_1d_margin_multicast(ctrl, RdV, 0, 0, false, REG_FILE_USE_RANK);
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + printk(BIOS_DEBUG, "\n C%u:", channel);
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + struct vref_train_data *const curr_data = &vref_data[channel][byte];
- + const int8_t vref_width =
- + curr_data->best_vref - ctrl->rxvref[channel][0][byte];
- +
- + /*
- + * Step size for Rx Vref in DATA_OFFSET_TRAIN is about 3.9 mV
- + * whereas Rx Vref step size in RX_TRAIN_RANK is about 7.8 mV
- + */
- + int8_t vref = ctrl->rxvref[channel][0][byte] + vref_width / 2;
- + if (vref < 0)
- + vref--;
- + else
- + vref++;
- +
- + for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
- + if (!rank_in_ch(ctrl, rank, channel))
- + continue;
- +
- + ctrl->rxvref[channel][rank][byte] = vref / 2;
- + update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
- + }
- + printk(BIOS_DEBUG, "\t% 4d", ctrl->rxvref[channel][0][byte]);
- + }
- + }
- + printk(BIOS_DEBUG, "\n\n");
- + return status;
- +}
- +
- +/**
- + * LPDDR has an additional bit for DQS per each byte.
- + *
- + * TODO: The DQS value must be written into Data Control 2.
- + */
- +#define NUM_OFFSET_TRAIN_BITS (NUM_BITS + 1)
- +
- +#define PLOT_CH_SPACE " "
- +
- +struct samp_train_data {
- + uint8_t first_zero;
- + uint8_t last_one;
- +};
- +
- +static void train_samp_offset(struct sysinfo *ctrl)
- +{
- + const uint8_t max_train_bits = ctrl->lpddr ? NUM_OFFSET_TRAIN_BITS : NUM_BITS;
- +
- + struct samp_train_data samp_data[NUM_CHANNELS][NUM_LANES][NUM_OFFSET_TRAIN_BITS] = {0};
- +
- + printk(BIOS_DEBUG, "Channel ");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + printk(BIOS_DEBUG, "%u ", channel); /* Same length as PLOT_CH_SPACE */
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
- + printk(BIOS_DEBUG, " %s ", ctrl->lpddr ? " " : "");
- + }
- + printk(BIOS_DEBUG, "\nByte ");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
- + printk(BIOS_DEBUG, "%u %s ", byte, ctrl->lpddr ? " " : "");
- +
- + printk(BIOS_DEBUG, PLOT_CH_SPACE);
- + }
- + printk(SAMP_OFFSET_PLOT, "\nBits ");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
- + printk(SAMP_OFFSET_PLOT, "01234567%s ", ctrl->lpddr ? "S" : "");
- +
- + printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
- + }
- + printk(SAMP_OFFSET_PLOT, "\n SAmp\n");
- + for (uint8_t samp_offset = 1; samp_offset <= 15; samp_offset++) {
- + printk(SAMP_OFFSET_PLOT, "% 5d\t", samp_offset);
- +
- + uint32_t rx_offset_vdq = 0;
- + for (uint8_t bit = 0; bit < NUM_BITS; bit++) {
- + rx_offset_vdq += samp_offset << (4 * bit);
- + }
- + mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, rx_offset_vdq);
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + /* Propagate delay values (without a read command) */
- + union ddr_data_control_0_reg data_control_0 = {
- + .raw = ctrl->dq_control_0[channel],
- + };
- + data_control_0.read_rf_rd = 1;
- + data_control_0.read_rf_wr = 0;
- + data_control_0.read_rf_rank = 0;
- + data_control_0.force_odt_on = 1;
- + data_control_0.samp_train_mode = 1;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + udelay(1);
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + const uint32_t feedback =
- + get_data_train_feedback(channel, byte);
- +
- + for (uint8_t bit = 0; bit < max_train_bits; bit++) {
- + struct samp_train_data *const curr_data =
- + &samp_data[channel][byte][bit];
- + const bool result = feedback & BIT(bit);
- + if (result) {
- + curr_data->last_one = samp_offset;
- + } else if (curr_data->first_zero == 0) {
- + curr_data->first_zero = samp_offset;
- + }
- + printk(SAMP_OFFSET_PLOT, result ? "." : "#");
- + }
- + printk(SAMP_OFFSET_PLOT, " ");
- + }
- + printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
- + data_control_0.samp_train_mode = 0;
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
- + }
- + printk(SAMP_OFFSET_PLOT, "\n");
- + }
- + printk(BIOS_DEBUG, "\nBitSAmp ");
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
- + uint32_t rx_offset_vdq = 0;
- + for (uint8_t bit = 0; bit < max_train_bits; bit++) {
- + struct samp_train_data *const curr_data =
- + &samp_data[channel][byte][bit];
- +
- + uint8_t vref = curr_data->first_zero + curr_data->last_one;
- + vref = clamp_u8(0, vref / 2, 15);
- + /*
- + * Check for saturation conditions to make sure
- + * we are as close as possible to Vdd/2 (750 mV).
- + */
- + if (curr_data->first_zero == 0)
- + vref = 15;
- + if (curr_data->last_one == 0)
- + vref = 0;
- +
- + ctrl->rxdqvrefpb[channel][0][byte][bit].center = vref;
- + rx_offset_vdq += vref & 0xf << (4 * bit);
- + printk(BIOS_DEBUG, "%x", vref);
- + }
- + mchbar_write32(RX_OFFSET_VDQ(channel, byte), rx_offset_vdq);
- + printk(BIOS_DEBUG, " ");
- + download_regfile(ctrl, channel, 1, 0, REG_FILE_USE_RANK, 0, 1, 0);
- + }
- + printk(BIOS_DEBUG, PLOT_CH_SPACE);
- + }
- + printk(BIOS_DEBUG, "\n");
- +}
- +
- +enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl)
- +{
- + printk(BIOS_DEBUG, "Stage 1: Vref offset training\n");
- + const enum raminit_status status = train_vref_offset(ctrl);
- +
- + printk(BIOS_DEBUG, "Stage 2: Samp offset training\n");
- + train_samp_offset(ctrl);
- +
- + /* Clean up after test */
- + for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
- + if (!does_ch_exist(ctrl, channel))
- + continue;
- +
- + mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
- + for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
- + mchbar_write32(DQ_CONTROL_2(channel, byte),
- + ctrl->dq_control_2[channel][byte]);
- + }
- + io_reset();
- + return status;
- +}
- diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
- index 49a215aa71..1a168a3fc8 100644
- --- a/src/northbridge/intel/haswell/registers/mchbar.h
- +++ b/src/northbridge/intel/haswell/registers/mchbar.h
- @@ -18,6 +18,8 @@
- #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
- #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
-
- +#define RX_OFFSET_VDQ(ch, byte) _DDRIO_C_R_B(0x004c, ch, 0, byte)
- +
- #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
-
- #define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
- --
- 2.39.2
|