lbmk/config/coreboot/haswell/patches/0011-haswell-NRI-Add-RcvEn-...

709 lines
24 KiB
Diff

From 4254a9ff03658d7a6f1a4e32cfe4c65dbfc072f8 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sun, 8 May 2022 00:05:41 +0200
Subject: [PATCH 11/20] haswell NRI: Add RcvEn training
Implement the RcvEn (Receive Enable) calibration procedure.
Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
---
.../intel/haswell/native_raminit/Makefile.mk | 1 +
.../haswell/native_raminit/raminit_main.c | 1 +
.../haswell/native_raminit/raminit_native.h | 14 +
.../haswell/native_raminit/reg_structs.h | 13 +
.../native_raminit/train_receive_enable.c | 561 ++++++++++++++++++
.../intel/haswell/registers/mchbar.h | 3 +
6 files changed, 593 insertions(+)
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index ebe9e9b762..e2fbfb4211 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -16,3 +16,4 @@ romstage-y += setup_wdb.c
romstage-y += spd_bitmunching.c
romstage-y += testing_io.c
romstage-y += timings_refresh.c
+romstage-y += train_receive_enable.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 5e4674957d..7d444659c3 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
{ configure_memory_map, true, "MEMMAP", },
{ do_jedec_init, true, "JEDECINIT", },
{ pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
};
/* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 906b3143b9..b4e8c7de5a 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -42,6 +42,9 @@
#define NUM_WDB_CL_MUX_SEEDS 3
#define NUM_CADB_MUX_SEEDS 3
+/* Specified in PI ticks. 64 PI ticks == 1 qclk */
+#define tDQSCK_DRIFT 64
+
/* ZQ calibration types */
enum {
ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
@@ -188,6 +191,7 @@ enum raminit_status {
RAMINIT_STATUS_MPLL_INIT_FAILURE,
RAMINIT_STATUS_POLL_TIMEOUT,
RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
};
@@ -270,6 +274,10 @@ struct sysinfo {
union ddr_data_vref_adjust_reg dimm_vref;
+ uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint32_t rt_io_comp[NUM_CHANNELS];
+
uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
@@ -344,6 +352,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
}
+static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte)
+{
+ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
+}
+
/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
static inline void tick_delay(const uint32_t delay)
{
@@ -399,6 +412,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
enum raminit_status configure_mc(struct sysinfo *ctrl);
enum raminit_status configure_memory_map(struct sysinfo *ctrl);
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+enum raminit_status train_receive_enable(struct sysinfo *ctrl);
void configure_timings(struct sysinfo *ctrl);
void configure_refresh(struct sysinfo *ctrl);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index b943259b91..b099f4bb82 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg {
uint32_t raw;
};
+union sc_io_latency_reg {
+ struct __packed {
+ uint32_t iolat_rank0 : 4; // Bits 3:0
+ uint32_t iolat_rank1 : 4; // Bits 7:4
+ uint32_t iolat_rank2 : 4; // Bits 11:8
+ uint32_t iolat_rank3 : 4; // Bits 15:12
+ uint32_t rt_iocomp : 6; // Bits 21:16
+ uint32_t : 9; // Bits 30:22
+ uint32_t dis_rt_clk_gate : 1; // Bits 31:31
+ };
+ uint32_t raw;
+};
+
union mcscheds_cbit_reg {
struct __packed {
uint32_t dis_opp_cas : 1; // Bits 0:0
diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
new file mode 100644
index 0000000000..576c6bc21e
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <console/console.h>
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+#include "ranges.h"
+
+#define RCVEN_PLOT RAM_DEBUG
+
+static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel)
+{
+ int16_t max_rcven = -4096;
+ int16_t min_rcven = 4096;
+ int16_t max_rcven_rank[NUM_SLOTRANKS];
+ int16_t min_rcven_rank[NUM_SLOTRANKS];
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ max_rcven_rank[rank] = max_rcven;
+ min_rcven_rank[rank] = min_rcven;
+ }
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ int16_t new_rcven = ctrl->rcven[channel][rank][byte];
+ new_rcven -= ctrl->io_latency[channel][rank] * 64;
+ if (max_rcven_rank[rank] < new_rcven)
+ max_rcven_rank[rank] = new_rcven;
+
+ if (min_rcven_rank[rank] > new_rcven)
+ min_rcven_rank[rank] = new_rcven;
+ }
+ if (max_rcven < max_rcven_rank[rank])
+ max_rcven = max_rcven_rank[rank];
+
+ if (min_rcven > min_rcven_rank[rank])
+ min_rcven = min_rcven_rank[rank];
+ }
+
+ /*
+ * Determine how far we are from the ideal center point for RcvEn timing.
+ * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have
+ * for IO latency. command training will reduce this by 64, so plan for
+ * that now in the ideal value. Round to closest integer.
+ */
+ const int16_t rre_pi_ideal = 256 + 64;
+ const int16_t pi_reserve = 64;
+ const int16_t rcven_center = (max_rcven + min_rcven) / 2;
+ const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64);
+
+ int8_t io_g_offset = 0;
+ int8_t io_lat[NUM_SLOTRANKS] = { 0 };
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ io_lat[rank] = iolat_target;
+
+ /* Check for RcvEn underflow/overflow */
+ const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank];
+ if (rcven_lower < pi_reserve)
+ io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64);
+
+ const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank];
+ if (rcven_upper > 511 - pi_reserve)
+ io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64);
+
+ /* Check for IO latency over/underflow */
+ if (io_lat[rank] - io_g_offset > 14)
+ io_g_offset = io_lat[rank] - 14;
+
+ if (io_lat[rank] - io_g_offset < 1)
+ io_g_offset = io_lat[rank] - 1;
+
+ const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank];
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ ctrl->rcven[channel][rank][byte] += 64 * cycle_offset;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+
+ /* Calculate new IO comp latency */
+ union sc_io_latency_reg sc_io_lat = {
+ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
+ };
+
+ /* Check if we are underflowing or overflowing this field */
+ if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) {
+ printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__);
+ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
+ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) {
+ printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__);
+ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
+ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ sc_io_lat.rt_iocomp += io_g_offset;
+ ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (ctrl->rankmap[channel] & BIT(rank))
+ ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset;
+
+ const uint8_t shift = rank * 4;
+ sc_io_lat.raw &= ~(0xf << shift);
+ sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift;
+ }
+ mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw);
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+#define RL_START (256 + 24)
+#define RL_STOP (384 + 24)
+#define RL_STEP 8
+
+#define RE_NUM_SAMPLES 6
+
+static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth)
+{
+ if (center > RL_STOP) {
+ /* Check if center of high was found where it should be */
+ printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (lwidth <= 32) {
+ /* Check if width is large enough */
+ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ if (lwidth >= 96) {
+ /* Since we're calibrating a phase, a too large region is a problem */
+ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth);
+ return RAMINIT_STATUS_RCVEN_FAILURE;
+ }
+ return RAMINIT_STATUS_SUCCESS;
+}
+
+static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
+{
+ const uint8_t shift = rank * 4;
+ const uint8_t iolat = ctrl->io_latency[channel][rank];
+ mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift);
+}
+
+static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay)
+{
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay);
+ }
+}
+
+static bool sample_dqs(const uint8_t channel, const uint8_t byte)
+{
+ return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1);
+}
+
+enum raminit_status train_receive_enable(struct sysinfo *ctrl)
+{
+ const struct reut_box reut_addr = {
+ .col = {
+ .start = 0,
+ .stop = 1023,
+ .inc_rate = 0,
+ .inc_val = 1,
+ },
+ };
+ const struct wdb_pat wdb_pattern = {
+ .start_ptr = 0,
+ .stop_ptr = 9,
+ .inc_rate = 32,
+ .dq_pattern = BASIC_VA,
+ };
+
+ const uint16_t bytemask = BIT(ctrl->lanes) - 1;
+ const uint8_t fine_step = 1;
+
+ const uint8_t rt_delta = is_hsw_ult() ? 4 : 2;
+ const uint8_t rt_io_comp = 21 + rt_delta;
+ const uint8_t rt_latency = 16 + rt_delta;
+ setup_io_test(
+ ctrl,
+ ctrl->chanmap,
+ PAT_RD,
+ 2,
+ RE_NUM_SAMPLES + 1,
+ &reut_addr,
+ 0,
+ &wdb_pattern,
+ 0,
+ 8);
+
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ union ddr_data_control_2_reg data_control_2 = {
+ .raw = ctrl->dq_control_2[channel][byte],
+ };
+ data_control_2.force_rx_on = 1;
+ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
+ }
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ if (ctrl->lpddr) {
+ /**
+ * W/A for b4618574 - @todo: remove for HSW ULT C0
+ * Can't have force_odt_on together with leaker, disable LPDDR
+ * mode during this training step. lpddr_mode is restored
+ * at the end of this function from the host structure.
+ */
+ data_control_0.lpddr_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ data_control_0.force_odt_on = 1;
+ data_control_0.rl_training_mode = 1;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) {
+ .rt_iocomp = rt_io_comp,
+ }.raw);
+ }
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!does_rank_exist(ctrl, rank))
+ continue;
+
+ /*
+ * Set initial roundtrip latency values. Assume -4 QCLK for worst board
+ * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus:
+ *
+ * DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2
+ * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max
+ *
+ * N-mode is 3 during training mode. Both channels use the same timings.
+ */
+ /** TODO: differs for LPDDR **/
+ const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA;
+ const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f);
+
+ uint8_t chanmask = 0;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ ctrl->io_latency[channel][rank] = 0;
+ mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency);
+ ctrl->rt_latency[channel][rank] = initial_rt_latency;
+ }
+
+ printk(BIOS_DEBUG, "Rank %u\n", rank);
+ printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\nRcvEn\n");
+ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) {
+ printk(RCVEN_PLOT, " % 3d", rl_delay);
+ program_rl_delays(ctrl, rank, rl_delay);
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const bool high = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, high ? ". " : "# ");
+ phase_record_pass(
+ &region_data[channel][byte],
+ high,
+ rl_delay,
+ RL_START,
+ RL_STEP);
+ }
+ }
+ printk(RCVEN_PLOT, "\n");
+ }
+ printk(RCVEN_PLOT, "\n");
+ printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n",
+ channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ struct phase_train_data *const curr_data =
+ &region_data[channel][byte];
+ phase_append_current_to_initial(curr_data, RL_START, RL_STEP);
+ const int32_t lwidth = range_width(curr_data->largest);
+ const int32_t center = range_center(curr_data->largest);
+ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\n",
+ byte,
+ curr_data->largest.start,
+ curr_data->largest.end,
+ lwidth,
+ center);
+
+ status = verify_high_region(center, lwidth);
+ if (status) {
+ printk(BIOS_ERR,
+ "RcvEn problems on channel %u, byte %u\n",
+ channel, byte);
+ goto clean_up;
+ }
+ ctrl->rcven[channel][rank][byte] = center;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+ printk(BIOS_DEBUG, "Step 3: Quarter preamble - Walk backwards\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\nIOLAT\n");
+ bool done = false;
+ while (!done) {
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ done = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, " %2u\t", ctrl->io_latency[channel][rank]);
+ uint16_t highs = 0;
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ const bool high = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, high ? "H " : "L ");
+ if (high)
+ highs |= BIT(byte);
+ }
+ if (!highs)
+ continue;
+
+ done = false;
+
+ /* If all bytes sample high, adjust timing globally */
+ if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) {
+ ctrl->io_latency[channel][rank] += 2;
+ ctrl->io_latency[channel][rank] %= 16;
+ program_io_latency(ctrl, channel, rank);
+ continue;
+ }
+
+ /* Otherwise, adjust individual bytes */
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ if (!(highs & BIT(byte)))
+ continue;
+
+ if (ctrl->rcven[channel][rank][byte] < 128) {
+ printk(BIOS_ERR,
+ "RcvEn underflow: walking backwards\n");
+ printk(BIOS_ERR,
+ "For channel %u, rank %u, byte %u\n",
+ channel, rank, byte);
+ status = RAMINIT_STATUS_RCVEN_FAILURE;
+ goto clean_up;
+ }
+ ctrl->rcven[channel][rank][byte] -= 128;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+ printk(RCVEN_PLOT, "\n");
+ }
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "\nC%u: Preamble\n", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_DEBUG,
+ " B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]);
+ }
+ }
+ printk(BIOS_DEBUG, "\n");
+
+ printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ ctrl->rcven[channel][rank][byte] += 64;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ }
+ }
+ printk(BIOS_DEBUG, "\n");
+
+ printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n");
+ printk(RCVEN_PLOT, "Byte");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(RCVEN_PLOT, "\t");
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
+ printk(RCVEN_PLOT, "%u ", byte);
+ }
+ printk(RCVEN_PLOT, "\n inc\n");
+ uint16_t ch_result[NUM_CHANNELS] = { 0 };
+ uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 };
+ for (uint8_t inc = 0; inc < 64; inc += fine_step) {
+ printk(RCVEN_PLOT, " %2u\t", inc);
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
+ done = true;
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ if (ch_result[channel] & BIT(byte)) {
+ /* Skip bytes that are already done */
+ printk(RCVEN_PLOT, ". ");
+ continue;
+ }
+ const bool pass = sample_dqs(channel, byte);
+ printk(RCVEN_PLOT, pass ? ". " : "# ");
+ if (pass) {
+ ch_result[channel] |= BIT(byte);
+ continue;
+ }
+ ctrl->rcven[channel][rank][byte] += fine_step;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ inc_preamble[channel][byte] = inc;
+ }
+ printk(RCVEN_PLOT, "\t");
+ if (ch_result[channel] != bytemask)
+ done = false;
+ }
+ printk(RCVEN_PLOT, "\n");
+ if (done)
+ break;
+ }
+ printk(BIOS_DEBUG, "\n");
+ if (!done) {
+ printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n");
+ printk(BIOS_ERR, "The final RcvEn results are as follows:\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_ERR, "Channel %u Rank %u: preamble\n",
+ channel, rank);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_ERR, " Byte %u: %u%s\n", byte,
+ ctrl->rcven[channel][rank][byte],
+ (ch_result[channel] ^ bytemask) & BIT(byte)
+ ? ""
+ : " *** Check this byte! ***");
+ }
+ }
+ status = RAMINIT_STATUS_RCVEN_FAILURE;
+ goto clean_up;
+ }
+
+ printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ printk(BIOS_DEBUG, "C%u: Preamble increment\n", channel);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ /*
+ * For Traditional, pull in RcvEn by 64. For ULT, take the DQS
+ * drift into account to the specified guardband: tDQSCK_DRIFT.
+ */
+ ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT;
+ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
+ printk(BIOS_DEBUG, " B%u: %u %u\n", byte,
+ ctrl->rcven[channel][rank][byte],
+ inc_preamble[channel][byte]);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+
+clean_up:
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ if (ctrl->lpddr) {
+ /**
+ * W/A for b4618574 - @todo: remove for HSW ULT C0
+ * Can't have force_odt_on together with leaker, disable LPDDR mode for
+ * this training step. This write will disable force_odt_on while still
+ * keeping LPDDR mode disabled. Second write will restore LPDDR mode.
+ */
+ union ddr_data_control_0_reg data_control_0 = {
+ .raw = ctrl->dq_control_0[channel],
+ };
+ data_control_0.lpddr_mode = 0;
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
+ }
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
+ ctrl->dq_control_2[channel][byte]);
+ }
+ }
+ io_reset();
+ if (status)
+ return status;
+
+ printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ status = change_rcven_timing(ctrl, channel);
+ if (status)
+ return status;
+ }
+ printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n");
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+ if (!does_ch_exist(ctrl, channel))
+ continue;
+
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
+ if (!rank_in_ch(ctrl, rank, channel))
+ continue;
+
+ const union sc_io_latency_reg sc_io_latency = {
+ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
+ };
+ printk(BIOS_DEBUG, " C%u.R%u: IOLAT = %u rt_iocomp = %u\n", channel,
+ rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp);
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
+ printk(BIOS_DEBUG, " B%u: %u\n", byte,
+ ctrl->rcven[channel][rank][byte]);
+ }
+ printk(BIOS_DEBUG, "\n");
+ }
+ }
+ return status;
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index a81559bb1e..9172d4f2b0 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -18,6 +18,8 @@
#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
@@ -100,6 +102,7 @@
#define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+#define SC_IO_LATENCY_ch(ch) _MCMAIN_C(0x4028, ch)
#define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
--
2.39.2