689 lines
22 KiB
Diff
689 lines
22 KiB
Diff
From 20fe4fa852d3e13851a01b51dc984ec5976c864e Mon Sep 17 00:00:00 2001
|
|
From: Angel Pons <th3fanbus@gmail.com>
|
|
Date: Sun, 8 May 2022 12:56:04 +0200
|
|
Subject: [PATCH 24/26] haswell NRI: Add write leveling
|
|
|
|
Implement JEDEC write leveling, which is done in two steps. The first
|
|
step uses the JEDEC procedure to do "fine" write leveling, i.e. align
|
|
the DQS phase to the clock signal. The second step performs a regular
|
|
read-write test to correct "coarse" cycle errors.
|
|
|
|
Change-Id: I27678523fe22c38173a688e2a4751c259a20f009
|
|
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
|
|
---
|
|
.../intel/haswell/native_raminit/Makefile.inc | 1 +
|
|
.../haswell/native_raminit/raminit_main.c | 1 +
|
|
.../haswell/native_raminit/raminit_native.h | 10 +
|
|
.../train_jedec_write_leveling.c | 580 ++++++++++++++++++
|
|
.../intel/haswell/registers/mchbar.h | 2 +
|
|
5 files changed, 594 insertions(+)
|
|
create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
|
|
|
|
diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
|
|
index c442be0728..40c2f5e014 100644
|
|
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
|
|
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
|
|
@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c
|
|
romstage-y += spd_bitmunching.c
|
|
romstage-y += testing_io.c
|
|
romstage-y += timings_refresh.c
|
|
+romstage-y += train_jedec_write_leveling.c
|
|
romstage-y += train_read_mpr.c
|
|
romstage-y += train_receive_enable.c
|
|
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
|
|
index 264d1468f5..1ff23be615 100644
|
|
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
|
|
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
|
|
@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = {
|
|
{ pre_training, true, "PRETRAIN", },
|
|
{ train_receive_enable, true, "RCVET", },
|
|
{ train_read_mpr, true, "RDMPRT", },
|
|
+ { train_jedec_write_leveling, true, "JWRL", },
|
|
};
|
|
|
|
/* Return a generic stepping value to make stepping checks simpler */
|
|
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
|
|
index a7551ad63c..666b233c45 100644
|
|
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
|
|
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
|
|
@@ -59,6 +59,9 @@
|
|
/* Specified in PI ticks. 64 PI ticks == 1 qclk */
|
|
#define tDQSCK_DRIFT 64
|
|
|
|
+/* Maximum additional latency */
|
|
+#define MAX_ADD_DELAY 2
|
|
+
|
|
enum margin_parameter {
|
|
RcvEna,
|
|
RdT,
|
|
@@ -215,6 +218,7 @@ enum raminit_status {
|
|
RAMINIT_STATUS_REUT_ERROR,
|
|
RAMINIT_STATUS_RCVEN_FAILURE,
|
|
RAMINIT_STATUS_RMPR_FAILURE,
|
|
+ RAMINIT_STATUS_JWRL_FAILURE,
|
|
RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
|
|
};
|
|
|
|
@@ -380,6 +384,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint
|
|
return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
|
|
}
|
|
|
|
+static inline uint16_t get_byte_group_errors(const uint8_t channel)
|
|
+{
|
|
+ return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff;
|
|
+}
|
|
+
|
|
/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
|
|
static inline void tick_delay(const uint32_t delay)
|
|
{
|
|
@@ -439,6 +448,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
|
|
enum raminit_status do_jedec_init(struct sysinfo *ctrl);
|
|
enum raminit_status train_receive_enable(struct sysinfo *ctrl);
|
|
enum raminit_status train_read_mpr(struct sysinfo *ctrl);
|
|
+enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
|
|
|
|
void configure_timings(struct sysinfo *ctrl);
|
|
void configure_refresh(struct sysinfo *ctrl);
|
|
diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
|
|
new file mode 100644
|
|
index 0000000000..1ba28a3bd4
|
|
--- /dev/null
|
|
+++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
|
|
@@ -0,0 +1,580 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
+
|
|
+#include <assert.h>
|
|
+#include <console/console.h>
|
|
+#include <delay.h>
|
|
+#include <northbridge/intel/haswell/haswell.h>
|
|
+#include <types.h>
|
|
+
|
|
+#include "raminit_native.h"
|
|
+#include "ranges.h"
|
|
+
|
|
+#define JWLC_PLOT RAM_DEBUG
|
|
+#define JWRL_PLOT RAM_DEBUG
|
|
+
|
|
+static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
|
|
+{
|
|
+ reut_issue_mrs(ctrl, channel, BIT(rank), 0, ctrl->mr0[channel][rank] | MR0_DLL_RESET);
|
|
+}
|
|
+
|
|
+static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert)
|
|
+{
|
|
+ /* Pattern to keep DQ-DQS simple but detect any failures. Same as NHM/WSM. */
|
|
+ const uint8_t pat[4][2] = {
|
|
+ { 0x00, 0xff },
|
|
+ { 0xff, 0x00 },
|
|
+ { 0xc3, 0x3c },
|
|
+ { 0x3c, 0xc3 },
|
|
+ };
|
|
+ const uint8_t pmask[2][8] = {
|
|
+ { 0, 0, 1, 1, 1, 1, 0, 0 },
|
|
+ { 1, 1, 0, 0, 0, 0, 1, 1 },
|
|
+ };
|
|
+ for (uint8_t s = 0; s < ARRAY_SIZE(pat); s++)
|
|
+ write_wdb_fixed_pat(ctrl, pat[s], pmask[invert], ARRAY_SIZE(pmask[invert]), s);
|
|
+}
|
|
+
|
|
+static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off)
|
|
+{
|
|
+ const uint8_t shift = rank * 2;
|
|
+ if (target_off > MAX_ADD_DELAY) {
|
|
+ *add_delay &= ~(3 << shift);
|
|
+ *add_delay |= MAX_ADD_DELAY << shift;
|
|
+ return 128 * (target_off - MAX_ADD_DELAY);
|
|
+ } else if (target_off < 0) {
|
|
+ *add_delay &= ~(3 << shift);
|
|
+ *add_delay |= 0 << shift;
|
|
+ return 128 * target_off;
|
|
+ } else {
|
|
+ *add_delay &= ~(3 << shift);
|
|
+ *add_delay |= target_off << shift;
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl)
|
|
+{
|
|
+ const struct reut_box reut_addr = {
|
|
+ .col = {
|
|
+ .start = 0,
|
|
+ .stop = 1023,
|
|
+ .inc_val = 1,
|
|
+ },
|
|
+ };
|
|
+ const struct wdb_pat wdb_pattern = {
|
|
+ .start_ptr = 0,
|
|
+ .stop_ptr = 3,
|
|
+ .inc_rate = 1,
|
|
+ .dq_pattern = BASIC_VA,
|
|
+ };
|
|
+ const int8_t offsets[] = { 0, 1, -1, 2, 3 };
|
|
+ const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 };
|
|
+ const uint8_t dq_offset_max = ARRAY_SIZE(dq_offsets);
|
|
+
|
|
+ /* Set LFSR seeds to be sequential */
|
|
+ program_wdb_lfsr(ctrl, true);
|
|
+ setup_io_test(
|
|
+ ctrl,
|
|
+ ctrl->chanmap,
|
|
+ PAT_WR_RD,
|
|
+ 2,
|
|
+ 4,
|
|
+ &reut_addr,
|
|
+ NSOE,
|
|
+ &wdb_pattern,
|
|
+ 0,
|
|
+ 0);
|
|
+
|
|
+ const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = {
|
|
+ .mux_0_control = REUT_MUX_BTBUFFER,
|
|
+ .mux_1_control = REUT_MUX_BTBUFFER,
|
|
+ .mux_2_control = REUT_MUX_BTBUFFER,
|
|
+ .ecc_data_source_sel = 1,
|
|
+ };
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!does_ch_exist(ctrl, channel))
|
|
+ continue;
|
|
+
|
|
+ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw);
|
|
+ }
|
|
+
|
|
+ int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 };
|
|
+ uint32_t add_delay[NUM_CHANNELS] = { 0 };
|
|
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
|
|
+ bool invert = false;
|
|
+ const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1;
|
|
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
|
|
+ uint8_t chanmask = 0;
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
|
|
+ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
|
|
+
|
|
+ if (!chanmask)
|
|
+ continue;
|
|
+
|
|
+ printk(BIOS_DEBUG, "Rank %u\n", rank);
|
|
+ printk(JWLC_PLOT, "Channel");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(JWLC_PLOT, "\t\t%u\t", channel);
|
|
+ }
|
|
+ printk(JWLC_PLOT, "\nByte\t");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(JWLC_PLOT, "\t");
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
|
|
+ printk(JWLC_PLOT, "%u ", byte);
|
|
+ }
|
|
+ printk(JWLC_PLOT, "\nDelay DqOffset");
|
|
+ bool done = false;
|
|
+ int8_t byte_sum[NUM_CHANNELS] = { 0 };
|
|
+ uint16_t byte_pass[NUM_CHANNELS] = { 0 };
|
|
+ for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) {
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ const int16_t global_byte_off =
|
|
+ set_add_delay(&add_delay[channel], rank, offsets[off]);
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF,
|
|
+ global_byte_off);
|
|
+ }
|
|
+ mchbar_write32(SC_WR_ADD_DELAY_ch(channel),
|
|
+ add_delay[channel]);
|
|
+ }
|
|
+ /* Reset FIFOs and DRAM DLL (Micron workaround) */
|
|
+ if (!ctrl->lpddr) {
|
|
+ io_reset();
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ reset_dram_dll(ctrl, channel, rank);
|
|
+ }
|
|
+ udelay(1);
|
|
+ }
|
|
+ for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) {
|
|
+ printk(JWLC_PLOT, "\n% 3d\t% 3d",
|
|
+ offsets[off], dq_offsets[dq_offset]);
|
|
+ change_1d_margin_multicast(
|
|
+ ctrl,
|
|
+ WrT,
|
|
+ dq_offsets[dq_offset],
|
|
+ rank,
|
|
+ false,
|
|
+ REG_FILE_USE_RANK);
|
|
+
|
|
+ /*
|
|
+ * Re-program the WDB pattern. Change the pattern
|
|
+ * for the next test to avoid false pass issues.
|
|
+ */
|
|
+ program_wdb_pattern(ctrl, invert);
|
|
+ invert = !invert;
|
|
+ run_io_test(ctrl, chanmask, BASIC_VA, true);
|
|
+ done = true;
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(JWLC_PLOT, "\t");
|
|
+ uint16_t result = get_byte_group_errors(channel);
|
|
+ result &= valid_byte_mask;
|
|
+
|
|
+ /* Skip bytes that have failed or already passed */
|
|
+ const uint16_t skip_me = result | byte_pass[channel];
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ const bool pass = result & BIT(byte);
|
|
+ printk(JWLC_PLOT, pass ? "# " : ". ");
|
|
+ if (skip_me & BIT(byte))
|
|
+ continue;
|
|
+
|
|
+ byte_pass[channel] |= BIT(byte);
|
|
+ byte_off[channel][byte] = offsets[off];
|
|
+ byte_sum[channel] += offsets[off];
|
|
+ }
|
|
+ if (byte_pass[channel] != valid_byte_mask)
|
|
+ done = false;
|
|
+ }
|
|
+ if (done)
|
|
+ break;
|
|
+ }
|
|
+ if (done)
|
|
+ break;
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "\n\n");
|
|
+ if (!done) {
|
|
+ printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank);
|
|
+ const uint16_t passing_mask = byte_pass[channel];
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ if (BIT(byte) & passing_mask)
|
|
+ continue;
|
|
+
|
|
+ printk(BIOS_ERR, " %u", byte);
|
|
+ }
|
|
+ printk(BIOS_ERR, "\n");
|
|
+ }
|
|
+ status = RAMINIT_STATUS_JWRL_FAILURE;
|
|
+ break;
|
|
+ }
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ /* Refine target offset to make sure it works for all bytes */
|
|
+ int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes);
|
|
+ int16_t global_byte_off = 0;
|
|
+ uint8_t all_good_loops = 0;
|
|
+ bool all_good = 0;
|
|
+ while (!all_good) {
|
|
+ global_byte_off =
|
|
+ set_add_delay(&add_delay[channel], rank, target_off);
|
|
+ all_good = true;
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ int16_t local_offset;
|
|
+ local_offset = byte_off[channel][byte] - target_off;
|
|
+ local_offset = local_offset * 128 + global_byte_off;
|
|
+ const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte];
|
|
+ if (tx_dq + local_offset >= (512 - 64)) {
|
|
+ all_good = false;
|
|
+ all_good_loops++;
|
|
+ target_off++;
|
|
+ break;
|
|
+ }
|
|
+ const uint16_t txdqs = ctrl->tx_dq[channel][rank][byte];
|
|
+ if (txdqs + local_offset < 96) {
|
|
+ all_good = false;
|
|
+ all_good_loops++;
|
|
+ target_off--;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ /* Avoid an infinite loop */
|
|
+ if (all_good_loops > 3)
|
|
+ break;
|
|
+ }
|
|
+ if (!all_good) {
|
|
+ printk(BIOS_ERR, "JWLC: Target offset refining failed\n");
|
|
+ status = RAMINIT_STATUS_JWRL_FAILURE;
|
|
+ break;
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "C%u.R%u: Offset\tFinalEdge\n", channel, rank);
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ int16_t local_offset;
|
|
+ local_offset = byte_off[channel][byte] - target_off;
|
|
+ local_offset = local_offset * 128 + global_byte_off;
|
|
+ ctrl->tx_dq[channel][rank][byte] += local_offset;
|
|
+ ctrl->txdqs[channel][rank][byte] += local_offset;
|
|
+ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
|
|
+ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, local_offset,
|
|
+ ctrl->txdqs[channel][rank][byte]);
|
|
+ }
|
|
+ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]);
|
|
+ if (!ctrl->lpddr) {
|
|
+ reset_dram_dll(ctrl, channel, rank);
|
|
+ udelay(1);
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "\n");
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "\n");
|
|
+ }
|
|
+
|
|
+ /* Restore WDB after test */
|
|
+ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
|
|
+ program_wdb_lfsr(ctrl, false);
|
|
+ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
|
|
+
|
|
+ /** TODO: Do full JEDEC init instead? **/
|
|
+ io_reset();
|
|
+ return status;
|
|
+}
|
|
+
|
|
+static enum raminit_status verify_wl_width(const int32_t lwidth)
|
|
+{
|
|
+ if (lwidth <= 32) {
|
|
+ /* Check if width is valid */
|
|
+ printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth);
|
|
+ return RAMINIT_STATUS_JWRL_FAILURE;
|
|
+ }
|
|
+ if (lwidth >= 96) {
|
|
+ /* Since we're calibrating a phase, a too large region is a problem */
|
|
+ printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth);
|
|
+ return RAMINIT_STATUS_JWRL_FAILURE;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl)
|
|
+{
|
|
+ /*
|
|
+ * Enabling WL mode causes DQS to toggle for 1024 QCLK.
|
|
+ * Wait for this to stop. Round up to nearest microsecond.
|
|
+ */
|
|
+ const bool wl_long_delay = ctrl->lpddr;
|
|
+ const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024;
|
|
+ const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000);
|
|
+
|
|
+ const uint16_t wl_start = 192;
|
|
+ const uint16_t wl_stop = 192 + 128;
|
|
+ const uint16_t wl_step = 2;
|
|
+
|
|
+ /* Do not use cached MR values */
|
|
+ const bool save_restore_mrs = ctrl->restore_mrs;
|
|
+ ctrl->restore_mrs = 0;
|
|
+
|
|
+ /* Propagate delay values (without a write command) */
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!does_ch_exist(ctrl, channel))
|
|
+ continue;
|
|
+
|
|
+ /* Propagate delay values from rank 0 to prevent assertion failures in RTL */
|
|
+ union ddr_data_control_0_reg data_control_0 = {
|
|
+ .raw = ctrl->dq_control_0[channel],
|
|
+ };
|
|
+ data_control_0.read_rf_rd = 0;
|
|
+ data_control_0.read_rf_wr = 1;
|
|
+ data_control_0.read_rf_rank = 0;
|
|
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ union ddr_data_control_2_reg data_control_2 = {
|
|
+ .raw = ctrl->dq_control_2[channel][byte],
|
|
+ };
|
|
+ data_control_2.force_bias_on = 1;
|
|
+ data_control_2.force_rx_on = 0;
|
|
+ data_control_2.wl_long_delay = wl_long_delay;
|
|
+ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (ctrl->lpddr)
|
|
+ die("%s: Missing LPDDR support\n", __func__);
|
|
+
|
|
+ if (!ctrl->lpddr)
|
|
+ ddr3_program_mr1(ctrl, 0, 1);
|
|
+
|
|
+ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
|
|
+ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
|
|
+ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
|
|
+ if (!does_rank_exist(ctrl, rank))
|
|
+ continue;
|
|
+
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ /** TODO: Differs for LPDDR **/
|
|
+ uint16_t mr1reg = ctrl->mr1[channel][rank];
|
|
+ mr1reg &= ~MR1_QOFF_ENABLE;
|
|
+ mr1reg |= MR1_WL_ENABLE;
|
|
+ if (is_hsw_ult()) {
|
|
+ mr1reg &= ~RTTNOM_MASK;
|
|
+ mr1reg |= encode_ddr3_rttnom(120);
|
|
+ } else if (ctrl->dpc[channel] == 2) {
|
|
+ mr1reg &= ~RTTNOM_MASK;
|
|
+ mr1reg |= encode_ddr3_rttnom(60);
|
|
+ }
|
|
+ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
|
|
+
|
|
+ /* Assert ODT for myself */
|
|
+ uint8_t odt_matrix = BIT(rank);
|
|
+ if (ctrl->dpc[channel] == 2) {
|
|
+ /* Assert ODT for non-target DIMM */
|
|
+ const uint8_t other_dimm = ((rank + 2) / 2) & 1;
|
|
+ odt_matrix |= BIT(2 * other_dimm);
|
|
+ }
|
|
+
|
|
+ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
|
|
+ .raw = 0,
|
|
+ };
|
|
+ if (ctrl->lpddr) {
|
|
+ /* Only one ODT pin for ULT */
|
|
+ reut_misc_odt_ctrl.odt_on = 1;
|
|
+ reut_misc_odt_ctrl.odt_override = 1;
|
|
+ } else if (!is_hsw_ult()) {
|
|
+ reut_misc_odt_ctrl.odt_on = odt_matrix;
|
|
+ reut_misc_odt_ctrl.odt_override = 0xf;
|
|
+ }
|
|
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
|
|
+ }
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ /*
|
|
+ * Enable write leveling mode in DDR and propagate delay
|
|
+ * values (without a write command). Stay in WL mode.
|
|
+ */
|
|
+ union ddr_data_control_0_reg data_control_0 = {
|
|
+ .raw = ctrl->dq_control_0[channel],
|
|
+ };
|
|
+ data_control_0.wl_training_mode = 1;
|
|
+ data_control_0.tx_pi_on = 1;
|
|
+ data_control_0.read_rf_rd = 0;
|
|
+ data_control_0.read_rf_wr = 1;
|
|
+ data_control_0.read_rf_rank = rank;
|
|
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "\nRank %u\n", rank);
|
|
+ printk(JWRL_PLOT, "Channel\t");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(JWRL_PLOT, "%u", channel);
|
|
+ if (channel > 0)
|
|
+ continue;
|
|
+
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
|
|
+ printk(JWRL_PLOT, "\t");
|
|
+ }
|
|
+ printk(JWRL_PLOT, "\nByte");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
|
|
+ printk(JWRL_PLOT, "\t%u", byte);
|
|
+ }
|
|
+ printk(JWRL_PLOT, "\nWlDelay");
|
|
+ for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) {
|
|
+ printk(JWRL_PLOT, "\n %3u:", wl_delay);
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ update_txt(ctrl, channel, rank, byte, TXT_TXDQS,
|
|
+ wl_delay);
|
|
+ }
|
|
+ }
|
|
+ /* Wait for the first burst to finish */
|
|
+ if (wl_delay == wl_start)
|
|
+ udelay(wait_time_us);
|
|
+
|
|
+ io_reset();
|
|
+ udelay(wait_time_us);
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ const uint32_t feedback =
|
|
+ get_data_train_feedback(channel, byte);
|
|
+ const bool pass = (feedback & 0x1ff) >= 16;
|
|
+ printk(JWRL_PLOT, "\t%c%u", pass ? '.' : '#', feedback);
|
|
+ phase_record_pass(
|
|
+ ®ion_data[channel][byte],
|
|
+ pass,
|
|
+ wl_delay,
|
|
+ wl_start,
|
|
+ wl_step);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ printk(JWRL_PLOT, "\n");
|
|
+ printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n");
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ printk(BIOS_DEBUG, "C%u\n", channel);
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ struct phase_train_data *data = ®ion_data[channel][byte];
|
|
+
|
|
+ phase_append_initial_to_current(data, wl_start, wl_step);
|
|
+ printk(BIOS_DEBUG, " B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n",
|
|
+ byte,
|
|
+ data->initial.start,
|
|
+ data->initial.end,
|
|
+ data->current.start,
|
|
+ data->current.end,
|
|
+ data->largest.start,
|
|
+ data->largest.end);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Clean up after test. Very coarsely adjust for
|
|
+ * any cycle errors. Program values for TxDQS.
|
|
+ */
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!rank_in_ch(ctrl, rank, channel))
|
|
+ continue;
|
|
+
|
|
+ /* Clear ODT before MRS (JEDEC spec) */
|
|
+ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0);
|
|
+
|
|
+ /** TODO: Differs for LPDDR **/
|
|
+ const uint16_t mr1reg = ctrl->mr1[channel][rank] | MR1_QOFF_ENABLE;
|
|
+ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
|
|
+
|
|
+ printk(BIOS_DEBUG, "\nC%u.R%u: LftEdge Width\n", channel, rank);
|
|
+ const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16;
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ struct phase_train_data *data = ®ion_data[channel][byte];
|
|
+ const int32_t lwidth = range_width(data->largest);
|
|
+ int32_t tx_start = data->largest.start;
|
|
+ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, tx_start, lwidth);
|
|
+ status = verify_wl_width(lwidth);
|
|
+ if (status) {
|
|
+ printk(BIOS_ERR,
|
|
+ "WrLevel problems on channel %u, byte %u\n",
|
|
+ channel, byte);
|
|
+ goto clean_up;
|
|
+ }
|
|
+
|
|
+ /* Align byte pairs if DIMM is x16 */
|
|
+ if (rank_x16 && (byte & 1)) {
|
|
+ const struct phase_train_data *const ref_data =
|
|
+ ®ion_data[channel][byte - 1];
|
|
+
|
|
+ if (tx_start > ref_data->largest.start + 64)
|
|
+ tx_start -= 128;
|
|
+
|
|
+ if (tx_start < ref_data->largest.start - 64)
|
|
+ tx_start += 128;
|
|
+ }
|
|
+
|
|
+ /* Fix for b4618067 - need to add 1 QCLK to DQS PI */
|
|
+ if (is_hsw_ult())
|
|
+ tx_start += 64;
|
|
+
|
|
+ assert(tx_start >= 0);
|
|
+ ctrl->txdqs[channel][rank][byte] = tx_start;
|
|
+ ctrl->tx_dq[channel][rank][byte] = tx_start + 32;
|
|
+ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
|
|
+ }
|
|
+ }
|
|
+ printk(BIOS_DEBUG, "\n");
|
|
+ }
|
|
+
|
|
+clean_up:
|
|
+ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
|
|
+ if (!does_ch_exist(ctrl, channel))
|
|
+ continue;
|
|
+
|
|
+ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
|
|
+ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
|
|
+ mchbar_write32(DQ_CONTROL_2(channel, byte),
|
|
+ ctrl->dq_control_2[channel][byte]);
|
|
+ }
|
|
+ }
|
|
+ if (!ctrl->lpddr)
|
|
+ ddr3_program_mr1(ctrl, 0, 0);
|
|
+
|
|
+ ctrl->restore_mrs = save_restore_mrs;
|
|
+
|
|
+ if (status)
|
|
+ return status;
|
|
+
|
|
+ /** TODO: If this step fails and dec_wrd is set, clear it and try again **/
|
|
+ return train_jedec_write_leveling_cleanup(ctrl);
|
|
+}
|
|
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
|
|
index 6a31d3a32c..7c0b5a49de 100644
|
|
--- a/src/northbridge/intel/haswell/registers/mchbar.h
|
|
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
|
|
@@ -121,6 +121,8 @@
|
|
|
|
#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
|
|
|
|
+#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch)
|
|
+
|
|
#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
|
|
#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
|
|
#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
|
|
--
|
|
2.39.2
|
|
|