mtd: nand_mxs: Setup timing

So far we relied on the default timing values which are rather slow. This patch adds the timing setup functions from the kernel. Tested on i.MX6 with a AMD/Spansion S34ML08G2 and a Micron MT29F32G08ABAAAWP. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
2016-06-24 09:52:48 +02:00 · 2016-06-24 09:52:48 +02:00 · 1daa3bc8dc
parent 6a175cb3ff
commit 1daa3bc8dc
1 changed files with 808 additions and 4 deletions
--- a/drivers/mtd/nand/nand_mxs.c
+++ b/drivers/mtd/nand/nand_mxs.c
@ -67,16 +67,16 @@
 #define	GPMI_CTRL0_XFER_COUNT_OFFSET			0

 #define GPMI_CTRL1					0x00000060
+#define GPMI_CTRL1_SET					0x00000064
+#define GPMI_CTRL1_CLR					0x00000068
 #define	GPMI_CTRL1_DECOUPLE_CS				(1 << 24)
-#define	GPMI_CTRL1_WRN_DLY_SEL_MASK			(0x3 << 22)
-#define	GPMI_CTRL1_WRN_DLY_SEL_OFFSET			22
+#define	GPMI_CTRL1_WRN_DLY(d)				(((d) & 0x3) << 22)
 #define	GPMI_CTRL1_TIMEOUT_IRQ_EN			(1 << 20)
 #define	GPMI_CTRL1_GANGED_RDYBUSY			(1 << 19)
 #define	GPMI_CTRL1_BCH_MODE				(1 << 18)
 #define	GPMI_CTRL1_DLL_ENABLE				(1 << 17)
 #define	GPMI_CTRL1_HALF_PERIOD				(1 << 16)
-#define	GPMI_CTRL1_RDN_DELAY_MASK			(0xf << 12)
-#define	GPMI_CTRL1_RDN_DELAY_OFFSET			12
+#define	GPMI_CTRL1_RDN_DELAY(d)				(((d) & 0xf) << 12)
 #define	GPMI_CTRL1_DMA2ECC_MODE				(1 << 11)
 #define	GPMI_CTRL1_DEV_IRQ				(1 << 10)
 #define	GPMI_CTRL1_TIMEOUT_IRQ				(1 << 9)
@ -89,6 +89,20 @@
 #define	GPMI_CTRL1_CAMERA_MODE				(1 << 1)
 #define	GPMI_CTRL1_GPMI_MODE				(1 << 0)

+#define BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS		0x0
+#define BV_GPMI_CTRL1_WRN_DLY_SEL_6_TO_10NS		0x1
+#define BV_GPMI_CTRL1_WRN_DLY_SEL_7_TO_12NS		0x2
+#define BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY		0x3
+
+#define GPMI_TIMING0					0x00000070
+
+#define	GPMI_TIMING0_ADDRESS_SETUP(d)			(((d) & 0xff) << 16)
+#define	GPMI_TIMING0_DATA_HOLD(d)			(((d) & 0xff) << 8)
+#define	GPMI_TIMING0_DATA_SETUP(d)			(((d) & 0xff) << 0)
+
+#define GPMI_TIMING1					0x00000080
+#define	GPMI_TIMING1_BUSY_TIMEOUT(d)			(((d) & 0xffff) << 16)
+
 #define	GPMI_ECCCTRL_HANDLE_MASK			(0xffff << 16)
 #define	GPMI_ECCCTRL_HANDLE_OFFSET			16
 #define	GPMI_ECCCTRL_ECC_CMD_MASK			(0x3 << 13)
@ -147,7 +161,41 @@ enum gpmi_type {
 	GPMI_IMX6,
 };

+/**
+ * struct nand_timing - Fundamental timing attributes for NAND.
+ * @data_setup_in_ns:         The data setup time, in nanoseconds. Usually the
+ *                            maximum of tDS and tWP. A negative value
+ *                            indicates this characteristic isn't known.
+ * @data_hold_in_ns:          The data hold time, in nanoseconds. Usually the
+ *                            maximum of tDH, tWH and tREH. A negative value
+ *                            indicates this characteristic isn't known.
+ * @address_setup_in_ns:      The address setup time, in nanoseconds. Usually
+ *                            the maximum of tCLS, tCS and tALS. A negative
+ *                            value indicates this characteristic isn't known.
+ * @gpmi_sample_delay_in_ns:  A GPMI-specific timing parameter. A negative value
+ *                            indicates this characteristic isn't known.
+ * @tREA_in_ns:               tREA, in nanoseconds, from the data sheet. A
+ *                            negative value indicates this characteristic isn't
+ *                            known.
+ * @tRLOH_in_ns:              tRLOH, in nanoseconds, from the data sheet. A
+ *                            negative value indicates this characteristic isn't
+ *                            known.
+ * @tRHOH_in_ns:              tRHOH, in nanoseconds, from the data sheet. A
+ *                            negative value indicates this characteristic isn't
+ *                            known.
+ */
+struct nand_timing {
+	int8_t  data_setup_in_ns;
+	int8_t  data_hold_in_ns;
+	int8_t  address_setup_in_ns;
+	int8_t  gpmi_sample_delay_in_ns;
+	int8_t  tREA_in_ns;
+	int8_t  tRLOH_in_ns;
+	int8_t  tRHOH_in_ns;
+};
+
 struct mxs_nand_info {
+	struct device_d		*dev;
 	struct nand_chip	nand_chip;
 	void __iomem		*io_base;
 	void __iomem		*bch_base;
@ -179,6 +227,11 @@ struct mxs_nand_info {
 	/* DMA descriptors */
 	struct mxs_dma_desc	**desc;
 	uint32_t		desc_index;
+
+#define GPMI_ASYNC_EDO_ENABLED	(1 << 0)
+#define GPMI_TIMING_INIT_OK	(1 << 1)
+	unsigned		flags;
+	struct nand_timing	timing;
 };

 struct nand_ecclayout fake_ecc_layout;
@ -1305,6 +1358,753 @@ static void mxs_nand_probe_dt(struct device_d *dev, struct mxs_nand_info *nand_i
 		nand->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB;
 }

+/**
+ * struct gpmi_nfc_hardware_timing - GPMI hardware timing parameters.
+ * @data_setup_in_cycles:      The data setup time, in cycles.
+ * @data_hold_in_cycles:       The data hold time, in cycles.
+ * @address_setup_in_cycles:   The address setup time, in cycles.
+ * @device_busy_timeout:       The timeout waiting for NAND Ready/Busy,
+ *                             this value is the number of cycles multiplied
+ *                             by 4096.
+ * @use_half_periods:          Indicates the clock is running slowly, so the
+ *                             NFC DLL should use half-periods.
+ * @sample_delay_factor:       The sample delay factor.
+ * @wrn_dly_sel:               The delay on the GPMI write strobe.
+ */
+struct gpmi_nfc_hardware_timing {
+	/* for GPMI_TIMING0 */
+	uint8_t  data_setup_in_cycles;
+	uint8_t  data_hold_in_cycles;
+	uint8_t  address_setup_in_cycles;
+
+	/* for GPMI_TIMING1 */
+	uint16_t device_busy_timeout;
+
+	/* for GPMI_CTRL1 */
+	bool     use_half_periods;
+	uint8_t  sample_delay_factor;
+	uint8_t  wrn_dly_sel;
+};
+
+/**
+ * struct timing_threshod - Timing threshold
+ * @max_data_setup_cycles:       The maximum number of data setup cycles that
+ *                               can be expressed in the hardware.
+ * @internal_data_setup_in_ns:   The time, in ns, that the NFC hardware requires
+ *                               for data read internal setup. In the Reference
+ *                               Manual, see the chapter "High-Speed NAND
+ *                               Timing" for more details.
+ * @max_sample_delay_factor:     The maximum sample delay factor that can be
+ *                               expressed in the hardware.
+ * @max_dll_clock_period_in_ns:  The maximum period of the GPMI clock that the
+ *                               sample delay DLL hardware can possibly work
+ *                               with (the DLL is unusable with longer periods).
+ *                               If the full-cycle period is greater than HALF
+ *                               this value, the DLL must be configured to use
+ *                               half-periods.
+ * @max_dll_delay_in_ns:         The maximum amount of delay, in ns, that the
+ *                               DLL can implement.
+ */
+struct timing_threshold {
+	const unsigned int      max_data_setup_cycles;
+	const unsigned int      internal_data_setup_in_ns;
+	const unsigned int      max_sample_delay_factor;
+	const unsigned int      max_dll_clock_period_in_ns;
+	const unsigned int      max_dll_delay_in_ns;
+};
+
+static struct timing_threshold timing_default_threshold = {
+	.max_data_setup_cycles       = 0xff,
+	.internal_data_setup_in_ns   = 0,
+	.max_sample_delay_factor     = 15,
+	.max_dll_clock_period_in_ns  = 32,
+	.max_dll_delay_in_ns         = 16,
+};
+
+/* Converts time in nanoseconds to cycles. */
+static unsigned int ns_to_cycles(unsigned int time,
+			unsigned int period, unsigned int min)
+{
+	unsigned int k;
+
+	k = (time + period - 1) / period;
+	return max(k, min);
+}
+
+/* Apply timing to current hardware conditions. */
+static int mxs_nand_compute_hardware_timing(struct mxs_nand_info *info,
+					struct gpmi_nfc_hardware_timing *hw)
+{
+	struct timing_threshold *nfc = &timing_default_threshold;
+	struct nand_chip *nand = &info->nand_chip;
+	struct nand_timing target = info->timing;
+	unsigned long clock_frequency_in_hz;
+	unsigned int clock_period_in_ns;
+	bool dll_use_half_periods;
+	unsigned int dll_delay_shift;
+	unsigned int max_sample_delay_in_ns;
+	unsigned int address_setup_in_cycles;
+	unsigned int data_setup_in_ns;
+	unsigned int data_setup_in_cycles;
+	unsigned int data_hold_in_cycles;
+	int ideal_sample_delay_in_ns;
+	unsigned int sample_delay_factor;
+	int tEYE;
+	unsigned int min_prop_delay_in_ns = 5;
+	unsigned int max_prop_delay_in_ns = 9;
+
+	/*
+	 * If there are multiple chips, we need to relax the timings to allow
+	 * for signal distortion due to higher capacitance.
+	 */
+	if (nand->numchips > 2) {
+		target.data_setup_in_ns    += 10;
+		target.data_hold_in_ns     += 10;
+		target.address_setup_in_ns += 10;
+	} else if (nand->numchips > 1) {
+		target.data_setup_in_ns    += 5;
+		target.data_hold_in_ns     += 5;
+		target.address_setup_in_ns += 5;
+	}
+
+	/* Inspect the clock. */
+	clock_frequency_in_hz = clk_get_rate(info->clk);
+	clock_period_in_ns    = NSEC_PER_SEC / clock_frequency_in_hz;
+
+	/*
+	 * The NFC quantizes setup and hold parameters in terms of clock cycles.
+	 * Here, we quantize the setup and hold timing parameters to the
+	 * next-highest clock period to make sure we apply at least the
+	 * specified times.
+	 *
+	 * For data setup and data hold, the hardware interprets a value of zero
+	 * as the largest possible delay. This is not what's intended by a zero
+	 * in the input parameter, so we impose a minimum of one cycle.
+	 */
+	data_setup_in_cycles    = ns_to_cycles(target.data_setup_in_ns,
+							clock_period_in_ns, 1);
+	data_hold_in_cycles     = ns_to_cycles(target.data_hold_in_ns,
+							clock_period_in_ns, 1);
+	address_setup_in_cycles = ns_to_cycles(target.address_setup_in_ns,
+							clock_period_in_ns, 0);
+
+	/*
+	 * The clock's period affects the sample delay in a number of ways:
+	 *
+	 * (1) The NFC HAL tells us the maximum clock period the sample delay
+	 *     DLL can tolerate. If the clock period is greater than half that
+	 *     maximum, we must configure the DLL to be driven by half periods.
+	 *
+	 * (2) We need to convert from an ideal sample delay, in ns, to a
+	 *     "sample delay factor," which the NFC uses. This factor depends on
+	 *     whether we're driving the DLL with full or half periods.
+	 *     Paraphrasing the reference manual:
+	 *
+	 *         AD = SDF x 0.125 x RP
+	 *
+	 * where:
+	 *
+	 *     AD   is the applied delay, in ns.
+	 *     SDF  is the sample delay factor, which is dimensionless.
+	 *     RP   is the reference period, in ns, which is a full clock period
+	 *          if the DLL is being driven by full periods, or half that if
+	 *          the DLL is being driven by half periods.
+	 *
+	 * Let's re-arrange this in a way that's more useful to us:
+	 *
+	 *                        8
+	 *         SDF  =  AD x ----
+	 *                       RP
+	 *
+	 * The reference period is either the clock period or half that, so this
+	 * is:
+	 *
+	 *                        8       AD x DDF
+	 *         SDF  =  AD x -----  =  --------
+	 *                      f x P        P
+	 *
+	 * where:
+	 *
+	 *       f  is 1 or 1/2, depending on how we're driving the DLL.
+	 *       P  is the clock period.
+	 *     DDF  is the DLL Delay Factor, a dimensionless value that
+	 *          incorporates all the constants in the conversion.
+	 *
+	 * DDF will be either 8 or 16, both of which are powers of two. We can
+	 * reduce the cost of this conversion by using bit shifts instead of
+	 * multiplication or division. Thus:
+	 *
+	 *                 AD << DDS
+	 *         SDF  =  ---------
+	 *                     P
+	 *
+	 *     or
+	 *
+	 *         AD  =  (SDF >> DDS) x P
+	 *
+	 * where:
+	 *
+	 *     DDS  is the DLL Delay Shift, the logarithm to base 2 of the DDF.
+	 */
+	if (clock_period_in_ns > (nfc->max_dll_clock_period_in_ns >> 1)) {
+		dll_use_half_periods = true;
+		dll_delay_shift      = 3 + 1;
+	} else {
+		dll_use_half_periods = false;
+		dll_delay_shift      = 3;
+	}
+
+	/*
+	 * Compute the maximum sample delay the NFC allows, under current
+	 * conditions. If the clock is running too slowly, no sample delay is
+	 * possible.
+	 */
+	if (clock_period_in_ns > nfc->max_dll_clock_period_in_ns) {
+		max_sample_delay_in_ns = 0;
+	} else {
+		/*
+		 * Compute the delay implied by the largest sample delay factor
+		 * the NFC allows.
+		 */
+		max_sample_delay_in_ns =
+			(nfc->max_sample_delay_factor * clock_period_in_ns) >>
+								dll_delay_shift;
+
+		/*
+		 * Check if the implied sample delay larger than the NFC
+		 * actually allows.
+		 */
+		if (max_sample_delay_in_ns > nfc->max_dll_delay_in_ns)
+			max_sample_delay_in_ns = nfc->max_dll_delay_in_ns;
+	}
+
+	/*
+	 * Fold the read setup time required by the NFC into the ideal
+	 * sample delay.
+	 */
+	ideal_sample_delay_in_ns = target.gpmi_sample_delay_in_ns +
+					nfc->internal_data_setup_in_ns;
+
+	/*
+	 * The ideal sample delay may be greater than the maximum
+	 * allowed by the NFC. If so, we can trade off sample delay time
+	 * for more data setup time.
+	 *
+	 * In each iteration of the following loop, we add a cycle to
+	 * the data setup time and subtract a corresponding amount from
+	 * the sample delay until we've satisified the constraints or
+	 * can't do any better.
+	 */
+	while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
+		(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
+
+		data_setup_in_cycles++;
+		ideal_sample_delay_in_ns -= clock_period_in_ns;
+
+		if (ideal_sample_delay_in_ns < 0)
+			ideal_sample_delay_in_ns = 0;
+	}
+
+	/*
+	 * Compute the sample delay factor that corresponds most closely
+	 * to the ideal sample delay. If the result is too large for the
+	 * NFC, use the maximum value.
+	 *
+	 * Notice that we use the ns_to_cycles function to compute the
+	 * sample delay factor. We do this because the form of the
+	 * computation is the same as that for calculating cycles.
+	 */
+	sample_delay_factor =
+		ns_to_cycles(
+			ideal_sample_delay_in_ns << dll_delay_shift,
+						clock_period_in_ns, 0);
+
+	if (sample_delay_factor > nfc->max_sample_delay_factor)
+		sample_delay_factor = nfc->max_sample_delay_factor;
+
+	/* Skip to the part where we return our results. */
+	goto return_results;
+
+	/*
+	 * If control arrives here, we have more detailed timing information,
+	 * so we can use a better algorithm.
+	 */
+
+	/*
+	 * Fold the read setup time required by the NFC into the maximum
+	 * propagation delay.
+	 */
+	max_prop_delay_in_ns += nfc->internal_data_setup_in_ns;
+
+	/*
+	 * Earlier, we computed the number of clock cycles required to satisfy
+	 * the data setup time. Now, we need to know the actual nanoseconds.
+	 */
+	data_setup_in_ns = clock_period_in_ns * data_setup_in_cycles;
+
+	/*
+	 * Compute tEYE, the width of the data eye when reading from the NAND
+	 * Flash. The eye width is fundamentally determined by the data setup
+	 * time, perturbed by propagation delays and some characteristics of the
+	 * NAND Flash device.
+	 *
+	 * start of the eye = max_prop_delay + tREA
+	 * end of the eye   = min_prop_delay + tRHOH + data_setup
+	 */
+	tEYE = (int)min_prop_delay_in_ns + (int)target.tRHOH_in_ns +
+							(int)data_setup_in_ns;
+
+	tEYE -= (int)max_prop_delay_in_ns + (int)target.tREA_in_ns;
+
+	/*
+	 * The eye must be open. If it's not, we can try to open it by
+	 * increasing its main forcer, the data setup time.
+	 *
+	 * In each iteration of the following loop, we increase the data setup
+	 * time by a single clock cycle. We do this until either the eye is
+	 * open or we run into NFC limits.
+	 */
+	while ((tEYE <= 0) &&
+			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
+		/* Give a cycle to data setup. */
+		data_setup_in_cycles++;
+		/* Synchronize the data setup time with the cycles. */
+		data_setup_in_ns += clock_period_in_ns;
+		/* Adjust tEYE accordingly. */
+		tEYE += clock_period_in_ns;
+	}
+
+	/*
+	 * When control arrives here, the eye is open. The ideal time to sample
+	 * the data is in the center of the eye:
+	 *
+	 *     end of the eye + start of the eye
+	 *     ---------------------------------  -  data_setup
+	 *                    2
+	 *
+	 * After some algebra, this simplifies to the code immediately below.
+	 */
+	ideal_sample_delay_in_ns =
+		((int)max_prop_delay_in_ns +
+			(int)target.tREA_in_ns +
+				(int)min_prop_delay_in_ns +
+					(int)target.tRHOH_in_ns -
+						(int)data_setup_in_ns) >> 1;
+
+	/*
+	 * The following figure illustrates some aspects of a NAND Flash read:
+	 *
+	 *
+	 *           __                   _____________________________________
+	 * RDN         \_________________/
+	 *
+	 *                                         <---- tEYE ----->
+	 *                                        /-----------------\
+	 * Read Data ----------------------------<                   >---------
+	 *                                        \-----------------/
+	 *             ^                 ^                 ^              ^
+	 *             |                 |                 |              |
+	 *             |<--Data Setup -->|<--Delay Time -->|              |
+	 *             |                 |                 |              |
+	 *             |                 |                                |
+	 *             |                 |<--   Quantized Delay Time   -->|
+	 *             |                 |                                |
+	 *
+	 *
+	 * We have some issues we must now address:
+	 *
+	 * (1) The *ideal* sample delay time must not be negative. If it is, we
+	 *     jam it to zero.
+	 *
+	 * (2) The *ideal* sample delay time must not be greater than that
+	 *     allowed by the NFC. If it is, we can increase the data setup
+	 *     time, which will reduce the delay between the end of the data
+	 *     setup and the center of the eye. It will also make the eye
+	 *     larger, which might help with the next issue...
+	 *
+	 * (3) The *quantized* sample delay time must not fall either before the
+	 *     eye opens or after it closes (the latter is the problem
+	 *     illustrated in the above figure).
+	 */
+
+	/* Jam a negative ideal sample delay to zero. */
+	if (ideal_sample_delay_in_ns < 0)
+		ideal_sample_delay_in_ns = 0;
+
+	/*
+	 * Extend the data setup as needed to reduce the ideal sample delay
+	 * below the maximum permitted by the NFC.
+	 */
+	while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
+			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
+
+		/* Give a cycle to data setup. */
+		data_setup_in_cycles++;
+		/* Synchronize the data setup time with the cycles. */
+		data_setup_in_ns += clock_period_in_ns;
+		/* Adjust tEYE accordingly. */
+		tEYE += clock_period_in_ns;
+
+		/*
+		 * Decrease the ideal sample delay by one half cycle, to keep it
+		 * in the middle of the eye.
+		 */
+		ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);
+
+		/* Jam a negative ideal sample delay to zero. */
+		if (ideal_sample_delay_in_ns < 0)
+			ideal_sample_delay_in_ns = 0;
+	}
+
+	/*
+	 * Compute the sample delay factor that corresponds to the ideal sample
+	 * delay. If the result is too large, then use the maximum allowed
+	 * value.
+	 *
+	 * Notice that we use the ns_to_cycles function to compute the sample
+	 * delay factor. We do this because the form of the computation is the
+	 * same as that for calculating cycles.
+	 */
+	sample_delay_factor =
+		ns_to_cycles(ideal_sample_delay_in_ns << dll_delay_shift,
+							clock_period_in_ns, 0);
+
+	if (sample_delay_factor > nfc->max_sample_delay_factor)
+		sample_delay_factor = nfc->max_sample_delay_factor;
+
+	/*
+	 * These macros conveniently encapsulate a computation we'll use to
+	 * continuously evaluate whether or not the data sample delay is inside
+	 * the eye.
+	 */
+	#define IDEAL_DELAY  ((int) ideal_sample_delay_in_ns)
+
+	#define QUANTIZED_DELAY  \
+		((int) ((sample_delay_factor * clock_period_in_ns) >> \
+							dll_delay_shift))
+
+	#define DELAY_ERROR  (abs(QUANTIZED_DELAY - IDEAL_DELAY))
+
+	#define SAMPLE_IS_NOT_WITHIN_THE_EYE  (DELAY_ERROR > (tEYE >> 1))
+
+	/*
+	 * While the quantized sample time falls outside the eye, reduce the
+	 * sample delay or extend the data setup to move the sampling point back
+	 * toward the eye. Do not allow the number of data setup cycles to
+	 * exceed the maximum allowed by the NFC.
+	 */
+	while (SAMPLE_IS_NOT_WITHIN_THE_EYE &&
+			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
+		/*
+		 * If control arrives here, the quantized sample delay falls
+		 * outside the eye. Check if it's before the eye opens, or after
+		 * the eye closes.
+		 */
+		if (QUANTIZED_DELAY > IDEAL_DELAY) {
+			/*
+			 * If control arrives here, the quantized sample delay
+			 * falls after the eye closes. Decrease the quantized
+			 * delay time and then go back to re-evaluate.
+			 */
+			if (sample_delay_factor != 0)
+				sample_delay_factor--;
+			continue;
+		}
+
+		/*
+		 * If control arrives here, the quantized sample delay falls
+		 * before the eye opens. Shift the sample point by increasing
+		 * data setup time. This will also make the eye larger.
+		 */
+
+		/* Give a cycle to data setup. */
+		data_setup_in_cycles++;
+		/* Synchronize the data setup time with the cycles. */
+		data_setup_in_ns += clock_period_in_ns;
+		/* Adjust tEYE accordingly. */
+		tEYE += clock_period_in_ns;
+
+		/*
+		 * Decrease the ideal sample delay by one half cycle, to keep it
+		 * in the middle of the eye.
+		 */
+		ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);
+
+		/* ...and one less period for the delay time. */
+		ideal_sample_delay_in_ns -= clock_period_in_ns;
+
+		/* Jam a negative ideal sample delay to zero. */
+		if (ideal_sample_delay_in_ns < 0)
+			ideal_sample_delay_in_ns = 0;
+
+		/*
+		 * We have a new ideal sample delay, so re-compute the quantized
+		 * delay.
+		 */
+		sample_delay_factor =
+			ns_to_cycles(
+				ideal_sample_delay_in_ns << dll_delay_shift,
+							clock_period_in_ns, 0);
+
+		if (sample_delay_factor > nfc->max_sample_delay_factor)
+			sample_delay_factor = nfc->max_sample_delay_factor;
+	}
+
+	/* Control arrives here when we're ready to return our results. */
+return_results:
+	hw->data_setup_in_cycles    = data_setup_in_cycles;
+	hw->data_hold_in_cycles     = data_hold_in_cycles;
+	hw->address_setup_in_cycles = address_setup_in_cycles;
+	hw->use_half_periods        = dll_use_half_periods;
+	hw->sample_delay_factor     = sample_delay_factor;
+	hw->device_busy_timeout     = 0x500;
+	hw->wrn_dly_sel             = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS;
+
+	/* Return success. */
+	return 0;
+}
+
+/*
+ * <1> Firstly, we should know what's the GPMI-clock means.
+ *     The GPMI-clock is the internal clock in the gpmi nand controller.
+ *     If you set 100MHz to gpmi nand controller, the GPMI-clock's period
+ *     is 10ns. Mark the GPMI-clock's period as GPMI-clock-period.
+ *
+ * <2> Secondly, we should know what's the frequency on the nand chip pins.
+ *     The frequency on the nand chip pins is derived from the GPMI-clock.
+ *     We can get it from the following equation:
+ *
+ *         F = G / (DS + DH)
+ *
+ *         F  : the frequency on the nand chip pins.
+ *         G  : the GPMI clock, such as 100MHz.
+ *         DS : GPMI_TIMING0:DATA_SETUP
+ *         DH : GPMI_TIMING0:DATA_HOLD
+ *
+ * <3> Thirdly, when the frequency on the nand chip pins is above 33MHz,
+ *     the nand EDO(extended Data Out) timing could be applied.
+ *     The GPMI implements a feedback read strobe to sample the read data.
+ *     The feedback read strobe can be delayed to support the nand EDO timing
+ *     where the read strobe may deasserts before the read data is valid, and
+ *     read data is valid for some time after read strobe.
+ *
+ *     The following figure illustrates some aspects of a NAND Flash read:
+ *
+ *                   |<---tREA---->|
+ *                   |             |
+ *                   |         |   |
+ *                   |<--tRP-->|   |
+ *                   |         |   |
+ *                  __          ___|__________________________________
+ *     RDN            \________/   |
+ *                                 |
+ *                                 /---------\
+ *     Read Data    --------------<           >---------
+ *                                 \---------/
+ *                                |     |
+ *                                |<-D->|
+ *     FeedbackRDN  ________             ____________
+ *                          \___________/
+ *
+ *          D stands for delay, set in the GPMI_CTRL1:RDN_DELAY.
+ *
+ *
+ * <4> Now, we begin to describe how to compute the right RDN_DELAY.
+ *
+ *  4.1) From the aspect of the nand chip pins:
+ *        Delay = (tREA + C - tRP)               {1}
+ *
+ *        tREA : the maximum read access time. From the ONFI nand standards,
+ *               we know that tREA is 16ns in mode 5, tREA is 20ns is mode 4.
+ *               Please check it in : www.onfi.org
+ *        C    : a constant for adjust the delay. default is 4.
+ *        tRP  : the read pulse width.
+ *               Specified by the GPMI_TIMING0:DATA_SETUP:
+ *                    tRP = (GPMI-clock-period) * DATA_SETUP
+ *
+ *  4.2) From the aspect of the GPMI nand controller:
+ *         Delay = RDN_DELAY * 0.125 * RP        {2}
+ *
+ *         RP   : the DLL reference period.
+ *            if (GPMI-clock-period > DLL_THRETHOLD)
+ *                   RP = GPMI-clock-period / 2;
+ *            else
+ *                   RP = GPMI-clock-period;
+ *
+ *            Set the GPMI_CTRL1:HALF_PERIOD if GPMI-clock-period
+ *            is greater DLL_THRETHOLD. In other SOCs, the DLL_THRETHOLD
+ *            is 16ns, but in mx6q, we use 12ns.
+ *
+ *  4.3) since {1} equals {2}, we get:
+ *
+ *                    (tREA + 4 - tRP) * 8
+ *         RDN_DELAY = ---------------------     {3}
+ *                           RP
+ *
+ *  4.4) We only support the fastest asynchronous mode of ONFI nand.
+ *       For some ONFI nand, the mode 4 is the fastest mode;
+ *       while for some ONFI nand, the mode 5 is the fastest mode.
+ *       So we only support the mode 4 and mode 5. It is no need to
+ *       support other modes.
+ */
+static void mxs_nand_compute_edo_timing(struct mxs_nand_info *info,
+			struct gpmi_nfc_hardware_timing *hw, int mode)
+{
+	unsigned long rate = clk_get_rate(info->clk);
+	int dll_threshold = 12;
+	unsigned long delay;
+	unsigned long clk_period;
+	int t_rea;
+	int c = 4;
+	int t_rp;
+	int rp;
+
+	/*
+	 * [1] for GPMI_TIMING0:
+	 *     The async mode requires 40MHz for mode 4, 50MHz for mode 5.
+	 *     The GPMI can support 100MHz at most. So if we want to
+	 *     get the 40MHz or 50MHz, we have to set DS=1, DH=1.
+	 *     Set the ADDRESS_SETUP to 0 in mode 4.
+	 */
+	hw->data_setup_in_cycles = 1;
+	hw->data_hold_in_cycles = 1;
+	hw->address_setup_in_cycles = ((mode == 5) ? 1 : 0);
+
+	/* [2] for GPMI_TIMING1 */
+	hw->device_busy_timeout = 0x9000;
+
+	/* [3] for GPMI_CTRL1 */
+	hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
+
+	/*
+	 * Enlarge 10 times for the numerator and denominator in {3}.
+	 * This make us to get more accurate result.
+	 */
+	clk_period = NSEC_PER_SEC / (rate / 10);
+	dll_threshold *= 10;
+	t_rea = ((mode == 5) ? 16 : 20) * 10;
+	c *= 10;
+
+	t_rp = clk_period * 1; /* DATA_SETUP is 1 */
+
+	if (clk_period > dll_threshold) {
+		hw->use_half_periods = 1;
+		rp = clk_period / 2;
+	} else {
+		hw->use_half_periods = 0;
+		rp = clk_period;
+	}
+
+	/*
+	 * Multiply the numerator with 10, we could do a round off:
+	 *      7.8 round up to 8; 7.4 round down to 7.
+	 */
+	delay  = (((t_rea + c - t_rp) * 8) * 10) / rp;
+	delay = (delay + 5) / 10;
+
+	hw->sample_delay_factor = delay;
+}
+
+static int mxs_nand_enable_edo_mode(struct mxs_nand_info *info)
+{
+	struct nand_chip *nand = &info->nand_chip;
+	struct mtd_info	 *mtd = &info->mtd;
+	uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {};
+	int ret, mode;
+
+	if (!mxs_nand_is_imx6(info))
+		return -ENODEV;
+
+	if (!nand->onfi_version)
+		return -ENOENT;
+
+	mode = onfi_get_async_timing_mode(nand);
+
+	/* We only support the timing mode 4 and mode 5. */
+	if (mode & ONFI_TIMING_MODE_5)
+		mode = 5;
+	else if (mode & ONFI_TIMING_MODE_4)
+		mode = 4;
+	else
+		return -EINVAL;
+
+	nand->select_chip(mtd, 0);
+
+	/* [1] send SET FEATURE commond to NAND */
+	feature[0] = mode;
+
+	ret = nand->onfi_set_features(mtd, nand,
+				ONFI_FEATURE_ADDR_TIMING_MODE, feature);
+	if (ret)
+		goto err_out;
+
+	/* [2] send GET FEATURE command to double-check the timing mode */
+	ret = nand->onfi_get_features(mtd, nand,
+				ONFI_FEATURE_ADDR_TIMING_MODE, feature);
+	if (ret || feature[0] != mode)
+		goto err_out;
+
+	nand->select_chip(mtd, -1);
+
+	/* [3] set the main IO clock, 100MHz for mode 5, 80MHz for mode 4. */
+	clk_set_rate(info->clk, (mode == 5) ? 100000000 : 80000000);
+
+	dev_dbg(info->dev, "using asynchronous EDO mode %d\n", mode);
+
+	return mode;
+
+err_out:
+	nand->select_chip(mtd, -1);
+
+	return -EINVAL;
+}
+
+static void mxs_nand_setup_timing(struct mxs_nand_info *info)
+{
+	void __iomem *gpmi_regs = info->io_base;
+	uint32_t reg;
+	struct gpmi_nfc_hardware_timing hw;
+	int mode;
+
+	mode = mxs_nand_enable_edo_mode(info);
+	if (mode >= 0)
+		mxs_nand_compute_edo_timing(info, &hw, mode);
+	else
+		mxs_nand_compute_hardware_timing(info, &hw);
+
+	writel(GPMI_TIMING0_ADDRESS_SETUP(hw.address_setup_in_cycles) |
+		GPMI_TIMING0_DATA_HOLD(hw.data_hold_in_cycles) |
+		GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles),
+		gpmi_regs + GPMI_TIMING0);
+
+	writel(GPMI_TIMING1_BUSY_TIMEOUT(hw.device_busy_timeout),
+		gpmi_regs + GPMI_TIMING1);
+
+	reg = readl(gpmi_regs + GPMI_CTRL1);
+
+	reg &= ~GPMI_CTRL1_WRN_DLY(3);
+	reg |= GPMI_CTRL1_WRN_DLY(hw.wrn_dly_sel);
+	/* DLL_ENABLE must be set to 0 when setting RDN_DELAY or HALF_PERIOD. */
+	reg &= ~GPMI_CTRL1_DLL_ENABLE;
+	reg &= ~GPMI_CTRL1_RDN_DELAY(0xf);
+	reg |= GPMI_CTRL1_RDN_DELAY(hw.sample_delay_factor);
+	reg &= ~GPMI_CTRL1_HALF_PERIOD;
+	if (hw.use_half_periods)
+		reg |= GPMI_CTRL1_HALF_PERIOD;
+
+	writel(reg, gpmi_regs + GPMI_CTRL1);
+
+	if (hw.sample_delay_factor) {
+		writel(GPMI_CTRL1_DLL_ENABLE, gpmi_regs + GPMI_CTRL1_SET);
+		/*
+		 * After we enable the GPMI DLL, we have to wait 64 clock
+		 * cycles before we can use the GPMI. Assume 1MHz as lowest
+		 * bus clock.
+		 */
+		udelay(64);
+	}
+}
+
 static int mxs_nand_probe(struct device_d *dev)
 {
 	struct resource *iores;
@ -1324,6 +2124,8 @@ static int mxs_nand_probe(struct device_d *dev)
 		return -ENOMEM;
 	}

+	nand_info->dev = dev;
+
 	mxs_nand_probe_dt(dev, nand_info);

 	nand_info->type = type;
@ -1404,6 +2206,8 @@ static int mxs_nand_probe(struct device_d *dev)

 	nand->options |= NAND_NO_SUBPAGE_WRITE;

+	mxs_nand_setup_timing(nand_info);
+
 	/* second phase scan */
 	err = nand_scan_tail(mtd);
 	if (err)