[dev][nand][aml-rawnand] Handle (read) DMAs being aborted.

From the data we have, it seems clear that under high data bus
contention, NAND read DMA operations get aborted: the data DMA is
aborted and the DMA of the read completion state never happens. This
means the driver cannot rely on the read completion state being
accurate or correct. Pre-initialize the read completion state with
"bad" values, so that if the DMA is aborted and the read completion
state is never posted to memory, the read fails. The read is then
retried from the NAND protocol layer.
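
For illustration only, a minimal sketch (not part of this change) of
how the seeded "bad" values are expected to surface as a failed read.
The field names and AML_ECC_UNCORRECTABLE_CNT follow the diff below;
the helper name and the exact shape of the existing ECC check path are
assumptions:

    static zx_status_t check_seeded_completion(struct aml_info_format* info) {
        if (info->ecc.completed == 0) {
            /* Completion status never posted, i.e. the DMA was aborted. */
            return ZX_ERR_IO;
        }
        if (info->ecc.eccerr_cnt == AML_ECC_UNCORRECTABLE_CNT) {
            /* Either a genuine uncorrectable ECC error or the seeded value. */
            return ZX_ERR_IO_DATA_INTEGRITY;
        }
        return ZX_OK;
    }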

Bug: ZX-2616.

Test: Ran Ricardo's read verification test (with a higher number of
reads per block, running in a forever loop), with Ruchira's code doing
SDIO reads in the background once every 100us. Verified that the DMAs
are aborted by pre-seeding the data buffer in the NAND controller driver
with a known value and seeing that same value posted back to the
application in a "successful" read (before this change was in place).

Change-Id: Ia0d27af53ce2535302eac6f9985bf7b4edc474fb
diff --git a/system/dev/nand/aml-rawnand/aml-rawnand.c b/system/dev/nand/aml-rawnand/aml-rawnand.c
index af93c26..ce7831b 100644
--- a/system/dev/nand/aml-rawnand/aml-rawnand.c
+++ b/system/dev/nand/aml-rawnand/aml-rawnand.c
@@ -329,7 +329,7 @@
                 raw_nand->stats.failed++;
                 return ECC_CHECK_RETURN_FF;
             }
-            zxlogf(ERROR, "%s: Blank Page@%u\n", __func__, nand_page);
+            zxlogf(INFO, "%s: Blank Page@%u\n", __func__, nand_page);
             continue;
         }
         if (info->ecc.eccerr_cnt != 0) {
@@ -484,6 +484,37 @@
             ((nand_page % AML_PAGE0_STEP) == 0));
 }
 
+/*
+ * Fills the (data) buffer with 0xdeadbeef to debug what is being
+ * returned to the user from the read.
+ */
+static void fill_data_pattern(char* buf, size_t size) {
+    uint32_t* p = (uint32_t*)buf;
+
+    for (size_t num_words = size / sizeof(uint32_t); num_words > 0; num_words--) {
+        *p++ = 0xdeadbeef;
+    }
+}
+
+static void fill_info_pattern(aml_raw_nand_t* raw_nand, char* buf) {
+    uint32_t ecc_pagesize = aml_get_ecc_pagesize(raw_nand, raw_nand->controller_params.bch_mode);
+    uint32_t ecc_pages = raw_nand->writesize / ecc_pagesize;
+
+    for (uint32_t i = 0; i < ecc_pages; i++) {
+        struct aml_info_format* info;
+
+        info = aml_info_ptr(raw_nand, i);
+        /*
+         * Force the read completion state to be "bad". For successful
+         * reads, the NAND controller will DMA the actual completion
+         * state in here. If the DMA is aborted, the bad state we seed
+         * here is retained and the read will fail.
+         */
+        info->ecc.eccerr_cnt = AML_ECC_UNCORRECTABLE_CNT;
+        info->ecc.completed = 0;
+    }
+}
+
 static zx_status_t aml_read_page_hwecc(void* ctx,
                                        void* data,
                                        void* oob,
@@ -513,6 +544,21 @@
      * Flush and invalidate (only invalidate is really needed), the
      * info and data buffers before kicking off DMA into them.
      */
+#if 0
+    /* TODO - Remove this once we fix our HW issues with DMA aborts (ZX-2616). */
+    fill_data_pattern(raw_nand->data_buf, raw_nand->writesize);
+#endif
+    /*
+     * We see DMAs (data + info) being aborted, which means we cannot
+     * rely on consistent and correct read completion status being posted.
+     *
+     * Pre-initialize the info buf (read completion status) for
+     * every ECC page with known "bad" values (read not completed,
+     * ECC uncorrectable errors). This way if the DMA of the data+status
+     * is aborted, we will forcibly fail the read (and have it retried
+     * from the NAND protocol layer).
+     */
+    fill_info_pattern(raw_nand, raw_nand->info_buf);
     io_buffer_cache_flush_invalidate(&raw_nand->data_buffer, 0,
                                      raw_nand->writesize);
     io_buffer_cache_flush_invalidate(&raw_nand->info_buffer, 0,