[dev][nand][aml-rawnand] Fix uncorrectable ECC handling.
1) Fix handling of uncorrectable ECC errors. For non-randomized
data, uncorrectable ECC errors result in an error propagated up
to the NAND protocol, which retries the read thrice. For randomized
data, a further check is necessary (the previous check was wrong:
its condition was reversed).
2) Add verbose error logging to the code handling uncorrectable ECC errors.
3) Fix error handling for the case where we time out waiting for a read
interrupt.
4) Populate the ECC strength from the Page0 read (the ECC strength is
used in the handling of uncorrectable ECC errors for the randomized
case - not applicable to us right now, but might be later).
Bug: ZX-2616.
Test: Use Ruchira's code to do SDIO reads at intervals of our choice
to generate bus loading, and use Ricardo's tester for data validation.
Change-Id: I1be58d291f872cb91018aabd3c0673caa109c507
diff --git a/system/dev/nand/aml-rawnand/aml-rawnand.c b/system/dev/nand/aml-rawnand/aml-rawnand.c
index 68592f1..af93c26 100644
--- a/system/dev/nand/aml-rawnand/aml-rawnand.c
+++ b/system/dev/nand/aml-rawnand/aml-rawnand.c
@@ -34,7 +34,7 @@
static const uint32_t chipsel[2] = {NAND_CE0, NAND_CE1};
struct aml_controller_params aml_params = {
- 8,
+ 8, /* Overwritten using BCH setting from page0 */
2,
/* The 2 following values are overwritten by page0 contents */
1, /* rand-mode is 1 for page0 */
@@ -100,6 +100,36 @@
return ecc_page;
}
+int aml_get_ecc_strength(uint32_t ecc_mode) {
+ int ecc_strength;
+
+ switch (ecc_mode) {
+ case AML_ECC_BCH8:
+ case AML_ECC_BCH8_1K:
+ ecc_strength = 8;
+ break;
+ case AML_ECC_BCH24_1K:
+ ecc_strength = 24;
+ break;
+ case AML_ECC_BCH30_1K:
+ ecc_strength = 30;
+ break;
+ case AML_ECC_BCH40_1K:
+ ecc_strength = 40;
+ break;
+ case AML_ECC_BCH50_1K:
+ ecc_strength = 50;
+ break;
+ case AML_ECC_BCH60_1K:
+ ecc_strength = 60;
+ break;
+ default:
+ ecc_strength = -1;
+ break;
+ }
+ return ecc_strength;
+}
+
static void aml_cmd_idle(aml_raw_nand_t* raw_nand, uint32_t time) {
uint32_t cmd = 0;
volatile uint8_t* reg = (volatile uint8_t*)
@@ -269,7 +299,8 @@
* Returns the maximum bitflips corrected on this NAND page
* (the maximum bitflips across all of the ECC pages in this page).
*/
-static int aml_get_ecc_corrections(aml_raw_nand_t* raw_nand, int ecc_pages) {
+static int aml_get_ecc_corrections(aml_raw_nand_t* raw_nand, int ecc_pages,
+ uint32_t nand_page) {
struct aml_info_format* info;
int bitflips = 0;
uint8_t zero_cnt;
@@ -277,6 +308,11 @@
for (int i = 0; i < ecc_pages; i++) {
info = aml_info_ptr(raw_nand, i);
if (info->ecc.eccerr_cnt == AML_ECC_UNCORRECTABLE_CNT) {
+ if (!raw_nand->controller_params.rand_mode) {
+ zxlogf(ERROR, "%s: ECC failure (non-randomized)@%u\n", __func__, nand_page);
+ raw_nand->stats.failed++;
+ return ECC_CHECK_RETURN_FF;
+ }
/*
* Why are we checking for zero_cnt here ?
* Per Amlogic HW architect, this is to deal with
@@ -287,15 +323,19 @@
* blank page.
*/
zero_cnt = info->zero_cnt & AML_ECC_UNCORRECTABLE_CNT;
- if (raw_nand->controller_params.rand_mode &&
- (zero_cnt < raw_nand->controller_params.ecc_strength)) {
- zxlogf(ERROR, "%s: Returning ECC failure\n",
- __func__);
+ if (zero_cnt >= raw_nand->controller_params.ecc_strength) {
+ zxlogf(ERROR, "%s: ECC failure (randomized)@%u zero_cnt=%u\n",
+ __func__, nand_page, zero_cnt);
+ raw_nand->stats.failed++;
return ECC_CHECK_RETURN_FF;
}
- raw_nand->stats.failed++;
+ zxlogf(ERROR, "%s: Blank Page@%u\n", __func__, nand_page);
continue;
}
+ if (info->ecc.eccerr_cnt != 0) {
+ zxlogf(INFO, "%s: Corrected %u ECC errors@%u\n",
+ __func__, info->ecc.eccerr_cnt, nand_page);
+ }
raw_nand->stats.ecc_corrected += info->ecc.eccerr_cnt;
bitflips = MAX(bitflips, info->ecc.eccerr_cnt);
}
@@ -506,7 +546,11 @@
__func__, status);
return status;
}
- aml_queue_rb(raw_nand);
+ status = aml_queue_rb(raw_nand);
+ if (status != ZX_OK) {
+ zxlogf(ERROR, "%s: aml_queue_rb failed %d\n", __func__, status);
+ return ZX_ERR_IO;
+ }
status = aml_check_ecc_pages(raw_nand, ecc_pages);
if (status != ZX_OK) {
zxlogf(ERROR, "%s: aml_check_ecc_pages failed %d\n",
@@ -524,7 +568,7 @@
}
if (oob != NULL)
status = aml_get_oob_byte(raw_nand, oob);
- ecc_c = aml_get_ecc_corrections(raw_nand, ecc_pages);
+ ecc_c = aml_get_ecc_corrections(raw_nand, ecc_pages, nand_page);
if (ecc_c < 0) {
zxlogf(ERROR, "%s: Uncorrectable ECC error on read\n",
__func__);
@@ -863,6 +907,15 @@
(page0->nand_setup.cfg.d32 >> 19) & 0x1;
raw_nand->controller_params.bch_mode =
(page0->nand_setup.cfg.d32 >> 14) & 0x7;
+
+ raw_nand->controller_params.ecc_strength =
+ aml_get_ecc_strength(raw_nand->controller_params.bch_mode);
+ if (raw_nand->controller_params.ecc_strength < 0) {
+ zxlogf(INFO, "%s: BAD ECC strength computed from BCH Mode\n", __func__);
+ free(data);
+ return ZX_ERR_BAD_STATE;
+ }
+
zxlogf(INFO, "%s: NAND BCH Mode is %s\n", __func__,
aml_ecc_string(raw_nand->controller_params.bch_mode));
free(data);