From 1dd786328ff42b239a00fdbcdaed6fd9709042bf Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 19 Aug 2010 17:28:50 -0700 Subject: [PATCH 01/16] UBI: cleanup and simplify Kconfig Cleanup the Kconfig for UBI by using menuconfig to enable/disable the entire driver. Remove the dependency checks for MTD_UBI and MTD_UBI_DEBUG by wrapping the options in if/endif blocks and remove any redundant checks. Remove all default n since that is the Kconfig default. Change menu "Additional UBI debugging messages" into a comment to remove one menu level. Signed-off-by: H Hartley Sweeten Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/Kconfig | 17 ++++++----------- drivers/mtd/ubi/Kconfig.debug | 29 ++++------------------------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig index f702a163d8df..3cf193fb5e00 100644 --- a/drivers/mtd/ubi/Kconfig +++ b/drivers/mtd/ubi/Kconfig @@ -1,9 +1,5 @@ -menu "UBI - Unsorted block images" - depends on MTD - -config MTD_UBI - tristate "Enable UBI" - depends on MTD +menuconfig MTD_UBI + tristate "Enable UBI - Unsorted block images" select CRC32 help UBI is a software layer above MTD layer which admits of LVM-like @@ -12,11 +8,12 @@ config MTD_UBI capabilities. Please, consult the MTD web site for more details (www.linux-mtd.infradead.org). +if MTD_UBI + config MTD_UBI_WL_THRESHOLD int "UBI wear-leveling threshold" default 4096 range 2 65536 - depends on MTD_UBI help This parameter defines the maximum difference between the highest erase counter value and the lowest erase counter value of eraseblocks @@ -34,7 +31,6 @@ config MTD_UBI_BEB_RESERVE int "Percentage of reserved eraseblocks for bad eraseblocks handling" default 1 range 0 25 - depends on MTD_UBI help If the MTD device admits of bad eraseblocks (e.g. 
NAND flash), UBI reserves some amount of physical eraseblocks to handle new bad @@ -48,8 +44,6 @@ config MTD_UBI_BEB_RESERVE config MTD_UBI_GLUEBI tristate "MTD devices emulation driver (gluebi)" - default n - depends on MTD_UBI help This option enables gluebi - an additional driver which emulates MTD devices on top of UBI volumes: for each UBI volumes an MTD device is @@ -59,4 +53,5 @@ config MTD_UBI_GLUEBI software. source "drivers/mtd/ubi/Kconfig.debug" -endmenu + +endif # MTD_UBI diff --git a/drivers/mtd/ubi/Kconfig.debug b/drivers/mtd/ubi/Kconfig.debug index 61f6e5e40458..fad4adc0fe2c 100644 --- a/drivers/mtd/ubi/Kconfig.debug +++ b/drivers/mtd/ubi/Kconfig.debug @@ -1,94 +1,73 @@ comment "UBI debugging options" - depends on MTD_UBI config MTD_UBI_DEBUG bool "UBI debugging" depends on SYSFS - depends on MTD_UBI select DEBUG_FS select KALLSYMS_ALL if KALLSYMS && DEBUG_KERNEL help This option enables UBI debugging. +if MTD_UBI_DEBUG + config MTD_UBI_DEBUG_MSG bool "UBI debugging messages" - depends on MTD_UBI_DEBUG - default n help This option enables UBI debugging messages. config MTD_UBI_DEBUG_PARANOID bool "Extra self-checks" - default n - depends on MTD_UBI_DEBUG help This option enables extra checks in UBI code. Note this slows UBI down significantly. config MTD_UBI_DEBUG_DISABLE_BGT bool "Do not enable the UBI background thread" - depends on MTD_UBI_DEBUG - default n help This option switches the background thread off by default. The thread may be also be enabled/disabled via UBI sysfs. config MTD_UBI_DEBUG_EMULATE_BITFLIPS bool "Emulate flash bit-flips" - depends on MTD_UBI_DEBUG - default n help This option emulates bit-flips with probability 1/50, which in turn causes scrubbing. Useful for debugging and stressing UBI. config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES bool "Emulate flash write failures" - depends on MTD_UBI_DEBUG - default n help This option emulates write failures with probability 1/100. 
Useful for debugging and testing how UBI handlines errors. config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES bool "Emulate flash erase failures" - depends on MTD_UBI_DEBUG - default n help This option emulates erase failures with probability 1/100. Useful for debugging and testing how UBI handlines errors. -menu "Additional UBI debugging messages" - depends on MTD_UBI_DEBUG +comment "Additional UBI debugging messages" config MTD_UBI_DEBUG_MSG_BLD bool "Additional UBI initialization and build messages" - default n - depends on MTD_UBI_DEBUG help This option enables detailed UBI initialization and device build debugging messages. config MTD_UBI_DEBUG_MSG_EBA bool "Eraseblock association unit messages" - default n - depends on MTD_UBI_DEBUG help This option enables debugging messages from the UBI eraseblock association unit. config MTD_UBI_DEBUG_MSG_WL bool "Wear-leveling unit messages" - default n - depends on MTD_UBI_DEBUG help This option enables debugging messages from the UBI wear-leveling unit. config MTD_UBI_DEBUG_MSG_IO bool "Input/output unit messages" - default n - depends on MTD_UBI_DEBUG help This option enables debugging messages from the UBI input/output unit. -endmenu # UBI debugging messages +endif # MTD_UBI_DEBUG From c174a08c72ae77a05be59d0d810dc13239b81e8e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 15:11:17 +0300 Subject: [PATCH 02/16] UBI: fix small 80 characters limit style issue One line was longer than 80 characters, make it shorter. 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/build.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 78ae89488a4f..f247c4e7b40d 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -95,8 +95,8 @@ DEFINE_MUTEX(ubi_devices_mutex); static DEFINE_SPINLOCK(ubi_devices_lock); /* "Show" method for files in '//class/ubi/' */ -static ssize_t ubi_version_show(struct class *class, struct class_attribute *attr, - char *buf) +static ssize_t ubi_version_show(struct class *class, + struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", UBI_VERSION); } From 756e1df1d2b8b572a92dd1b82d2a432d5b280b1c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 01:30:16 +0300 Subject: [PATCH 03/16] UBI: rename IO error code Rename UBI_IO_BAD_HDR_READ into UBI_IO_BAD_HDR_EBADMSG which is presumably more self-documenting and readable. Indeed, the '_READ' suffix does not tell much and even confuses, while '_EBADMSG' tells about uncorrectable ECC error, because we use -EBADMSG all over the place to represent ECC errors. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/eba.c | 4 ++-- drivers/mtd/ubi/io.c | 33 +++++++++++++++++++-------------- drivers/mtd/ubi/scan.c | 8 ++++---- drivers/mtd/ubi/scan.h | 2 +- drivers/mtd/ubi/ubi.h | 7 ++++--- 5 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index fe74749e0dae..334865ef5220 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -418,7 +418,7 @@ retry: * may try to recover data. FIXME: but this is * not implemented. 
*/ - if (err == UBI_IO_BAD_HDR_READ || + if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { ubi_warn("corrupted VID header at PEB " "%d, LEB %d:%d", pnum, vol_id, @@ -963,7 +963,7 @@ write_error: static int is_error_sane(int err) { if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || - err == UBI_IO_BAD_HDR_READ || err == -ETIMEDOUT) + err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) return 0; return 1; } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 332f992f13d9..05774da4abca 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -517,7 +517,7 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) * In this case we probably anyway have garbage in this PEB. */ err1 = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0); - if (err1 == UBI_IO_BAD_HDR_READ || err1 == UBI_IO_BAD_HDR) + if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR) /* * The VID header is corrupted, so we can safely erase this * PEB and not afraid that it will be treated as a valid PEB in @@ -712,6 +712,8 @@ bad: * and corrected by the flash driver; this is harmless but may indicate that * this eraseblock may become bad soon (but may be not); * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); + * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was + * a data integrity error (uncorrectable ECC error in case of NAND); * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; * o a negative error code in case of failure. */ @@ -731,15 +733,15 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, /* * We read all the data, but either a correctable bit-flip - * occurred, or MTD reported about some data integrity error, - * like an ECC error in case of NAND. The former is harmless, - * the later may mean that the read data is corrupted. But we - * have a CRC check-sum and we will detect this. If the EC - * header is still OK, we just report this as there was a - * bit-flip. 
+ * occurred, or MTD reported a data integrity error + * (uncorrectable ECC error in case of NAND). The former is + * harmless, the later may mean that the read data is + * corrupted. But we have a CRC check-sum and we will detect + * this. If the EC header is still OK, we just report this as + * there was a bit-flip, to force scrubbing. */ if (err == -EBADMSG) - read_err = UBI_IO_BAD_HDR_READ; + read_err = UBI_IO_BAD_HDR_EBADMSG; } magic = be32_to_cpu(ec_hdr->magic); @@ -983,6 +985,8 @@ bad: * this eraseblock may become bad soon; * o %UBI_IO_BAD_HDR if the volume identifier header is corrupted (a CRC * error detected); + * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was + * a data integrity error (uncorrectable ECC error in case of NAND); * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID * header there); * o a negative error code in case of failure. @@ -1006,14 +1010,15 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, /* * We read all the data, but either a correctable bit-flip - * occurred, or MTD reported about some data integrity error, - * like an ECC error in case of NAND. The former is harmless, - * the later may mean the read data is corrupted. But we have a - * CRC check-sum and we will identify this. If the VID header is - * still OK, we just report this as there was a bit-flip. + * occurred, or MTD reported a data integrity error + * (uncorrectable ECC error in case of NAND). The former is + * harmless, the later may mean that the read data is + * corrupted. But we have a CRC check-sum and we will detect + * this. If the VID header is still OK, we just report this as + * there was a bit-flip, to force scrubbing. 
*/ if (err == -EBADMSG) - read_err = UBI_IO_BAD_HDR_READ; + read_err = UBI_IO_BAD_HDR_EBADMSG; } magic = be32_to_cpu(vid_hdr->magic); diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 69b52e9c9489..7e7c56d162ec 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -750,7 +750,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, bitflips = 1; else if (err == UBI_IO_PEB_EMPTY) return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); - else if (err == UBI_IO_BAD_HDR_READ || err == UBI_IO_BAD_HDR) { + else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { /* * We have to also look at the VID header, possibly it is not * corrupted. Set %bitflips flag in order to make this PEB be @@ -816,11 +816,11 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return err; else if (err == UBI_IO_BITFLIPS) bitflips = 1; - else if (err == UBI_IO_BAD_HDR_READ || err == UBI_IO_BAD_HDR || + else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR || (err == UBI_IO_PEB_FREE && ec_corr)) { /* VID header is corrupted */ - if (err == UBI_IO_BAD_HDR_READ || - ec_corr == UBI_IO_BAD_HDR_READ) + if (err == UBI_IO_BAD_HDR_EBADMSG || + ec_corr == UBI_IO_BAD_HDR_EBADMSG) si->read_err_count += 1; err = add_to_list(si, pnum, ec, &si->corr); if (err) diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 2576a8d1532b..0876649318dd 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -93,7 +93,7 @@ struct ubi_scan_volume { * those belonging to "preserve"-compatible internal volumes) * @used_peb_count: count of used PEBs * @corr_peb_count: count of PEBs in the @corr list - * @read_err_count: count of PEBs read with error (%UBI_IO_BAD_HDR_READ was + * @read_err_count: count of PEBs read with error (%UBI_IO_BAD_HDR_EBADMSG was * returned) * @free_peb_count: count of PEBs in the @free list * @erase_peb_count: count of PEBs in the @erase list diff --git a/drivers/mtd/ubi/ubi.h 
b/drivers/mtd/ubi/ubi.h index 0359e0cce482..24a7c7647cb7 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -90,15 +90,16 @@ * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a * valid erase counter header, and the rest are %0xFF bytes * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) - * UBI_IO_BAD_HDR_READ: the same as %UBI_IO_BAD_HDR, but also there was a read - * error reported by the flash driver + * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a + * data integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) * UBI_IO_BITFLIPS: bit-flips were detected and corrected */ enum { UBI_IO_PEB_EMPTY = 1, UBI_IO_PEB_FREE, UBI_IO_BAD_HDR, - UBI_IO_BAD_HDR_READ, + UBI_IO_BAD_HDR_EBADMSG, UBI_IO_BITFLIPS }; From 74d82d2660058e32644f0c673656b2a1d01d3688 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 02:11:20 +0300 Subject: [PATCH 04/16] UBI: remove duplicate IO error codes The 'UBI_IO_PEB_EMPTY' and 'UBI_IO_PEB_FREE' are essentially the same and mean that there are only 0xFF bytes instead of headers. Simplify UBI a little by turning them into a single 'UBI_IO_FF' error code. Also, stop maintaining commentaries in 'ubi_io_read_vid_hdr()' which are almost identical to commentaries in 'ubi_io_read_ec_hdr()'. 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/io.c | 45 +++++++----------------------------------- drivers/mtd/ubi/scan.c | 6 +++--- drivers/mtd/ubi/ubi.h | 8 ++------ drivers/mtd/ubi/wl.c | 2 +- 4 files changed, 13 insertions(+), 48 deletions(-) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 05774da4abca..1677a215af64 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -714,7 +714,7 @@ bad: * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was * a data integrity error (uncorrectable ECC error in case of NAND); - * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; + * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty) * o a negative error code in case of failure. */ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, @@ -762,7 +762,7 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, else if (UBI_IO_DEBUG) dbg_msg("no EC header found at PEB %d, " "only 0xFF bytes", pnum); - return UBI_IO_PEB_EMPTY; + return UBI_IO_FF; } /* @@ -977,19 +977,11 @@ bad: * * This function reads the volume identifier header from physical eraseblock * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read - * volume identifier header. The following codes may be returned: + * volume identifier header. The error codes are the same as in + * 'ubi_io_read_ec_hdr()'. 
* - * o %0 if the CRC checksum is correct and the header was successfully read; - * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected - * and corrected by the flash driver; this is harmless but may indicate that - * this eraseblock may become bad soon; - * o %UBI_IO_BAD_HDR if the volume identifier header is corrupted (a CRC - * error detected); - * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was - * a data integrity error (uncorrectable ECC error in case of NAND); - * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID - * header there); - * o a negative error code in case of failure. + * Note, the implementation of this function is also very similar to + * 'ubi_io_read_ec_hdr()', so refer commentaries in 'ubi_io_read_ec_hdr()'. */ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_hdr *vid_hdr, int verbose) @@ -1008,15 +1000,6 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, if (err != UBI_IO_BITFLIPS && err != -EBADMSG) return err; - /* - * We read all the data, but either a correctable bit-flip - * occurred, or MTD reported a data integrity error - * (uncorrectable ECC error in case of NAND). The former is - * harmless, the later may mean that the read data is - * corrupted. But we have a CRC check-sum and we will detect - * this. If the VID header is still OK, we just report this as - * there was a bit-flip, to force scrubbing. - */ if (err == -EBADMSG) read_err = UBI_IO_BAD_HDR_EBADMSG; } @@ -1026,25 +1009,16 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, if (read_err) return read_err; - /* - * If we have read all 0xFF bytes, the VID header probably does - * not exist and the physical eraseblock is assumed to be free. 
- */ if (check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { - /* The physical eraseblock is supposedly free */ if (verbose) ubi_warn("no VID header found at PEB %d, " "only 0xFF bytes", pnum); else if (UBI_IO_DEBUG) dbg_msg("no VID header found at PEB %d, " "only 0xFF bytes", pnum); - return UBI_IO_PEB_FREE; + return UBI_IO_FF; } - /* - * This is not a valid VID header, and these are not 0xFF - * bytes. Report that the header is corrupted. - */ if (verbose) { ubi_warn("bad magic number at PEB %d: %08x instead of " "%08x", pnum, magic, UBI_VID_HDR_MAGIC); @@ -1069,17 +1043,12 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, return read_err ?: UBI_IO_BAD_HDR; } - /* Validate the VID header that we have just read */ err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err("validation failed for PEB %d", pnum); return -EINVAL; } - /* - * If there was a read error (%-EBADMSG), but the header CRC is still - * OK, report about a bit-flip to force scrubbing on this PEB. - */ return read_err ? 
UBI_IO_BITFLIPS : 0; } diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 7e7c56d162ec..37cb18ff10c6 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -748,7 +748,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return err; else if (err == UBI_IO_BITFLIPS) bitflips = 1; - else if (err == UBI_IO_PEB_EMPTY) + else if (err == UBI_IO_FF) return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { /* @@ -817,7 +817,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, else if (err == UBI_IO_BITFLIPS) bitflips = 1; else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR || - (err == UBI_IO_PEB_FREE && ec_corr)) { + (err == UBI_IO_FF && ec_corr)) { /* VID header is corrupted */ if (err == UBI_IO_BAD_HDR_EBADMSG || ec_corr == UBI_IO_BAD_HDR_EBADMSG) @@ -826,7 +826,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, if (err) return err; goto adjust_mean_ec; - } else if (err == UBI_IO_PEB_FREE) { + } else if (err == UBI_IO_FF) { /* No VID header - the physical eraseblock is free */ err = add_to_list(si, pnum, ec, &si->free); if (err) diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 24a7c7647cb7..774bdcad6a07 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -85,10 +85,7 @@ /* * Error codes returned by the I/O sub-system. * - * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only - * %0xFF bytes - * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. 
it contains only a - * valid erase counter header, and the rest are %0xFF bytes + * UBI_IO_FF: the read region of flash contains only 0xFFs * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a * data integrity error reported by the MTD driver @@ -96,8 +93,7 @@ * UBI_IO_BITFLIPS: bit-flips were detected and corrected */ enum { - UBI_IO_PEB_EMPTY = 1, - UBI_IO_PEB_FREE, + UBI_IO_FF = 1, UBI_IO_BAD_HDR, UBI_IO_BAD_HDR_EBADMSG, UBI_IO_BITFLIPS diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 97a435672eaf..a9e7c9eed703 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -745,7 +745,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); if (err && err != UBI_IO_BITFLIPS) { - if (err == UBI_IO_PEB_FREE) { + if (err == UBI_IO_FF) { /* * We are trying to move PEB without a VID header. UBI * always write VID headers shortly after the PEB was From 92e1a7d9e7e07fb1cf0cbbcdf202938d0819b54d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 14:22:17 +0300 Subject: [PATCH 05/16] UBI: handle bit-flips when no header found Currently UBI has one small flaw - when we read EC or VID header, but find only 0xFF bytes, we return UBI_IO_FF and do not report whether we had bit-flips or not. In case of the VID header, the scanning code adds this PEB to the free list, even though there were bit-flips. Imagine the following situation: we start writing VID header to a PEB and have a power cut, so the PEB becomes unstable. When we scan and read the PEB, we get a bit-flip. Currently, UBI would just ignore this and treat the PEB as free. This patch changes UBI behavior and now UBI will schedule this PEB for erasure. 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/io.c | 54 +++++++++++++++++++++++------------------- drivers/mtd/ubi/scan.c | 4 ++-- drivers/mtd/ubi/ubi.h | 10 +++++++- drivers/mtd/ubi/wl.c | 10 ++++++++ 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 1677a215af64..b76252465c87 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -720,16 +720,16 @@ bad: int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr, int verbose) { - int err, read_err = 0; + int err, read_err; uint32_t crc, magic, hdr_crc; dbg_io("read EC header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); - if (err) { - if (err != UBI_IO_BITFLIPS && err != -EBADMSG) - return err; + read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (read_err) { + if (read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) + return read_err; /* * We read all the data, but either a correctable bit-flip @@ -740,14 +740,12 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, * this. If the EC header is still OK, we just report this as * there was a bit-flip, to force scrubbing. */ - if (err == -EBADMSG) - read_err = UBI_IO_BAD_HDR_EBADMSG; } magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { - if (read_err) - return read_err; + if (read_err == -EBADMSG) + return UBI_IO_BAD_HDR_EBADMSG; /* * The magic field is wrong. 
Let's check if we have read all @@ -762,7 +760,10 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, else if (UBI_IO_DEBUG) dbg_msg("no EC header found at PEB %d, " "only 0xFF bytes", pnum); - return UBI_IO_FF; + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; } /* @@ -790,7 +791,11 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, } else if (UBI_IO_DEBUG) dbg_msg("bad EC header CRC at PEB %d, calculated " "%#08x, read %#08x", pnum, crc, hdr_crc); - return read_err ?: UBI_IO_BAD_HDR; + + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; } /* And of course validate what has just been read from the media */ @@ -986,7 +991,7 @@ bad: int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_hdr *vid_hdr, int verbose) { - int err, read_err = 0; + int err, read_err; uint32_t crc, magic, hdr_crc; void *p; @@ -994,20 +999,15 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, ubi_assert(pnum >= 0 && pnum < ubi->peb_count); p = (char *)vid_hdr - ubi->vid_hdr_shift; - err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); - if (err) { - if (err != UBI_IO_BITFLIPS && err != -EBADMSG) - return err; - - if (err == -EBADMSG) - read_err = UBI_IO_BAD_HDR_EBADMSG; - } + if (read_err && read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) + return read_err; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { - if (read_err) - return read_err; + if (read_err == -EBADMSG) + return UBI_IO_BAD_HDR_EBADMSG; if (check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { if (verbose) @@ -1016,7 +1016,10 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, else if (UBI_IO_DEBUG) dbg_msg("no VID header found at PEB %d, " "only 0xFF bytes", pnum); - return UBI_IO_FF; + if (!read_err) + return UBI_IO_FF; + else + return UBI_IO_FF_BITFLIPS; } if (verbose) { @@ -1040,7 +1043,10 @@ int 
ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, } else if (UBI_IO_DEBUG) dbg_msg("bad CRC at PEB %d, calculated %#08x, " "read %#08x", pnum, crc, hdr_crc); - return read_err ?: UBI_IO_BAD_HDR; + if (!read_err) + return UBI_IO_BAD_HDR; + else + return UBI_IO_BAD_HDR_EBADMSG; } err = validate_vid_hdr(ubi, vid_hdr); diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 37cb18ff10c6..6f9080767e3f 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -748,7 +748,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return err; else if (err == UBI_IO_BITFLIPS) bitflips = 1; - else if (err == UBI_IO_FF) + else if (err == UBI_IO_FF || err == UBI_IO_FF_BITFLIPS) return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { /* @@ -817,7 +817,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, else if (err == UBI_IO_BITFLIPS) bitflips = 1; else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR || - (err == UBI_IO_FF && ec_corr)) { + (err == UBI_IO_FF && ec_corr) || err == UBI_IO_FF_BITFLIPS) { /* VID header is corrupted */ if (err == UBI_IO_BAD_HDR_EBADMSG || ec_corr == UBI_IO_BAD_HDR_EBADMSG) diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 774bdcad6a07..10990770bc9e 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -86,17 +86,25 @@ * Error codes returned by the I/O sub-system. 
* * UBI_IO_FF: the read region of flash contains only 0xFFs + * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also also there was a data + * integrity error reported by the MTD driver + * (uncorrectable ECC error in case of NAND) * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a * data integrity error reported by the MTD driver * (uncorrectable ECC error in case of NAND) * UBI_IO_BITFLIPS: bit-flips were detected and corrected + * + * Note, it is probably better to have bit-flip and ebadmsg as flags which can + * be or'ed with other error code. But this is a big change because there are + * may callers, so it does not worth the risk of introducing a bug */ enum { UBI_IO_FF = 1, + UBI_IO_FF_BITFLIPS, UBI_IO_BAD_HDR, UBI_IO_BAD_HDR_EBADMSG, - UBI_IO_BITFLIPS + UBI_IO_BITFLIPS, }; /* diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index a9e7c9eed703..605ecb1e22bb 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -759,6 +759,16 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, dbg_wl("PEB %d has no VID header", e1->pnum); protect = 1; goto out_not_moved; + } else if (err == UBI_IO_FF_BITFLIPS) { + /* + * The same situation as %UBI_IO_FF, but bit-flips were + * detected. It is better to schedule this PEB for + * scrubbing. + */ + dbg_wl("PEB %d has no VID header but has bit-flips", + e1->pnum); + scrubbing = 1; + goto out_not_moved; } ubi_err("error %d while reading VID header from PEB %d", From e0e718c281ce1dd4006681e1255535cc6f2857d4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 14:53:23 +0300 Subject: [PATCH 06/16] UBI: rename a local variable Rename local variable 'ec_corr' into 'ec_err' to make the code a little bit more readable. 'ec_err' is more appropriate because it sounds more like 'error when EC was read' and it looks more logical because we use it together with 'err'. 
Just a minor nicification which should improve the rather complex scanning code. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 6f9080767e3f..a15e9bcb6e77 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -725,7 +725,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) { long long uninitialized_var(ec); - int err, bitflips = 0, vol_id, ec_corr = 0; + int err, bitflips = 0, vol_id, ec_err = 0; dbg_bld("scan PEB %d", pnum); @@ -756,12 +756,12 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, * corrupted. Set %bitflips flag in order to make this PEB be * moved and EC be re-created. */ - ec_corr = err; + ec_err = err; ec = UBI_SCAN_UNKNOWN_EC; bitflips = 1; } - if (!ec_corr) { + if (!ec_err) { int image_seq; /* Make sure UBI version is OK */ @@ -817,10 +817,10 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, else if (err == UBI_IO_BITFLIPS) bitflips = 1; else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR || - (err == UBI_IO_FF && ec_corr) || err == UBI_IO_FF_BITFLIPS) { + (err == UBI_IO_FF && ec_err) || err == UBI_IO_FF_BITFLIPS) { /* VID header is corrupted */ if (err == UBI_IO_BAD_HDR_EBADMSG || - ec_corr == UBI_IO_BAD_HDR_EBADMSG) + ec_err == UBI_IO_BAD_HDR_EBADMSG) si->read_err_count += 1; err = add_to_list(si, pnum, ec, &si->corr); if (err) @@ -870,7 +870,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, } } - if (ec_corr) + if (ec_err) ubi_warn("valid VID header but corrupted EC header at PEB %d", pnum); err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); @@ -878,7 +878,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return err; adjust_mean_ec: - if (!ec_corr) { + if (!ec_err) { si->ec_sum += ec; si->ec_count += 1; if (ec > si->max_ec) From 
b33215084c1c06258a2d9deb035d343aafa4066e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 14:40:55 +0300 Subject: [PATCH 07/16] UBI: change cascade of ifs to switch statements This patch improves readability and simplifies scanning code by changing a long cascade of 'if' statements to a switch statement. This should presumably be a little faster as well. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 45 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index a15e9bcb6e77..9405e2420055 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -746,11 +746,18 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); if (err < 0) return err; - else if (err == UBI_IO_BITFLIPS) + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: bitflips = 1; - else if (err == UBI_IO_FF || err == UBI_IO_FF_BITFLIPS) + break; + case UBI_IO_FF: + case UBI_IO_FF_BITFLIPS: return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); - else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { + case UBI_IO_BAD_HDR_EBADMSG: + si->read_err_count += 1; + case UBI_IO_BAD_HDR: /* * We have to also look at the VID header, possibly it is not * corrupted. 
Set %bitflips flag in order to make this PEB be @@ -759,6 +766,10 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, ec_err = err; ec = UBI_SCAN_UNKNOWN_EC; bitflips = 1; + break; + default: + ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err); + return -EINVAL; } if (!ec_err) { @@ -814,24 +825,32 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); if (err < 0) return err; - else if (err == UBI_IO_BITFLIPS) + switch (err) { + case 0: + break; + case UBI_IO_BITFLIPS: bitflips = 1; - else if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR || - (err == UBI_IO_FF && ec_err) || err == UBI_IO_FF_BITFLIPS) { - /* VID header is corrupted */ - if (err == UBI_IO_BAD_HDR_EBADMSG || - ec_err == UBI_IO_BAD_HDR_EBADMSG) - si->read_err_count += 1; + break; + case UBI_IO_BAD_HDR_EBADMSG: + si->read_err_count += 1; + case UBI_IO_BAD_HDR: + case UBI_IO_FF_BITFLIPS: err = add_to_list(si, pnum, ec, &si->corr); if (err) return err; goto adjust_mean_ec; - } else if (err == UBI_IO_FF) { - /* No VID header - the physical eraseblock is free */ - err = add_to_list(si, pnum, ec, &si->free); + case UBI_IO_FF: + if (ec_err) + err = add_to_list(si, pnum, ec, &si->corr); + else + err = add_to_list(si, pnum, ec, &si->free); if (err) return err; goto adjust_mean_ec; + default: + ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d", + err); + return -EINVAL; } vol_id = be32_to_cpu(vidh->vol_id); From 3fb34124da9d5e37576d9f87d7a5005ba1d82dd7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 15:36:12 +0300 Subject: [PATCH 08/16] UBI: separate out corrupted list This patch introduces 'add_corrupted()' function and separates out 'corr' list manipulation from the common 'add_to_list()' function. This is just a preparation for further changes - this patch does not change functionality. 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 9405e2420055..fba3dc6a97e9 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -64,9 +64,9 @@ static struct ubi_vid_hdr *vidh; * @ec: erase counter of the physical eraseblock * @list: the list to add to * - * This function adds physical eraseblock @pnum to free, erase, corrupted or - * alien lists. Returns zero in case of success and a negative error code in - * case of failure. + * This function adds physical eraseblock @pnum to free, erase, or alien lists. + * Returns zero in case of success and a negative error code in case of + * failure. */ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, struct list_head *list) @@ -79,9 +79,6 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, } else if (list == &si->erase) { dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); si->erase_peb_count += 1; - } else if (list == &si->corr) { - dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); - si->corr_peb_count += 1; } else if (list == &si->alien) { dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); si->alien_peb_count += 1; @@ -98,6 +95,33 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, return 0; } +/** + * add_corrupted - add a corrupted physical eraseblock. + * @si: scanning information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * + * This function adds corrupted physical eraseblock @pnum to the 'corr' list. + * Returns zero in case of success and a negative error code in case of + * failure. 
+ */ +static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec) +{ + struct ubi_scan_leb *seb; + + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + si->corr_peb_count += 1; + seb->pnum = pnum; + seb->ec = ec; + list_add(&seb->u.list, &si->corr); + return 0; +} + /** * validate_vid_hdr - check volume identifier header. * @vid_hdr: the volume identifier header to check @@ -464,8 +488,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, return err; if (cmp_res & 4) - err = add_to_list(si, seb->pnum, seb->ec, - &si->corr); + err = add_corrupted(si, seb->pnum, seb->ec); else err = add_to_list(si, seb->pnum, seb->ec, &si->erase); @@ -488,7 +511,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, * previously. */ if (cmp_res & 4) - return add_to_list(si, pnum, ec, &si->corr); + return add_corrupted(si, pnum, ec); else return add_to_list(si, pnum, ec, &si->erase); } @@ -835,13 +858,13 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, si->read_err_count += 1; case UBI_IO_BAD_HDR: case UBI_IO_FF_BITFLIPS: - err = add_to_list(si, pnum, ec, &si->corr); + err = add_corrupted(si, pnum, ec); if (err) return err; goto adjust_mean_ec; case UBI_IO_FF: if (ec_err) - err = add_to_list(si, pnum, ec, &si->corr); + err = add_corrupted(si, pnum, ec); else err = add_to_list(si, pnum, ec, &si->free); if (err) From 0525dac9fd31e5a12fb934238abd09e2752a5967 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 17:11:37 +0300 Subject: [PATCH 09/16] UBI: do not put eraseblocks to the corrupted list unnecessarily Currently UBI maintains 2 lists of PEBs during scanning: 1. 'erase' list - PEBs which have no corruptions but should be erased 2. 
'corr' list - PEBs which have some corruptions and should be erased But we do not really need 2 lists for PEBs which should be erased after scanning is done - this is redundant. So this patch makes sure all PEBs which are corrupted are moved to the head of the 'erase' list. We add them to the head to make sure they are erased first and we get rid of corruption ASAP. However, we do not remove the 'corr' list and related functions, because the plan is to use this list for other purposes. Namely, we plan to put eraseblocks with corruption which does not look like it was caused by unclean power cut. Then we'll preserve these PEBs in order to avoid killing potentially valuable user data. This patch also amends PEBs accounting, because it was closely tied to the 'erase'/'corr' lists separation. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 145 +++++++++++++++++++++++------------------ drivers/mtd/ubi/scan.h | 15 ++--- drivers/mtd/ubi/vtbl.c | 2 +- 3 files changed, 90 insertions(+), 72 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index fba3dc6a97e9..19dc5e04fd61 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -29,7 +29,7 @@ * objects which are kept in volume RB-tree with root at the @volumes field. * The RB-tree is indexed by the volume ID. * - * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. + * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects. * These objects are kept in per-volume RB-trees with the root at the * corresponding &struct ubi_scan_volume object. To put it differently, we keep * an RB-tree of per-volume objects and each of these objects is the root of @@ -38,6 +38,21 @@ * Corrupted physical eraseblocks are put to the @corr list, free physical * eraseblocks are put to the @free list and the physical eraseblock to be * erased are put to the @erase list. + * + * UBI tries to distinguish between 2 types of corruptions. + * 1.
Corruptions caused by power cuts. These are harmless and expected + * corruptions and UBI tries to handle them gracefully, without printing too + * many warnings and error messages. The idea is that we do not lose + * important data in these case - we may lose only the data which was being + * written to the media just before the power cut happened, and the upper + * layers are supposed to handle these situations. UBI puts these PEBs to + * the head of the @erase list and they are scheduled for erasure. + * + * 2. Unexpected corruptions which are not caused by power cuts. During + * scanning, such PEBs are put to the @corr list and UBI preserves them. + * Obviously, this lessens the amount of available PEBs, and if at some + * point UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly + * informs about such PEBs every time the MTD device is attached. */ #include @@ -62,23 +77,26 @@ static struct ubi_vid_hdr *vidh; * @si: scanning information * @pnum: physical eraseblock number to add * @ec: erase counter of the physical eraseblock + * @to_head: if not zero, add to the head of the list * @list: the list to add to * * This function adds physical eraseblock @pnum to free, erase, or alien lists. - * Returns zero in case of success and a negative error code in case of + * If @to_head is not zero, PEB will be added to the head of the list, which + * basically means it will be processed first later. E.g., we add corrupted + * PEBs (corrupted due to power cuts) to the head of the erase list to make + * sure we erase them first and get rid of corruptions ASAP. This function + * returns zero in case of success and a negative error code in case of * failure. 
*/ -static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, +static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head, struct list_head *list) { struct ubi_scan_leb *seb; if (list == &si->free) { dbg_bld("add to free: PEB %d, EC %d", pnum, ec); - si->free_peb_count += 1; } else if (list == &si->erase) { dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); - si->erase_peb_count += 1; } else if (list == &si->alien) { dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); si->alien_peb_count += 1; @@ -91,7 +109,10 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, seb->pnum = pnum; seb->ec = ec; - list_add_tail(&seb->u.list, list); + if (to_head) + list_add(&seb->u.list, list); + else + list_add_tail(&seb->u.list, list); return 0; } @@ -282,8 +303,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, * created before sequence numbers support has been added. At * that times we used 32-bit LEB versions stored in logical * eraseblocks. That was before UBI got into mainline. We do not - * support these images anymore. Well, those images will work - * still work, but only if no unclean reboots happened. + * support these images anymore. Well, those images still work, + * but only if no unclean reboots happened. 
*/ ubi_err("unsupported on-flash UBI format\n"); return -EINVAL; @@ -321,7 +342,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, bitflips = 1; else { dbg_err("VID of PEB %d header is bad, but it " - "was OK earlier", pnum); + "was OK earlier, err %d", pnum, err); if (err > 0) err = -EIO; @@ -487,11 +508,8 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, if (err) return err; - if (cmp_res & 4) - err = add_corrupted(si, seb->pnum, seb->ec); - else - err = add_to_list(si, seb->pnum, seb->ec, - &si->erase); + err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4, + &si->erase); if (err) return err; @@ -510,10 +528,8 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, * This logical eraseblock is older than the one found * previously. */ - if (cmp_res & 4) - return add_corrupted(si, pnum, ec); - else - return add_to_list(si, pnum, ec, &si->erase); + return add_to_list(si, pnum, ec, cmp_res & 4, + &si->erase); } } @@ -544,7 +560,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, sv->leb_count += 1; rb_link_node(&seb->u.rb, parent, p); rb_insert_color(&seb->u.rb, &sv->root); - si->used_peb_count += 1; return 0; } @@ -776,10 +791,14 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, bitflips = 1; break; case UBI_IO_FF: + si->empty_peb_count += 1; + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0, + &si->erase); case UBI_IO_FF_BITFLIPS: - return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); + si->empty_peb_count += 1; + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1, + &si->erase); case UBI_IO_BAD_HDR_EBADMSG: - si->read_err_count += 1; case UBI_IO_BAD_HDR: /* * We have to also look at the VID header, possibly it is not @@ -855,18 +874,25 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, bitflips = 1; break; case UBI_IO_BAD_HDR_EBADMSG: - si->read_err_count += 1; + if (ec_err == 
UBI_IO_BAD_HDR_EBADMSG) + /* + * Both EC and VID headers are corrupted and were read + * with data integrity error, probably this is a bad + * PEB, bit it is not marked as bad yet. This may also + * be a result of power cut during erasure. + */ + si->maybe_bad_peb_count += 1; case UBI_IO_BAD_HDR: case UBI_IO_FF_BITFLIPS: - err = add_corrupted(si, pnum, ec); + err = add_to_list(si, pnum, ec, 1, &si->erase); if (err) return err; goto adjust_mean_ec; case UBI_IO_FF: if (ec_err) - err = add_corrupted(si, pnum, ec); + err = add_to_list(si, pnum, ec, 1, &si->erase); else - err = add_to_list(si, pnum, ec, &si->free); + err = add_to_list(si, pnum, ec, 0, &si->free); if (err) return err; goto adjust_mean_ec; @@ -885,7 +911,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, case UBI_COMPAT_DELETE: ubi_msg("\"delete\" compatible internal volume %d:%d" " found, will remove it", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->erase); + err = add_to_list(si, pnum, ec, 1, &si->erase); if (err) return err; return 0; @@ -900,7 +926,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, case UBI_COMPAT_PRESERVE: ubi_msg("\"preserve\" compatible internal volume %d:%d" " found", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->alien); + err = add_to_list(si, pnum, ec, 0, &si->alien); if (err) return err; return 0; @@ -946,19 +972,20 @@ adjust_mean_ec: static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) { struct ubi_scan_leb *seb; - int max_corr; + int max_corr, peb_count; - max_corr = ubi->peb_count - si->bad_peb_count - si->alien_peb_count; - max_corr = max_corr / 20 ?: 8; + peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count; + max_corr = peb_count / 20 ?: 8; /* - * Few corrupted PEBs are not a problem and may be just a result of + * Few corrupted PEBs is not a problem and may be just a result of * unclean reboots. 
However, many of them may indicate some problems * with the flash HW or driver. */ - if (si->corr_peb_count >= 8) { - ubi_warn("%d PEBs are corrupted", si->corr_peb_count); - printk(KERN_WARNING "corrupted PEBs are:"); + if (si->corr_peb_count) { + ubi_err("%d PEBs are corrupted and preserved", + si->corr_peb_count); + printk(KERN_ERR "Corrupted PEBs are:"); list_for_each_entry(seb, &si->corr, u.list) printk(KERN_CONT " %d", seb->pnum); printk(KERN_CONT "\n"); @@ -973,41 +1000,35 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) } } - if (si->free_peb_count + si->used_peb_count + - si->alien_peb_count == 0) { - /* No UBI-formatted eraseblocks were found */ - if (si->corr_peb_count == si->read_err_count && - si->corr_peb_count < 8) { - /* No or just few corrupted PEBs, and all of them had a - * read error. We assume that those are bad PEBs, which - * were just not marked as bad so far. - * - * This piece of code basically tries to distinguish - * between the following 2 situations: - * - * 1. Flash is empty, but there are few bad PEBs, which - * are not marked as bad so far, and which were read - * with error. We want to go ahead and format this - * flash. While formating, the faulty PEBs will - * probably be marked as bad. - * - * 2. Flash probably contains non-UBI data and we do - * not want to format it and destroy possibly needed - * data (e.g., consider the case when the bootloader - * MTD partition was accidentally fed to UBI). - */ + if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) { + /* + * All PEBs are empty, or almost all - a couple PEBs look like + * they may be bad PEBs which were not marked as bad yet. + * + * This piece of code basically tries to distinguish between + * the following situations: + * + * 1. Flash is empty, but there are few bad PEBs, which are not + * marked as bad so far, and which were read with error. We + * want to go ahead and format this flash. 
While formatting, + * the faulty PEBs will probably be marked as bad. + * + * 2. Flash contains non-UBI data and we do not want to format + * it and destroy possibly important information. + */ + if (si->maybe_bad_peb_count <= 2) { si->is_empty = 1; ubi_msg("empty MTD device detected"); - get_random_bytes(&ubi->image_seq, sizeof(ubi->image_seq)); + get_random_bytes(&ubi->image_seq, + sizeof(ubi->image_seq)); } else { - ubi_err("MTD device possibly contains non-UBI data, " - "refusing it"); + ubi_err("MTD device is not UBI-formatted and possibly " + "contains non-UBI data - refusing it"); return -EINVAL; } + } - if (si->corr_peb_count > 0) - ubi_msg("corrupted PEBs will be formatted"); return 0; } diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 0876649318dd..12ac852c993b 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -91,14 +91,13 @@ struct ubi_scan_volume { * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., * those belonging to "preserve"-compatible internal volumes) - * @used_peb_count: count of used PEBs * @corr_peb_count: count of PEBs in the @corr list - * @read_err_count: count of PEBs read with error (%UBI_IO_BAD_HDR_EBADMSG was - * returned) - * @free_peb_count: count of PEBs in the @free list - * @erase_peb_count: count of PEBs in the @erase list + * @empty_peb_count: count of PEBs which are presumably empty (contain only + * 0xFF bytes) * @alien_peb_count: count of PEBs in the @alien list * @bad_peb_count: count of bad physical eraseblocks + * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked + * as bad yet, but which look like bad * @vols_found: number of volumes found during scanning * @highest_vol_id: highest volume ID * @is_empty: flag indicating whether the MTD device is empty or not @@ -119,13 +118,11 @@ struct ubi_scan_info { struct list_head free; struct list_head erase; struct list_head alien; 
- int used_peb_count; int corr_peb_count; - int read_err_count; - int free_peb_count; - int erase_peb_count; + int empty_peb_count; int alien_peb_count; int bad_peb_count; + int maybe_bad_peb_count; int vols_found; int highest_vol_id; int is_empty; diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 14c10bed94ee..3bfe00a94667 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -366,7 +366,7 @@ write_error: * Probably this physical eraseblock went bad, try to pick * another one. */ - list_add_tail(&new_seb->u.list, &si->corr); + list_add(&new_seb->u.list, &si->erase); goto retry; } kfree(new_seb); From bb00e180a93a6c8e89c3b2d1f9473781e1e2d2a4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 31 Jul 2010 09:37:34 +0300 Subject: [PATCH 10/16] UBI: make check_pattern function non-static This patch turns static function 'check_pattern()' into a non-static 'ubi_check_pattern()'. This is just a preparation for the changes which are coming in the next patches. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/io.c | 30 ++++++------------------------ drivers/mtd/ubi/misc.c | 19 +++++++++++++++++++ drivers/mtd/ubi/ubi.h | 1 + 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index b76252465c87..c2960ac9f39c 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -376,25 +376,6 @@ retry: return 0; } -/** - * check_pattern - check if buffer contains only a certain byte pattern. - * @buf: buffer to check - * @patt: the pattern to check - * @size: buffer size in bytes - * - * This function returns %1 in there are only @patt bytes in @buf, and %0 if - * something else was also found.
- */ -static int check_pattern(const void *buf, uint8_t patt, int size) -{ - int i; - - for (i = 0; i < size; i++) - if (((const uint8_t *)buf)[i] != patt) - return 0; - return 1; -} - /* Patterns to write to a physical eraseblock when torturing it */ static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; @@ -426,7 +407,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) if (err) goto out; - err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); + err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); if (err == 0) { ubi_err("erased PEB %d, but a non-0xFF byte found", pnum); @@ -445,7 +426,8 @@ static int torture_peb(struct ubi_device *ubi, int pnum) if (err) goto out; - err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size); + err = ubi_check_pattern(ubi->peb_buf1, patterns[i], + ubi->peb_size); if (err == 0) { ubi_err("pattern %x checking failed for PEB %d", patterns[i], pnum); @@ -752,7 +734,7 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, * 0xFF. If yes, this physical eraseblock is assumed to be * empty. 
*/ - if (check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { /* The physical eraseblock is supposedly empty */ if (verbose) ubi_warn("no EC header found at PEB %d, " @@ -1009,7 +991,7 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, if (read_err == -EBADMSG) return UBI_IO_BAD_HDR_EBADMSG; - if (check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { + if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { if (verbose) ubi_warn("no VID header found at PEB %d, " "only 0xFF bytes", pnum); @@ -1363,7 +1345,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) goto error; } - err = check_pattern(ubi->dbg_peb_buf, 0xFF, len); + err = ubi_check_pattern(ubi->dbg_peb_buf, 0xFF, len); if (err == 0) { ubi_err("flash region at PEB %d:%d, length %d does not " "contain all 0xFF bytes", pnum, offset, len); diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c index 22ad31402945..ff2a65c37f69 100644 --- a/drivers/mtd/ubi/misc.c +++ b/drivers/mtd/ubi/misc.c @@ -103,3 +103,22 @@ void ubi_calculate_reserved(struct ubi_device *ubi) if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS) ubi->beb_rsvd_level = MIN_RESEVED_PEBS; } + +/** + * ubi_check_pattern - check if buffer contains only a certain byte pattern. + * @buf: buffer to check + * @patt: the pattern to check + * @size: buffer size in bytes + * + * This function returns %1 in there are only @patt bytes in @buf, and %0 if + * something else was also found. 
+ */ +int ubi_check_pattern(const void *buf, uint8_t patt, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (((const uint8_t *)buf)[i] != patt) + return 0; + return 1; +} diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 10990770bc9e..8831d7ba9f21 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -511,6 +511,7 @@ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); int ubi_check_volume(struct ubi_device *ubi, int vol_id); void ubi_calculate_reserved(struct ubi_device *ubi); +int ubi_check_pattern(const void *buf, uint8_t patt, int size); /* eba.c */ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, From 315324947cbc7264af86b7ecdc2d5fb5f8556222 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 22:27:46 +0300 Subject: [PATCH 11/16] UBI: introduce debugging helper function Introduce a helper function to print hexdump: 'ubi_dbg_print_hex_dump()'. It is compiled out if debugging is disabled. Will be used in the next patch. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/debug.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 17a107129726..9eca95074bc2 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -57,6 +57,9 @@ void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); +#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ + print_hex_dump(l, ps, pt, r, g, b, len, a) + #ifdef CONFIG_MTD_UBI_DEBUG_MSG /* General debugging messages */ #define dbg_gen(fmt, ...)
dbg_msg(fmt, ##__VA_ARGS__) @@ -172,6 +175,7 @@ static inline int ubi_dbg_is_erase_failure(void) #define ubi_dbg_dump_seb(seb, type) ({}) #define ubi_dbg_dump_mkvol_req(req) ({}) #define ubi_dbg_dump_flash(ubi, pnum, offset, len) ({}) +#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) ({}) #define UBI_IO_DEBUG 0 #define DBG_DISABLE_BGT 0 From feeba4b872e5166ca64c44fbb5bbec234dfce199 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 22:50:53 +0300 Subject: [PATCH 12/16] UBI: add truly corrupted PEBs to corrupted list Start using the 'corr' list and add there PEBs which look truly corrupted, which means they have corrupted VID header and the data which follows the corrupted header does not contain all 0xFF bytes. At the moment, this does not change UBI functionality much because these PEBs will be erased when scanning finishes. But the plan is to teach UBI to preserve them. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 76 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 19dc5e04fd61..def0bf03d7fe 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -123,8 +123,8 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head, * @ec: erase counter of the physical eraseblock * * This function adds corrupted physical eraseblock @pnum to the 'corr' list. - * Returns zero in case of success and a negative error code in case of - * failure. + * The corruption was presumably not caused by a power cut. Returns zero in + * case of success and a negative error code in case of failure. */ static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec) { @@ -750,6 +750,53 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, return ERR_PTR(-ENOSPC); } +/** + * check_data_ff - make sure PEB contains only 0xFF data.
+ * @ubi: UBI device description object + * @vid_hrd: the (corrupted) VID header of this PEB + * @pnum: the physical eraseblock number to check + * + * This is a helper function which is used to distinguish between VID header + * corruptions caused by power cuts and other reasons. If the PEB contains only + * 0xFF bytes at the data area, the VID header is most probably corrupted + * because of a power cut (%0 is returned in this case). Otherwise, it was + * corrupted for some other reasons (%1 is returned in this case). A negative + * error code is returned if a read error occurred. + * + * If the corruption reason was a power cut, UBI can safely erase this PEB. + * Otherwise, it should preserve it to avoid possibly destroying important + * information. + */ +static int check_data_ff(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int pnum) +{ + int err; + + mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf1, 0x00, ubi->leb_size); + + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, + ubi->leb_size); + if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + return err; + + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { + mutex_unlock(&ubi->buf_mutex); + return 0; + } + + ubi_err("PEB %d contains corrupted VID header, and the data does not " + "contain all 0xFF, this may be a non-UBI PEB or a severe VID " + "header corruption which requires manual inspection", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + dbg_msg("hexdump of PEB %d offset %d, length %d", + pnum, ubi->leb_start, ubi->leb_size); + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ubi->peb_buf1, ubi->leb_size, 1); + mutex_unlock(&ubi->buf_mutex); + return -EINVAL; +} + /** * process_eb - read, check UBI headers, and add them to scanning information. 
* @ubi: UBI device description object @@ -883,6 +930,31 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, */ si->maybe_bad_peb_count += 1; case UBI_IO_BAD_HDR: + if (ec_err) + /* + * Both headers are corrupted. There is a possibility + * that this a valid UBI PEB which has corresponding + * LEB, but the headers are corrupted. However, it is + * impossible to distinguish it from a PEB which just + * contains garbage because a power cut during erase + * operation. So we just schedule this PEB for erasure. + */ + err = 0; + else + /* + * The EC was OK, but the VID header is corrupted. We + * have to check what is in the data area. + */ + err = check_data_ff(ubi, vidh, pnum); + if (!err) + /* This corruption is caused by a power cut */ + err = add_to_list(si, pnum, ec, 1, &si->erase); + else + /* This is an unexpected corruption */ + err = add_corrupted(si, pnum, ec); + if (err) + return err; + goto adjust_mean_ec; case UBI_IO_FF_BITFLIPS: err = add_to_list(si, pnum, ec, 1, &si->erase); if (err) From 5fc01ab6934c43b42c41bc753fe1123c16d7f38f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 3 Sep 2010 23:08:15 +0300 Subject: [PATCH 13/16] UBI: preserve corrupted PEBs Currently UBI erases all corrupted eraseblocks, irrespectively of the nature of corruption: corruption due to power cuts and non-power cut corruption. The former case is OK, but the latter is not, because UBI may destroy potentially important data. With this patch, during scanning, when UBI hits a PEB with corrupted VID header, it checks whether this PEB contains only 0xFF data. If yes, it is safe to erase this PEB and it is put to the 'erase' list. If not, this may be important data and it is better to avoid erasing this PEB. Instead, UBI puts it to the corr list and moves out of the pool of available PEB. IOW, UBI preserves this PEB. Such corrupted PEB lessen the amount of available PEBs. So the more of them we accumulate, the less PEBs are available. 
The maximum amount of non-power cut corrupted PEBs is 8. This patch is a response to UBIFS problem where reporter (Matthew L. Creech ) observes that UBIFS index points to an unmapped LEB. The theory is that corresponding PEB somehow got corrupted and UBI wiped it. This patch (actually a series of patches) tries to make sure such PEBs are preserved - this would make it easier to analyze the corruption. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/build.c | 2 ++ drivers/mtd/ubi/eba.c | 6 ++++++ drivers/mtd/ubi/scan.c | 48 ++++++++++++++++------------------------- drivers/mtd/ubi/ubi.h | 3 +++ drivers/mtd/ubi/vmt.c | 6 ++++++ drivers/mtd/ubi/vtbl.c | 8 +++++-- drivers/mtd/ubi/wl.c | 19 +++------------- 7 files changed, 45 insertions(+), 47 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index f247c4e7b40d..5ebe280225d6 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi) ubi->bad_peb_count = si->bad_peb_count; ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->corr_peb_count = si->corr_peb_count; ubi->max_ec = si->max_ec; ubi->mean_ec = si->mean_ec; ubi_msg("max. sequence number: %llu", si->max_sqnum); @@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); ubi_msg("number of good PEBs: %d", ubi->good_peb_count); ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); + ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count); ubi_msg("max.
allowed volumes: %d", ubi->vtbl_slots); ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 334865ef5220..4be671815014 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi, ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d," " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + if (ubi->corr_peb_count) + ubi_warn("%d PEBs are corrupted and not used", + ubi->corr_peb_count); } /** @@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (ubi->avail_pebs < EBA_RESERVED_PEBS) { ubi_err("no enough physical eraseblocks (%d, need %d)", ubi->avail_pebs, EBA_RESERVED_PEBS); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); err = -ENOSPC; goto out_free; } diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index def0bf03d7fe..30b710216f26 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -706,8 +706,8 @@ out_free: struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, struct ubi_scan_info *si) { - int err = 0, i; - struct ubi_scan_leb *seb; + int err = 0; + struct ubi_scan_leb *seb, *tmp_seb; if (!list_empty(&si->free)) { seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); @@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, return seb; } - for (i = 0; i < 2; i++) { - struct list_head *head; - struct ubi_scan_leb *tmp_seb; + /* + * We try to erase the first physical eraseblock from the erase list + * and pick it if we succeed, or try to erase the next one if not. And + * so forth. We don't want to take care about bad eraseblocks here - + * they'll be handled later. 
+ */ + list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; - if (i == 0) - head = &si->erase; - else - head = &si->corr; + err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); + if (err) + continue; - /* - * We try to erase the first physical eraseblock from the @head - * list and pick it if we succeed, or try to erase the - * next one if not. And so forth. We don't want to take care - * about bad eraseblocks here - they'll be handled later. - */ - list_for_each_entry_safe(seb, tmp_seb, head, u.list) { - if (seb->ec == UBI_SCAN_UNKNOWN_EC) - seb->ec = si->mean_ec; - - err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); - if (err) - continue; - - seb->ec += 1; - list_del(&seb->u.list); - dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); - return seb; - } + seb->ec += 1; + list_del(&seb->u.list); + dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); + return seb; } - ubi_err("no eraseblocks found"); + ubi_err("no free eraseblocks"); return ERR_PTR(-ENOSPC); } diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 8831d7ba9f21..0b0149c41fe3 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -361,6 +361,8 @@ struct ubi_wl_entry; * @peb_size: physical eraseblock size * @bad_peb_count: count of bad physical eraseblocks * @good_peb_count: count of good physical eraseblocks + * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not + * used by UBI) * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks * @min_io_size: minimal input/output unit size of the underlying MTD device @@ -447,6 +449,7 @@ struct ubi_device { int peb_size; int bad_peb_count; int good_peb_count; + int corr_peb_count; int erroneous_peb_count; int max_erroneous; int min_io_size; diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index e42afab9a9fe..c47620dfc722 100644 
--- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) /* Reserve physical eraseblocks */ if (vol->reserved_pebs > ubi->avail_pebs) { dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); + if (ubi->corr_peb_count) + dbg_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); err = -ENOSPC; goto out_unlock; } @@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (pebs > ubi->avail_pebs) { dbg_err("not enough PEBs: requested %d, available %d", pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + dbg_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); spin_unlock(&ubi->volumes_lock); err = -ENOSPC; goto out_free; diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 3bfe00a94667..fcdb7f65fe0b 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, ubi->vol_count += 1; vol->ubi = ubi; - if (reserved_pebs > ubi->avail_pebs) + if (reserved_pebs > ubi->avail_pebs) { ubi_err("not enough PEBs, required %d, available %d", reserved_pebs, ubi->avail_pebs); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); + } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; @@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) return PTR_ERR(ubi->vtbl); } - ubi->avail_pebs = ubi->good_peb_count; + ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count; /* * The layout volume is OK, initialize the corresponding in-RAM data diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 605ecb1e22bb..655bbbe415d9 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->lookuptbl[e->pnum] = e; } - 
list_for_each_entry(seb, &si->corr, u.list) { - cond_resched(); - - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; - - e->pnum = seb->pnum; - e->ec = seb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, 0)) { - kmem_cache_free(ubi_wl_entry_slab, e); - goto out_free; - } - } - ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { cond_resched(); @@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (ubi->avail_pebs < WL_RESERVED_PEBS) { ubi_err("no enough physical eraseblocks (%d, need %d)", ubi->avail_pebs, WL_RESERVED_PEBS); + if (ubi->corr_peb_count) + ubi_err("%d PEBs are corrupted and not used", + ubi->corr_peb_count); goto out_free; } ubi->avail_pebs -= WL_RESERVED_PEBS; From fb22b59b2c38054cc847f6acc5c46daa26dc6dd3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 19 Oct 2010 22:00:11 +0300 Subject: [PATCH 14/16] UBI: remember copy_flag while scanning While scanning the flash we read all VID headers and store some important information in 'struct ubi_scan_leb'. Store also the 'copy_flag' value there as it is needed when comparing LEBs. We do not increase memory consumption because this is just one bit and we have plenty of spare bits in 'struct ubi_scan_leb' (sizeof(struct ubi_scan_leb) is 48 both with and without this patch). 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 20 ++++++++++---------- drivers/mtd/ubi/scan.h | 4 +++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 30b710216f26..2fbb571b9828 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -330,12 +330,18 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, return 1; } } else { - pnum = seb->pnum; + if (!seb->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("first PEB %d is newer, copy_flag is unset", + pnum); + return bitflips << 1; + } vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vh) return -ENOMEM; + pnum = seb->pnum; err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); if (err) { if (err == UBI_IO_BITFLIPS) @@ -350,14 +356,6 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, } } - if (!vh->copy_flag) { - /* It is not a copy, so it is newer */ - dbg_bld("first PEB %d is newer, copy_flag is unset", - pnum); - err = bitflips << 1; - goto out_free_vidh; - } - vid_hdr = vh; } @@ -516,6 +514,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, seb->ec = ec; seb->pnum = pnum; seb->scrub = ((cmp_res & 2) || bitflips); + seb->copy_flag = vid_hdr->copy_flag; seb->sqnum = sqnum; if (sv->highest_lnum == lnum) @@ -549,8 +548,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, seb->ec = ec; seb->pnum = pnum; seb->lnum = lnum; - seb->sqnum = sqnum; seb->scrub = bitflips; + seb->copy_flag = vid_hdr->copy_flag; + seb->sqnum = sqnum; if (sv->highest_lnum <= lnum) { sv->highest_lnum = lnum; diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 12ac852c993b..a3264f0bef2b 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -30,6 +30,7 @@ * @pnum: physical eraseblock number * @lnum: logical eraseblock number * @scrub: if this physical eraseblock needs scrubbing + * @copy_flag: this LEB is a copy 
(@copy_flag is set in VID header of this LEB) * @sqnum: sequence number * @u: unions RB-tree or @list links * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects @@ -42,7 +43,8 @@ struct ubi_scan_leb { int ec; int pnum; int lnum; - int scrub; + unsigned int scrub:1; + unsigned int copy_flag:1; unsigned long long sqnum; union { struct rb_node rb; From df3fca4cdddfa6e1f51b65214d4342660649bd1f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 20 Oct 2010 11:51:21 +0300 Subject: [PATCH 15/16] UBI: fix check_data_ff return code When the data does not contain all 0xFF bytes, 'check_data_ff()' should return 1, not -EINVAL; Also, the caller ('process_eb()') should not add the PEB to the "corrupted" list if there was a read error. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 2fbb571b9828..e7b800b1d072 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -784,7 +784,7 @@ static int check_data_ff(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ubi->peb_buf1, ubi->leb_size, 1); mutex_unlock(&ubi->buf_mutex); - return -EINVAL; + return 1; } /** @@ -936,7 +936,10 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, * have to check what is in the data area. 
*/ err = check_data_ff(ubi, vidh, pnum); - if (!err) + + if (err < 0) + return err; + else if (!err) /* This corruption is caused by a power cut */ err = add_to_list(si, pnum, ec, 1, &si->erase); else From 45aafd32996e27bfc4862654ff31231bdddbe200 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 20 Oct 2010 11:54:58 +0300 Subject: [PATCH 16/16] UBI: tighten the corrupted PEB criteria If we get a bit-flip or ECC error while reading the data area, do not add the PEB to the corrupted list, because it is possible that this is just an unstable PEB with corruptions caused by unclean reboots. This patch also improves comments. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index e7b800b1d072..3c631863bf40 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -45,14 +45,26 @@ * many warnings and error messages. The idea is that we do not lose * important data in these case - we may lose only the data which was being * written to the media just before the power cut happened, and the upper - * layers are supposed to handle these situations. UBI puts these PEBs to - * the head of the @erase list and they are scheduled for erasure. + * layers (e.g., UBIFS) are supposed to handle these situations. UBI puts + * these PEBs to the head of the @erase list and they are scheduled for + * erasure. * * 2. Unexpected corruptions which are not caused by power cuts. During * scanning, such PEBs are put to the @corr list and UBI preserves them. * Obviously, this lessens the amount of available PEBs, and if at some * point UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly * informs about such PEBs every time the MTD device is attached. + * + * However, it is difficult to reliably distinguish between these types of + * corruptions and UBI's strategy is as follows. UBI assumes (2.) 
if the VID + * header is corrupted and the data area does not contain all 0xFFs, and there + * were not bit-flips or integrity errors while reading the data area. Otherwise + * UBI assumes (1.). The assumptions are: + * o if the data area contains only 0xFFs, there is no data, and it is safe + * to just erase this PEB. + * o if the data area has bit-flips and data integrity errors (ECC errors on + * NAND), it is probably a PEB which was being erased when power cut + * happened. */ #include @@ -741,24 +753,24 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, } /** - * check_data_ff - make sure PEB contains only 0xFF data. + * check_corruption - check the data area of PEB. * @ubi: UBI device description object * @vid_hrd: the (corrupted) VID header of this PEB * @pnum: the physical eraseblock number to check * * This is a helper function which is used to distinguish between VID header * corruptions caused by power cuts and other reasons. If the PEB contains only - * 0xFF bytes at the data area, the VID header is most probably corrupted + * 0xFF bytes in the data area, the VID header is most probably corrupted * because of a power cut (%0 is returned in this case). Otherwise, it was - * corrupted for some other reasons (%1 is returned in this case). A negative - * error code is returned if a read error occurred. + * probably corrupted for some other reasons (%1 is returned in this case). A + * negative error code is returned if a read error occurred. * * If the corruption reason was a power cut, UBI can safely erase this PEB. * Otherwise, it should preserve it to avoid possibly destroying important * information. 
*/ -static int check_data_ff(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, - int pnum) +static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int pnum) { int err; @@ -767,7 +779,18 @@ static int check_data_ff(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, ubi->leb_size); - if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + /* + * Bit-flips or integrity errors while reading the data area. + * It is difficult to say for sure what type of corruption is + * this, but presumably a power cut happened while this PEB was + * erased, so it became unstable and corrupted, and should be + * erased. + */ + return 0; + } + + if (err) return err; if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { @@ -926,7 +949,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, * that this a valid UBI PEB which has corresponding * LEB, but the headers are corrupted. However, it is * impossible to distinguish it from a PEB which just - * contains garbage because a power cut during erase + * contains garbage because of a power cut during erase * operation. So we just schedule this PEB for erasure. */ err = 0; @@ -935,7 +958,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, * The EC was OK, but the VID header is corrupted. We * have to check what is in the data area. */ - err = check_data_ff(ubi, vidh, pnum); + err = check_corruption(ubi, vidh, pnum); if (err < 0) return err;