qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/2] hw/block/nvme: add write uncorrectable command


From: Klaus Jensen
Subject: [PATCH 2/2] hw/block/nvme: add write uncorrectable command
Date: Wed, 10 Feb 2021 08:06:46 +0100

From: Gollu Appalanaidu <anaidu.gollu@samsung.com>

Add support for marking blocks invalid with the Write Uncorrectable
command. Block status is tracked in a (non-persistent) bitmap that is
checked on all reads and written to on all writes. This is potentially
expensive, so keep Write Uncorrectable disabled by default.

Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 docs/specs/nvme.txt   |  3 ++
 hw/block/nvme-ns.h    |  2 ++
 hw/block/nvme.h       |  1 +
 hw/block/nvme-ns.c    |  2 ++
 hw/block/nvme.c       | 65 +++++++++++++++++++++++++++++++++++++------
 hw/block/trace-events |  1 +
 6 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
index 56d393884e7a..88f9cc278d4c 100644
--- a/docs/specs/nvme.txt
+++ b/docs/specs/nvme.txt
@@ -19,5 +19,8 @@ Known issues
 
 * The accounting numbers in the SMART/Health are reset across power cycles
 
+* Marking blocks invalid with the Write Uncorrectable command is not persisted
+  across power cycles.
+
 * Interrupt Coalescing is not supported and is disabled by default in volation
   of the specification.
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 7af6884862b5..15fa422ded03 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -72,6 +72,8 @@ typedef struct NvmeNamespace {
     struct {
         uint32_t err_rec;
     } features;
+
+    unsigned long *uncorrectable;
 } NvmeNamespace;
 
 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 98082b2dfba3..9b8f85b9cf16 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -68,6 +68,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
     case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
     case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
     case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
+    case NVME_CMD_WRITE_UNCOR:      return "NVME_CMD_WRITE_UNCOR";
     case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
     case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
     case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index ade46e2f3739..742bbc4b4b62 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -72,6 +72,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
     id_ns->mcl = cpu_to_le32(ns->params.mcl);
     id_ns->msrc = ns->params.msrc;
 
+    ns->uncorrectable = bitmap_new(id_ns->nsze);
+
     return 0;
 }
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e5f6666725d7..56048046c193 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1112,6 +1112,20 @@ static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
     return NVME_SUCCESS;
 }
 
+static inline uint16_t nvme_check_uncor(NvmeNamespace *ns, uint64_t slba,
+                                        uint32_t nlb)
+{
+    uint64_t elba = nlb + slba;
+
+    if (ns->uncorrectable) {
+        if (find_next_bit(ns->uncorrectable, elba, slba) < elba) {
+            return NVME_UNRECOVERED_READ | NVME_DNR;
+        }
+    }
+
+    return NVME_SUCCESS;
+}
+
 static void nvme_aio_err(NvmeRequest *req, int ret)
 {
     uint16_t status = NVME_SUCCESS;
@@ -1423,14 +1437,24 @@ static void nvme_rw_cb(void *opaque, int ret)
     BlockAcctCookie *acct = &req->acct;
     BlockAcctStats *stats = blk_get_stats(blk);
 
+    bool is_write = nvme_is_write(req);
+
     trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
 
-    if (ns->params.zoned && nvme_is_write(req)) {
+    if (ns->params.zoned && is_write) {
         nvme_finalize_zoned_write(ns, req);
     }
 
     if (!ret) {
         block_acct_done(stats, acct);
+
+        if (is_write) {
+            NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+            uint64_t slba = le64_to_cpu(rw->slba);
+            uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+
+            bitmap_clear(ns->uncorrectable, slba, nlb);
+        }
     } else {
         block_acct_failed(stats, acct);
         nvme_aio_err(req, ret);
@@ -1521,13 +1545,13 @@ static void nvme_copy_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
     NvmeNamespace *ns = req->ns;
+    NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+    uint64_t sdlba = le64_to_cpu(copy->sdlba);
     struct nvme_copy_ctx *ctx = req->opaque;
 
     trace_pci_nvme_copy_cb(nvme_cid(req));
 
     if (ns->params.zoned) {
-        NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
-        uint64_t sdlba = le64_to_cpu(copy->sdlba);
         NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
 
         __nvme_advance_zone_wp(ns, zone, ctx->nlb);
@@ -1535,6 +1559,7 @@ static void nvme_copy_cb(void *opaque, int ret)
 
     if (!ret) {
         block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+        bitmap_clear(ns->uncorrectable, sdlba, ctx->nlb);
     } else {
         block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
         nvme_aio_err(req, ret);
@@ -1953,6 +1978,12 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
         goto invalid;
     }
 
+    status = nvme_check_uncor(ns, slba, nlb);
+    if (status) {
+        trace_pci_nvme_err_unrecoverable_read(slba, nlb);
+        return status;
+    }
+
     if (ns->params.zoned) {
         status = nvme_check_zone_read(ns, slba, nlb);
         if (status) {
@@ -1992,7 +2023,7 @@ invalid:
 }
 
 static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
-                              bool wrz)
+                              bool wrz, bool uncor)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
     NvmeNamespace *ns = req->ns;
@@ -2008,7 +2039,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
     trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
                          nvme_nsid(ns), nlb, data_size, slba);
 
-    if (!wrz) {
+    if (!wrz && !uncor) {
         status = nvme_check_mdts(n, data_size);
         if (status) {
             trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
@@ -2055,6 +2086,11 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
         zone->w_ptr += nlb;
     }
 
+    if (uncor) {
+        bitmap_set(ns->uncorrectable, slba, nlb);
+        return NVME_SUCCESS;
+    }
+
     data_offset = nvme_l2b(ns, slba);
 
     if (!wrz) {
@@ -2087,17 +2123,22 @@ invalid:
 
 static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
 {
-    return nvme_do_write(n, req, false, false);
+    return nvme_do_write(n, req, false, false, false);
 }
 
 static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
 {
-    return nvme_do_write(n, req, false, true);
+    return nvme_do_write(n, req, false, true, false);
 }
 
 static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
 {
-    return nvme_do_write(n, req, true, false);
+    return nvme_do_write(n, req, true, false, false);
+}
+
+static inline uint16_t nvme_write_uncor(NvmeCtrl *n, NvmeRequest *req)
+{
+    return nvme_do_write(n, req, false, false, true);
 }
 
 static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
@@ -2596,6 +2637,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
         return nvme_flush(n, req);
     case NVME_CMD_WRITE_ZEROES:
         return nvme_write_zeroes(n, req);
+    case NVME_CMD_WRITE_UNCOR:
+        return nvme_write_uncor(n, req);
     case NVME_CMD_ZONE_APPEND:
         return nvme_zone_append(n, req);
     case NVME_CMD_WRITE:
@@ -4514,6 +4557,11 @@ static void nvme_init_cse_iocs(NvmeCtrl *n)
     n->iocs.nvm[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC;
     n->iocs.nvm[NVME_CMD_READ]  = NVME_CMD_EFF_CSUPP;
 
+    if (oncs & NVME_ONCS_WRITE_UNCORR) {
+        n->iocs.nvm[NVME_CMD_WRITE_UNCOR] = NVME_CMD_EFF_CSUPP |
+            NVME_CMD_EFF_LBCC;
+    }
+
     if (oncs & NVME_ONCS_WRITE_ZEROES) {
         n->iocs.nvm[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP |
             NVME_CMD_EFF_LBCC;
@@ -4853,6 +4901,7 @@ static void nvme_exit(PCIDevice *pci_dev)
         }
 
         nvme_ns_cleanup(ns);
+        g_free(ns->uncorrectable);
     }
 
     g_free(n->cq);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4b5ee04024f4..f30ef220c26a 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -128,6 +128,7 @@ pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PR
 pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
 pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
 pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+pci_nvme_err_unrecoverable_read(uint64_t start, uint32_t nlb) "islba 0x%"PRIx64" nlb %"PRIu32""
 pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
 pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
 pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
-- 
2.30.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]