nvme: Build the page list in the existing dma buffer

Commit 01f2736cc9 ("nvme: Pass large I/O requests as PRP lists")
introduced multi-page requests using the NVMe PRP mechanism. To store the
list and "first page to write to" hints, it added fields to the NVMe
namespace struct.
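
For reference, NVMe describes a command's data buffer with two Physical
Region Page (PRP) pointers: PRP1 names the first data page, and PRP2 holds
either the second page (for transfers of at most two pages) or the address
of a PRP list, a flat array of 8-byte page addresses covering the remaining
pages. A minimal, self-contained sketch of that layout (PAGE_SIZE,
struct prp_pair and build_prps() are illustrative names, not SeaBIOS code):

    #include <stdint.h>

    #define PAGE_SIZE 4096u

    struct prp_pair {
        uint64_t prp1;   /* first data page (may carry a byte offset)     */
        uint64_t prp2;   /* second page, or the address of a PRP list     */
    };

    /* Describe a page-aligned transfer of 'pages' pages starting at 'addr'.
     * 'list' needs room for at least (pages - 1) entries when pages > 2;
     * a real driver must also keep the list within a single page. */
    static struct prp_pair
    build_prps(uint64_t addr, unsigned pages, uint64_t *list)
    {
        struct prp_pair p = { .prp1 = addr, .prp2 = 0 };
        if (pages == 2) {
            p.prp2 = addr + PAGE_SIZE;            /* embed the 2nd page directly */
        } else if (pages > 2) {
            unsigned i;
            for (i = 1; i < pages; i++)           /* every page after the first  */
                list[i - 1] = addr + (uint64_t)i * PAGE_SIZE;
            p.prp2 = (uint64_t)(uintptr_t)list;   /* point PRP2 at the list      */
        }
        return p;
    }

The controller reads the PRP list by physical address at I/O time, so both
the list and the bookkeeping used to build it must live in memory that is
actually writable when the request is assembled.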

Unfortunately, that struct resides in fseg, which is read-only at runtime.
While KVM ignores the read-only attribute and allows writes, real hardware
and TCG honor it and discard writes to the fseg region. The net effect is
that reads and writes always hit address 0, unless they went through the
bounce buffer logic.
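
To make the failure concrete, here is a rough sketch of the pre-patch
sequence. It assumes (as the old code's placement of prpl[] in the
namespace struct implies) that the struct is allocated from the f-segment
with SeaBIOS's malloc_fseg() during POST; it is not compilable on its own:

    /* Illustration only: relies on SeaBIOS's malloc_fseg() and the
     * pre-patch nvme-int.h struct layout. */
    static void
    example_lost_prpl_write(u64 page_addr)
    {
        /* POST: the f-segment is still writable, so this allocation works. */
        struct nvme_namespace *ns = malloc_fseg(sizeof(*ns));

        /* Runtime (boot-time disk I/O): real hardware and TCG write-protect
         * the f-segment (KVM does not), so both stores below are silently
         * dropped and the PRP entries handed to the controller read back
         * as 0. */
        ns->prpl_len = 0;
        ns->prpl[ns->prpl_len++] = page_addr;
    }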

This patch builds the PRP maintenance data in the existing "dma bounce
buffer" instead, and only when it is actually needed.

Fixes: 01f2736cc9 ("nvme: Pass large I/O requests as PRP lists")
Reported-by: Matt DeVillier <matt.devillier@gmail.com>
Signed-off-by: Alexander Graf <graf@amazon.com>
Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Reviewed-by: Alexander Graf <graf@amazon.com>
Kevin O'Connor 2022-01-19 13:20:21 -05:00
parent 0a40653f30
commit f13b650015
2 changed files with 24 additions and 43 deletions

--- a/src/hw/nvme-int.h
+++ b/src/hw/nvme-int.h

@@ -10,8 +10,6 @@
 #include "types.h" // u32
 #include "pcidevice.h" // struct pci_device
 
-#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
-
 /* Data structures */
 
 /* The register file of a NVMe host controller. This struct follows the naming
@@ -122,10 +120,6 @@ struct nvme_namespace {
 
     /* Page aligned buffer of size NVME_PAGE_SIZE. */
     char *dma_buffer;
-
-    /* Page List */
-    u32 prpl_len;
-    u64 prpl[NVME_MAX_PRPL_ENTRIES];
 };
 
 /* Data structures for NVMe admin identify commands */

--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c

@@ -469,39 +469,23 @@ nvme_bounce_xfer(struct nvme_namespace *ns, u64 lba, void *buf, u16 count,
     return res;
 }
 
-static void nvme_reset_prpl(struct nvme_namespace *ns)
-{
-    ns->prpl_len = 0;
-}
-
-static int nvme_add_prpl(struct nvme_namespace *ns, u64 base)
-{
-    if (ns->prpl_len >= NVME_MAX_PRPL_ENTRIES)
-        return -1;
-
-    ns->prpl[ns->prpl_len++] = base;
-
-    return 0;
-}
+#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
 
 // Transfer data using page list (if applicable)
 static int
 nvme_prpl_xfer(struct nvme_namespace *ns, u64 lba, void *buf, u16 count,
                int write)
 {
-    int first_page = 1;
     u32 base = (long)buf;
     s32 size;
 
     if (count > ns->max_req_size)
         count = ns->max_req_size;
 
-    nvme_reset_prpl(ns);
-
     size = count * ns->block_size;
     /* Special case for transfers that fit into PRP1, but are unaligned */
     if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE))
-        return nvme_io_xfer(ns, lba, buf, NULL, count, write);
+        goto single;
 
     /* Every request has to be page aligned */
     if (base & ~NVME_PAGE_MASK)
@@ -511,28 +495,31 @@ nvme_prpl_xfer(struct nvme_namespace *ns, u64 lba, void *buf, u16 count,
     if (size & (ns->block_size - 1ULL))
         goto bounce;
 
-    for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
-        if (first_page) {
-            /* First page is special */
-            first_page = 0;
-            continue;
-        }
-        if (nvme_add_prpl(ns, base))
-            goto bounce;
-    }
-
-    void *prp2;
-    if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) {
-        /* We need to describe more than 2 pages, rely on PRP List */
-        prp2 = ns->prpl;
-    } else if ((ns->block_size * count) > NVME_PAGE_SIZE) {
-        /* Directly embed the 2nd page if we only need 2 pages */
-        prp2 = (void *)(long)ns->prpl[0];
-    } else {
-        /* One page is enough, don't expose anything else */
-        prp2 = NULL;
-    }
-    return nvme_io_xfer(ns, lba, buf, prp2, count, write);
+    /* Build PRP list if we need to describe more than 2 pages */
+    if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) {
+        u32 prpl_len = 0;
+        u64 *prpl = (void*)ns->dma_buffer;
+        int first_page = 1;
+        for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
+            if (first_page) {
+                /* First page is special */
+                first_page = 0;
+                continue;
+            }
+            if (prpl_len >= NVME_MAX_PRPL_ENTRIES)
+                goto bounce;
+            prpl[prpl_len++] = base;
+        }
+        return nvme_io_xfer(ns, lba, buf, prpl, count, write);
+    }
+
+    /* Directly embed the 2nd page if we only need 2 pages */
+    if ((ns->block_size * count) > NVME_PAGE_SIZE)
+        return nvme_io_xfer(ns, lba, buf, buf + NVME_PAGE_SIZE, count, write);
+
+single:
+    /* One page is enough, don't expose anything else */
+    return nvme_io_xfer(ns, lba, buf, NULL, count, write);
 
 bounce:
     /* Use bounce buffer to make transfer */