Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NVMe SGL support for FEMU #129

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 207 additions & 2 deletions hw/femu/dma.c
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
#include "./nvme.h"

/*
 * Read 'size' bytes of guest-visible memory at 'addr' into 'buf'.
 *
 * Addresses inside the Controller Memory Buffer window are served from the
 * CMB backing store directly; everything else goes through PCI DMA.
 *
 * Returns 0 on success, non-zero on a DMA transaction error.
 */
int nvme_addr_read(FemuCtrl *n, hwaddr addr, void *buf, int size)
{
    if (n->cmbsz && addr >= n->ctrl_mem.addr &&
        addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
        return 0;
    }

    /*
     * Propagate the DMA result (MEMTX_OK == 0) instead of unconditionally
     * returning 0: the SGL mapping code relies on this return value to
     * detect transfer errors.  The previous trailing 'return 1;' was
     * unreachable because both branches returned.
     */
    return pci_dma_read(&n->parent_obj, addr, buf, size);
}

/*
 * Write 'size' bytes from 'buf' to guest-visible memory at 'addr'.
 *
 * Addresses inside the Controller Memory Buffer window are written to the
 * CMB backing store directly; everything else goes through PCI DMA.
 *
 * Returns 0 on success, non-zero on a DMA transaction error.
 */
int nvme_addr_write(FemuCtrl *n, hwaddr addr, void *buf, int size)
{
    if (n->cmbsz && addr >= n->ctrl_mem.addr &&
        addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
        memcpy((void *)&n->cmbuf[addr - n->ctrl_mem.addr], buf, size);
        return 0;
    }

    /*
     * Propagate the DMA result (MEMTX_OK == 0) instead of unconditionally
     * returning 0; the previous trailing 'return 1;' was unreachable.
     */
    return pci_dma_write(&n->parent_obj, addr, buf, size);
}

uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
Expand Down Expand Up @@ -109,6 +115,205 @@ uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
return NVME_INVALID_FIELD | NVME_DNR;
}

/* Initialise the scatter/gather list of a request for a DMA transfer;
 * a no-op for non-DMA (iovec) transfers. */
static inline void nvme_sg_init(FemuCtrl *n, QEMUSGList *sg, bool dma)
{
    if (!dma) {
        return;
    }

    pci_dma_sglist_init(sg, PCI_DEVICE(n), 0);
}
/*
 * Map 'nsgld' Data Block descriptors from 'segment' into 'sg'.
 *
 * '*len' holds the number of bytes still to be mapped for the command and
 * is decremented by the number of bytes consumed here.  Segment / Last
 * Segment descriptors are not valid inside a segment body and are rejected;
 * any other non-Data-Block type is invalid.
 *
 * Returns NVME_SUCCESS or an NVMe status code with DNR set.
 * ('cmd' is currently unused but kept for interface parity with the PRP
 * mapping path.)
 */
static uint16_t nvme_map_sgl_data(FemuCtrl *n, QEMUSGList *sg,
                                  NvmeSglDescriptor *segment, uint64_t nsgld,
                                  size_t *len, NvmeCmd *cmd)
{
    dma_addr_t addr, trans_len;
    uint32_t dlen;

    /* 'i' must match nsgld's width: an int index would be a signed/unsigned
     * mismatch and could truncate for very large segment counts. */
    for (uint64_t i = 0; i < nsgld; i++) {
        uint8_t type = NVME_SGL_TYPE(segment[i].type);

        switch (type) {
        case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
            break;
        case NVME_SGL_DESCR_TYPE_SEGMENT:
        case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
            /* chaining descriptors may not appear inside a segment body */
            return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
        default:
            return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
        }

        dlen = le32_to_cpu(segment[i].len);

        if (!dlen) {
            continue;
        }

        if (*len == 0) {
            /*
             * All data has been mapped, but the SGL contains additional
             * segments and/or descriptors.  Only legal if the controller
             * advertises support for SGLs longer than the data transferred.
             */
            uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls);
            if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
                break;
            }

            return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        }

        trans_len = MIN(*len, dlen);

        addr = le64_to_cpu(segment[i].addr);

        /* reject descriptors whose addr + dlen would wrap 64 bits */
        if (UINT64_MAX - addr < dlen) {
            return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        }

        qemu_sglist_add(sg, addr, trans_len);

        *len -= trans_len;
    }

    return NVME_SUCCESS;
}

/*
 * Walk an SGL chain starting at descriptor 'sgl' and map 'len' bytes of it
 * into 'sg'.  Returns NVME_SUCCESS, or an NVMe status code after destroying
 * the partially built sglist.
 */
uint16_t nvme_map_sgl(FemuCtrl *n, QEMUSGList *sg, NvmeSglDescriptor sgl,
                      size_t len, NvmeCmd *cmd)
{
    /*
     * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
     * dynamically allocating a potentially huge SGL. The spec allows the SGL
     * to be larger (as in number of bytes required to describe the SGL
     * descriptors and segment chain) than the command transfer size, so it is
     * not bounded by MDTS.
     */
    const int SEG_CHUNK_SIZE = 256;

    NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
    uint64_t nsgld;
    uint32_t seg_len;
    uint16_t status;
    hwaddr addr;

    sgld = &sgl;
    addr = le64_to_cpu(sgl.addr);

    nvme_sg_init(n, sg, true);

    /*
     * If the entire transfer can be described with a single data block it can
     * be mapped directly.
     */
    if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
        status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd);
        if (status) {
            goto unmap;
        }

        goto out;
    }

    for (;;) {
        switch (NVME_SGL_TYPE(sgld->type)) {
        case NVME_SGL_DESCR_TYPE_SEGMENT:
        case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
            break;
        default:
            /* was a bare return: leaked the already-initialised sglist */
            status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
            goto unmap;
        }

        seg_len = le32_to_cpu(sgld->len);

        /* a (Last) Segment must be a non-zero multiple of 16 bytes */
        if (!seg_len || seg_len & 0xf) {
            status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
            goto unmap;
        }

        /* reject segments whose addr + seg_len would wrap 64 bits */
        if (UINT64_MAX - addr < seg_len) {
            status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
            goto unmap;
        }

        nsgld = seg_len / sizeof(NvmeSglDescriptor);

        while (nsgld > SEG_CHUNK_SIZE) {
            if (nvme_addr_read(n, addr, segment, sizeof(segment))) {
                status = NVME_DATA_TRAS_ERROR;
                goto unmap;
            }

            status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
                                       &len, cmd);
            if (status) {
                goto unmap;
            }

            nsgld -= SEG_CHUNK_SIZE;
            addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor);
        }

        if (nvme_addr_read(n, addr, segment,
                           nsgld * sizeof(NvmeSglDescriptor))) {
            status = NVME_DATA_TRAS_ERROR;
            goto unmap;
        }

        last_sgld = &segment[nsgld - 1];

        /*
         * If the segment ends with a Data Block, then we are done.
         */
        if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
            status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
            if (status) {
                goto unmap;
            }

            goto out;
        }

        /*
         * If the last descriptor was not a Data Block, then the current
         * segment must not be a Last Segment.
         */
        if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
            status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
            goto unmap;
        }

        sgld = last_sgld;
        addr = le64_to_cpu(sgld->addr);

        /*
         * Do not map the last descriptor; it will be a Segment or Last
         * Segment descriptor and is handled by the next iteration.
         */
        status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd);
        if (status) {
            goto unmap;
        }
    }

out:
    /* if there is any residual left in len, the SGL was too short */
    if (len) {
        status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
        goto unmap;
    }

    return NVME_SUCCESS;

unmap:
    /* fix: was qemu_sglist_destroy(&sg) — 'sg' is already a QEMUSGList *,
     * so &sg passed a QEMUSGList ** (incompatible pointer type) */
    qemu_sglist_destroy(sg);
    return status;
}

uint16_t dma_write_prp(FemuCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1,
uint64_t prp2)
{
Expand Down
3 changes: 2 additions & 1 deletion hw/femu/femu.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static void nvme_init_ctrl(FemuCtrl *n)
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | NVME_CTRL_SGLS_BITBUCKET); // NVMe SGL Support

n->features.arbitration = 0x1f0f0706;
n->features.power_mgmt = 0;
Expand Down Expand Up @@ -722,4 +723,4 @@ static void femu_register_types(void)
type_register_static(&femu_info);
}

type_init(femu_register_types)
type_init(femu_register_types)
22 changes: 20 additions & 2 deletions hw/femu/nvme-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,24 @@ void *nvme_poller(void *arg)
return NULL;
}

/*
 * Map the command's Data Pointer (DPTR) for a transfer of 'len' bytes,
 * dispatching on the PSDT field: PRP entries vs. an SGL descriptor.
 * Returns the status of the underlying mapping routine, or
 * NVME_INVALID_FIELD for a reserved PSDT value.
 */
static uint16_t nvme_map_dptr(FemuCtrl *n, size_t len, NvmeRequest *req)
{
    if (req->cmd.psdt == NVME_PSDT_PRP) {
        uint64_t prp1 = le64_to_cpu(req->cmd.dptr.prp1);
        uint64_t prp2 = le64_to_cpu(req->cmd.dptr.prp2);

        return nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, len, n);
    }

    if (req->cmd.psdt == NVME_PSDT_SGL_MPTR_CONTIGUOUS ||
        req->cmd.psdt == NVME_PSDT_SGL_MPTR_SGL) {
        return nvme_map_sgl(n, &req->qsg, req->cmd.dptr.sgl, len, &req->cmd);
    }

    return NVME_INVALID_FIELD;
}

uint16_t nvme_rw(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
Expand All @@ -259,13 +277,13 @@ uint16_t nvme_rw(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req)
if (err)
return err;

if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
if(nvme_map_dptr(n, data_size, req)) {
nvme_set_error_page(n, req->sq->sqid, cmd->cid, NVME_INVALID_FIELD,
offsetof(NvmeRwCmd, prp1), 0, ns->id);
return NVME_INVALID_FIELD | NVME_DNR;
}

assert((nlb << data_shift) == req->qsg.size);
// assert((nlb << data_shift) == req->qsg.size);

req->slba = slba;
req->status = NVME_SUCCESS;
Expand Down
58 changes: 55 additions & 3 deletions hw/femu/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "qemu/memalign.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci.h"
#include "hw/virtio/vhost.h"
#include "qapi/error.h"
#include "sysemu/kvm.h"
Expand All @@ -18,6 +19,21 @@
#include "nand/nand.h"
#include "timing-model/timing.h"

/* SGL support flags advertised in the Identify Controller SGLS field
 * (borrowed from include/block/nvme.h). */
#define NVME_CTRL_SGLS_SUPPORT_MASK        (0x3 <<  0)
#define NVME_CTRL_SGLS_SUPPORT_NO_ALIGN    (0x1 <<  0)
#define NVME_CTRL_SGLS_SUPPORT_DWORD_ALIGN (0x1 <<  1)
#define NVME_CTRL_SGLS_KEYED               (0x1 <<  2)
#define NVME_CTRL_SGLS_BITBUCKET           (0x1 << 16)
#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS     (0x1 << 17)
#define NVME_CTRL_SGLS_EXCESS_LENGTH       (0x1 << 18)
#define NVME_CTRL_SGLS_MPTR_SGL            (0x1 << 19)
#define NVME_CTRL_SGLS_ADDR_OFFSET         (0x1 << 20)

/* Descriptor Type (upper nibble) and Sub Type (lower nibble) of the SGL
 * identifier byte.  The argument is fully parenthesized: without it,
 * NVME_SGL_TYPE(a | b) would expand to ((a | b >> 4) & 0xf) and bind the
 * shift to 'b' only. */
#define NVME_SGL_TYPE(type)    (((type) >> 4) & 0xf)
#define NVME_SGL_SUBTYPE(type) ((type) & 0xf)


#define NVME_ID_NS_LBADS(ns) \
((ns)->id_ns.lbaf[NVME_ID_NS_FLBAS_INDEX((ns)->id_ns.flbas)].lbads)

Expand Down Expand Up @@ -73,6 +89,34 @@ enum NvmeCapMask {
CAP_MPSMAX_MASK = 0xf,
};

/*
 * SGL descriptor Type — upper nibble of the descriptor identifier byte
 * (extracted with NVME_SGL_TYPE); values follow the NVMe specification's
 * SGL Descriptor Type table.
 */
enum NvmeSglDescriptorType {
    NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0,       /* plain data buffer */
    NVME_SGL_DESCR_TYPE_BIT_BUCKET = 0x1,       /* discard region (not mapped by this code) */
    NVME_SGL_DESCR_TYPE_SEGMENT = 0x2,          /* points at the next segment */
    NVME_SGL_DESCR_TYPE_LAST_SEGMENT = 0x3,     /* points at the final segment */
    NVME_SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4, /* keyed buffer (fabrics) */

    NVME_SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf,
};

/* SGL descriptor Sub Type — lower nibble of the identifier byte
 * (extracted with NVME_SGL_SUBTYPE). */
enum NvmeSglDescriptorSubtype {
    NVME_SGL_DESCR_SUBTYPE_ADDRESS = 0x0, /* descriptor carries a host address */
};

/* Bookkeeping flags for NvmeSg (mirrors upstream QEMU's NvmeSg). */
enum {
    NVME_SG_ALLOC = 1 << 0, /* presumably: list initialised — TODO confirm, unused in this PR */
    NVME_SG_DMA = 1 << 1,   /* presumably: union holds qsg, not iov — TODO confirm */
};

/*
 * Scatter/gather state for a request: either a DMA address list (qsg) or a
 * host iovec (iov), discriminated by the flags above.
 * NOTE(review): declared here but the mapping code in this change still
 * passes bare QEMUSGList pointers — confirm whether NvmeSg is intended for
 * a follow-up.
 */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
#define NVME_MAX_QUEUE_ENTRIES 0xffff
#define NVME_MAX_STRIDE 12
Expand Down Expand Up @@ -531,6 +575,12 @@ enum NvmeStatusCodes {
NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
NVME_INVALID_NSID = 0x000b,
NVME_CMD_SEQ_ERROR = 0x000c,
NVME_INVALID_SGL_SEG_DESCR = 0x000d,
NVME_INVALID_NUM_SGL_DESCRS = 0x000e,
NVME_DATA_SGL_LEN_INVALID = 0x000f,
NVME_MD_SGL_LEN_INVALID = 0x0010,
NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
NVME_INVALID_USE_OF_CMB = 0x0012,
NVME_INVALID_CMD_SET = 0x002c,
NVME_LBA_RANGE = 0x0080,
NVME_CAP_EXCEEDED = 0x0081,
Expand Down Expand Up @@ -1440,15 +1490,17 @@ int nvme_setup_virq(FemuCtrl *n, NvmeCQueue *cq);
int nvme_clear_virq(FemuCtrl *n);

/* Public DMA APIs from dma.c */
void nvme_addr_read(FemuCtrl *n, hwaddr addr, void *buf, int size);
void nvme_addr_write(FemuCtrl *n, hwaddr addr, void *buf, int size);
int nvme_addr_read(FemuCtrl *n, hwaddr addr, void *buf, int size);
int nvme_addr_write(FemuCtrl *n, hwaddr addr, void *buf, int size);
uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
uint64_t prp2, uint32_t len, FemuCtrl *n);
uint16_t dma_write_prp(FemuCtrl *n, uint8_t *ptr, uint32_t len, uint64_t
prp1, uint64_t prp2);
uint16_t dma_read_prp(FemuCtrl *n, uint8_t *ptr, uint32_t len, uint64_t
prp1, uint64_t prp2);

// uint16_t nvme_map_dptr(FemuCtrl *n, size_t len, NvmeRequest *req);
uint16_t nvme_map_sgl(FemuCtrl *n, QEMUSGList *sg, NvmeSglDescriptor sgl,
size_t len, NvmeCmd *cmd);

/* Misc */
uint64_t *nvme_setup_discontig(FemuCtrl *n, uint64_t prp_addr, uint16_t
Expand Down
Loading