drivers: xen: gnttab: rework grant table driver
Previously, Zephyr had a simple gnttab driver adapted from MiniOS. It
had a number of problems related to non-optimal grant frame usage,
ignoring the Xen max_grant_frames limit, etc.

The main problem that led to this rework was the interpretation of the
grant table region in the device tree: every page of the Xen gnttab
reserved region (the first reg in the hypervisor DT node) was treated
as an available grant frame for mapping, which is not correct. The
actual limit of grant frames is usually significantly smaller than
this region, which caused Xen warnings/errors during grant table init.
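
For illustration (not part of the commit), compare the frame count
implied by the DT region with the limit Xen actually reports.
DT_GNTTAB_FRAMES is a hypothetical macro; DT_GNTTAB_SIZE, XEN_PAGE_SIZE
and the query_size hypercall are the ones used in this diff:

/* Hypothetical sketch: pages in the DT gnttab region vs. the real Xen limit */
#define DT_GNTTAB_FRAMES (DT_GNTTAB_SIZE / XEN_PAGE_SIZE) /* region pages, e.g. 16 */
/* ...while GNTTABOP_query_size may report max_nr_frames of only 1..4, so the
 * old driver requested far more grant frames than Xen was willing to provide.
 */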

Now the grant table driver maps a single frame at start and all others
(up to the maximum) on demand, by expanding the gref list. The maximum
is discovered on driver init via the gnttab_query_size operation (a
Xen hypercall).
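
The gref list is a free list threaded through the table itself; below
is a minimal sketch of the allocation step (alloc_gref_sketch() is a
hypothetical stand-in for get_grant_entry() in the diff, using the
constants this commit introduces):

/* gref_list[0] holds the head of the free list, each free entry stores the
 * next free gref, and the tail holds the sentinel GNTTAB_LAST_GREF.
 */
static grant_ref_t alloc_gref_sketch(grant_ref_t *gref_list)
{
	grant_ref_t gref = gref_list[0];

	if (gref == GNTTAB_LAST_GREF) {
		/* List exhausted: the real driver maps one more frame here */
		return GNTTAB_INVAL_GREF;
	}
	gref_list[0] = gref_list[gref];		/* pop the head */
	gref_list[gref] = GNTTAB_GREF_USED;	/* mark as allocated */
	return gref;
}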

Please note that the Stage 1 mapping (the region from the device tree)
is left as is (the whole region is mapped on init with a Zephyr
top-level map); the changes affect only the Stage 2 mapping (the
actual grant table frame pages in the hypervisor). Since all accesses
to the grant table region are fully controlled by driver functions,
this will not cause any problems.
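
For context, a hypothetical consumer of the API touched by this commit
(the target domain, the page, and the transport for the gref are
placeholder assumptions, not part of the commit):

/* Share one local page with Dom0 read-write, then revoke access */
static uint8_t page[XEN_PAGE_SIZE] __aligned(XEN_PAGE_SIZE);

static void share_page_example(void)
{
	grant_ref_t gref = gnttab_grant_access(0 /* Dom0 */,
					       xen_virt_to_gfn(page), false);

	/* ... publish gref to the remote domain, e.g. via Xenstore ... */

	gnttab_end_access(gref);
}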

Signed-off-by: Dmytro Firsov <[email protected]>
Reviewed-by: Volodymyr Babchuk <[email protected]>
firscity committed May 3, 2024
1 parent 0907151 commit 660a899
Showing 1 changed file with 187 additions and 56 deletions.
243 changes: 187 additions & 56 deletions drivers/xen/gnttab.c
@@ -35,52 +35,74 @@ LOG_MODULE_REGISTER(xen_gnttab);
 
 /* Timeout for grant table ops retrying */
 #define GOP_RETRY_DELAY 200
 
-#define GNTTAB_SIZE DT_REG_SIZE_BY_IDX(DT_INST(0, xen_xen), 0)
-BUILD_ASSERT(!(GNTTAB_SIZE % XEN_PAGE_SIZE), "Size of gnttab have to be aligned on XEN_PAGE_SIZE");
+#define DT_GNTTAB_SIZE DT_REG_SIZE_BY_IDX(DT_INST(0, xen_xen), 0)
+#define GNT_ENTRIES_PER_FRAME (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 
-/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
-#define NR_GRANT_FRAMES (GNTTAB_SIZE / XEN_PAGE_SIZE)
-#define NR_GRANT_ENTRIES \
-	(NR_GRANT_FRAMES * XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
+/* Grant refs are unsigned integers and we need some value for invalid cases.
+ * Since the first GNTTAB_NR_RESERVED_ENTRIES are reserved, we can use 0 for
+ * these purposes.
+ */
+#define GNTTAB_INVAL_GREF 0
+
+#define GNTTAB_GREF_USED (UINT32_MAX - 1)
+#define GNTTAB_LAST_GREF UINT32_MAX
+
+BUILD_ASSERT(!(DT_GNTTAB_SIZE % XEN_PAGE_SIZE),
+	     "Size of gnttab have to be aligned on XEN_PAGE_SIZE");
+BUILD_ASSERT(DT_GNTTAB_SIZE <= CONFIG_KERNEL_VM_SIZE);
 
-BUILD_ASSERT(GNTTAB_SIZE <= CONFIG_KERNEL_VM_SIZE);
 DEVICE_MMIO_TOPLEVEL_STATIC(grant_tables, DT_INST(0, xen_xen));
 
 static struct gnttab {
-	struct k_sem sem;
+	struct k_mutex lock;
+	unsigned long nr_grant_frames;
+	unsigned long max_grant_frames;
 	grant_entry_v1_t *table;
-	grant_ref_t gref_list[NR_GRANT_ENTRIES];
+	grant_ref_t *gref_list;
 } gnttab;
 
-static grant_ref_t get_free_entry(void)
-{
-	grant_ref_t gref;
-	unsigned int flags;
-
-	k_sem_take(&gnttab.sem, K_FOREVER);
-
-	flags = irq_lock();
-	gref = gnttab.gref_list[0];
-	__ASSERT((gref >= GNTTAB_NR_RESERVED_ENTRIES &&
-		gref < NR_GRANT_ENTRIES), "Invalid gref = %d", gref);
-	gnttab.gref_list[0] = gnttab.gref_list[gref];
-	irq_unlock(flags);
+static int extend_gnttab(void);
+
+static grant_ref_t get_grant_entry(void)
+{
+	int rc;
+	grant_ref_t gref = GNTTAB_INVAL_GREF;
+
+	k_mutex_lock(&gnttab.lock, K_FOREVER);
+	if (gnttab.gref_list[0] == GNTTAB_LAST_GREF) {
+		/* Map one more frame if possible; we need to hold the mutex */
+		rc = extend_gnttab();
+		if (rc) {
+			k_mutex_unlock(&gnttab.lock);
+			LOG_WRN("Failed to extend gnttab rc = %d, can't allocate gref", rc);
+			return gref;
+		}
+	}
+
+	/* Take the first free entry, stored in gref 0 */
+	gref = gnttab.gref_list[0];
+	/* Update the first free entry */
+	gnttab.gref_list[0] = gnttab.gref_list[gref];
+	/* Mark the picked entry as used */
+	gnttab.gref_list[gref] = GNTTAB_GREF_USED;
+	k_mutex_unlock(&gnttab.lock);
 
 	return gref;
 }
 
-static void put_free_entry(grant_ref_t gref)
+static void put_grant_entry(grant_ref_t gref)
 {
-	unsigned int flags;
-
-	flags = irq_lock();
+	k_mutex_lock(&gnttab.lock, K_FOREVER);
+	if (gnttab.gref_list[gref] != GNTTAB_GREF_USED) {
+		k_mutex_unlock(&gnttab.lock);
+		LOG_WRN("Trying to put already free gref = %u", gref);
+		return;
+	}
+	/* Store the current first free entry */
 	gnttab.gref_list[gref] = gnttab.gref_list[0];
+	/* Update the first free entry with the one we are putting */
 	gnttab.gref_list[0] = gref;
-
-	irq_unlock(flags);
-
-	k_sem_give(&gnttab.sem);
+	k_mutex_unlock(&gnttab.lock);
 }
 
 static void gnttab_grant_permit_access(grant_ref_t gref, domid_t domid,
@@ -103,7 +125,7 @@ static void gnttab_grant_permit_access(grant_ref_t gref, domid_t domid,
 grant_ref_t gnttab_grant_access(domid_t domid, unsigned long gfn,
 				bool readonly)
 {
-	grant_ref_t gref = get_free_entry();
+	grant_ref_t gref = get_grant_entry();
 
 	gnttab_grant_permit_access(gref, domid, gfn, readonly);
 
@@ -136,15 +158,16 @@ int gnttab_end_access(grant_ref_t gref)
 {
 	int rc;
 
-	__ASSERT((gref >= GNTTAB_NR_RESERVED_ENTRIES &&
-		gref < NR_GRANT_ENTRIES), "Invalid gref = %d", gref);
+	__ASSERT((gref >= GNTTAB_NR_RESERVED_ENTRIES) &&
+		 (gref < gnttab.nr_grant_frames * GNT_ENTRIES_PER_FRAME),
+		 "Invalid gref = %d", gref);
 
 	rc = gnttab_reset_flags(gref);
 	if (!rc) {
 		return rc;
 	}
 
-	put_free_entry(gref);
+	put_grant_entry(gref);
 
 	return 0;
 }
@@ -324,42 +347,150 @@ const char *gnttabop_error(int16_t status)
 	}
 }
 
-static int gnttab_init(void)
+static int setup_grant_table(unsigned long nr_frames)
 {
-	grant_ref_t gref;
-	struct xen_add_to_physmap xatp;
+	int rc;
 	struct gnttab_setup_table setup;
-	xen_pfn_t frames[NR_GRANT_FRAMES];
-	int rc = 0, i;
-
-	/* Will be taken/given during gnt_refs allocation/release */
-	k_sem_init(&gnttab.sem, 0, NR_GRANT_ENTRIES - GNTTAB_NR_RESERVED_ENTRIES);
-
-	for (
-		gref = GNTTAB_NR_RESERVED_ENTRIES;
-		gref < NR_GRANT_ENTRIES;
-		gref++
-	) {
-		put_free_entry(gref);
-	}
+	xen_pfn_t *frames;
 
-	for (i = 0; i < NR_GRANT_FRAMES; i++) {
-		xatp.domid = DOMID_SELF;
-		xatp.size = 0;
-		xatp.space = XENMAPSPACE_grant_table;
-		xatp.idx = i;
-		xatp.gpfn = xen_virt_to_gfn(Z_TOPLEVEL_ROM_NAME(grant_tables).phys_addr) + i;
-		rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
-		__ASSERT(!rc, "add_to_physmap failed; status = %d\n", rc);
+	frames = k_calloc(gnttab.nr_grant_frames, sizeof(*frames));
+	if (!frames) {
+		LOG_ERR("Failed to allocate memory for frames");
+		return -ENOMEM;
 	}
 
 	setup.dom = DOMID_SELF;
-	setup.nr_frames = NR_GRANT_FRAMES;
+	setup.nr_frames = gnttab.nr_grant_frames;
 	set_xen_guest_handle(setup.frame_list, frames);
 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
-	__ASSERT((!rc) && (!setup.status), "Table setup failed; status = %s\n",
-		gnttabop_error(setup.status));
+	if (rc || setup.status) {
+		LOG_ERR("Table setup failed; status = %s", gnttabop_error(setup.status));
+		if (!rc) {
+			/* Xen may return 0 with a negative setup status; use it as the call result */
+			rc = setup.status;
+		}
+	}
+	k_free(frames);
+
+	return rc;
+}
+
+static int map_grant_frame(unsigned int start_frame)
+{
+	int rc;
+	struct xen_add_to_physmap xatp;
+
+	if (gnttab.nr_grant_frames == gnttab.max_grant_frames) {
+		LOG_ERR("Reached max number of Xen grant frames");
+		return -ENOMEM;
+	}
+
+	/* Stage 2 frame mapping */
+	xatp.domid = DOMID_SELF;
+	xatp.size = 0;
+	xatp.space = XENMAPSPACE_grant_table;
+	xatp.idx = start_frame;
+	xatp.gpfn = xen_virt_to_gfn(Z_TOPLEVEL_ROM_NAME(grant_tables).phys_addr) + start_frame;
+	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+	if (rc) {
+		LOG_ERR("add_to_physmap failed; status = %d\n", rc);
+		return rc;
+	}
+
+	gnttab.nr_grant_frames++;
+
+	return setup_grant_table(gnttab.nr_grant_frames);
+}
+
+static int extend_gnttab(void)
+{
+	int rc;
+	grant_ref_t iter, start_gref, end_gref;
+	grant_ref_t *old_list = gnttab.gref_list;
+	unsigned long start = gnttab.nr_grant_frames;
+	bool is_first_map = !gnttab.nr_grant_frames;
+	size_t new_size, old_size = gnttab.nr_grant_frames * GNT_ENTRIES_PER_FRAME;
+
+	if (gnttab.nr_grant_frames == gnttab.max_grant_frames) {
+		LOG_ERR("Reached limit of Xen grant frames!");
+		return -ENOSPC;
+	}
+
+	rc = map_grant_frame(start);
+	if (rc) {
+		/* Nothing to do here; leave the previous part of gnttab as is */
+		return rc;
+	}
+
+	/* gnttab.nr_grant_frames has been updated by the successful map */
+	new_size = gnttab.nr_grant_frames * GNT_ENTRIES_PER_FRAME;
+
+	/* Since Zephyr does not have realloc, we need to do it manually */
+	gnttab.gref_list = k_calloc(new_size, sizeof(grant_ref_t));
+	if (!gnttab.gref_list) {
+		gnttab.gref_list = old_list;
+		return -ENOMEM;
+	}
+
+	if (!is_first_map) {
+		memcpy(gnttab.gref_list, old_list, old_size * sizeof(grant_ref_t));
+		k_free(old_list);
+
+		start_gref = old_size - 1;
+	} else {
+		start_gref = GNTTAB_NR_RESERVED_ENTRIES;
+	}
+	end_gref = new_size - 1;
+
+	for (iter = end_gref; iter > start_gref; iter--) {
+		gnttab.gref_list[iter] = gnttab.gref_list[0];
+		gnttab.gref_list[0] = iter;
+	}
+	gnttab.gref_list[end_gref] = GNTTAB_LAST_GREF;
+
+	return 0;
+}
+
+/* Picked from the Linux implementation */
+#define LEGACY_MAX_GNT_FRAMES_SUPPORTED 4
+static unsigned long gnttab_get_max_frames(void)
+{
+	int ret;
+	struct gnttab_query_size q = {
+		.dom = DOMID_SELF,
+	};
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &q, 1);
+	if ((ret < 0) || (q.status != GNTST_okay)) {
+		return LEGACY_MAX_GNT_FRAMES_SUPPORTED;
+	}
+
+	return q.max_nr_frames;
+}
+
+static int gnttab_init(void)
+{
+	int rc;
+
+	k_mutex_init(&gnttab.lock);
+	gnttab.nr_grant_frames = 0;
+	/* We need to know the Xen limitations for the domain */
+	gnttab.max_grant_frames = gnttab_get_max_frames();
+
+	/* Initial mapping of a single gnttab frame; others will be mapped on demand */
+	rc = extend_gnttab();
+	if (rc) {
+		LOG_ERR("Failed to init grant table frames, err = %d", rc);
+		return rc;
+	}
 
+	/*
+	 * Here we are doing the Stage 1 mapping of the whole DT region for grant
+	 * tables. It may be much bigger than the actually mapped number of frames
+	 * and may cause an exception when someone tries to access a Stage 2
+	 * unmapped area, but since access is managed via get/put_grant_entry(),
+	 * which can expand the Stage 2 mapping, we do not need to care about it.
+	 */
 	DEVICE_MMIO_TOPLEVEL_MAP(grant_tables, K_MEM_CACHE_WB | K_MEM_PERM_RW);
 	gnttab.table = (grant_entry_v1_t *)DEVICE_MMIO_TOPLEVEL_GET(grant_tables);
 
