Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mlxcx + Robert's IPD 39 changes to support mlxcx #444

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 66 additions & 76 deletions usr/src/uts/common/io/mlxcx/mlxcx.c
Original file line number Diff line number Diff line change
@@ -650,61 +650,6 @@ mlxcx_panic(mlxcx_t *mlxp, const char *fmt, ...)
va_end(ap);
}

/*
 * Read a 16-bit device register at byte offset `off` within the mapped
 * register space (mlx_regs_base).
 */
uint16_t
mlxcx_get16(mlxcx_t *mlxp, uintptr_t off)
{
	void *addr = (void *)((uintptr_t)mlxp->mlx_regs_base + off);

	return (ddi_get16(mlxp->mlx_regs_handle, addr));
}

/*
 * Read a 32-bit device register at byte offset `off` within the mapped
 * register space (mlx_regs_base).
 */
uint32_t
mlxcx_get32(mlxcx_t *mlxp, uintptr_t off)
{
	void *addr = (void *)((uintptr_t)mlxp->mlx_regs_base + off);

	return (ddi_get32(mlxp->mlx_regs_handle, addr));
}

/*
 * Read a 64-bit device register at byte offset `off` within the mapped
 * register space (mlx_regs_base).
 */
uint64_t
mlxcx_get64(mlxcx_t *mlxp, uintptr_t off)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
return (ddi_get64(mlxp->mlx_regs_handle, (void *)addr));
}

/*
 * Write a 32-bit value to the device register at byte offset `off`
 * within the mapped register space (mlx_regs_base).
 */
void
mlxcx_put32(mlxcx_t *mlxp, uintptr_t off, uint32_t val)
{
	void *addr = (void *)((uintptr_t)mlxp->mlx_regs_base + off);

	ddi_put32(mlxp->mlx_regs_handle, addr, val);
}

/*
 * Write a 64-bit value to the device register at byte offset `off`
 * within the mapped register space (mlx_regs_base).
 */
void
mlxcx_put64(mlxcx_t *mlxp, uintptr_t off, uint64_t val)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
}

/*
 * Write a 32-bit value to a UAR register. `off` is relative to the
 * given UAR's base (mlu_base), which is itself an offset into the
 * mapped register space.
 */
void
mlxcx_uar_put32(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint32_t val)
{
/*
 * The UAR is always inside the first BAR, which we mapped as
 * mlx_regs, so the final address is off + UAR base + regs base.
 */
uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
(uintptr_t)mlxp->mlx_regs_base;
ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
}

/*
 * 64-bit variant of mlxcx_uar_put32(): write to a UAR register. The
 * UAR is always inside the first BAR (mapped as mlx_regs), so the
 * address is off + UAR base + regs base.
 */
void
mlxcx_uar_put64(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint64_t val)
{
uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
(uintptr_t)mlxp->mlx_regs_base;
ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
}

static void
mlxcx_fm_fini(mlxcx_t *mlxp)
{
@@ -816,6 +761,7 @@ mlxcx_teardown_bufs(mlxcx_t *mlxp)
list_destroy(&mlxp->mlx_buf_shards);

kmem_cache_destroy(mlxp->mlx_bufs_cache);
kmem_cache_destroy(mlxp->mlx_mbrm_cache);
}

static void
@@ -1259,7 +1205,7 @@ mlxcx_regs_map(mlxcx_t *mlxp)
* device.
*/
bzero(&da, sizeof (ddi_device_acc_attr_t));
da.devacc_attr_version = DDI_DEVICE_ATTR_V0;
da.devacc_attr_version = DDI_DEVICE_ATTR_V1;
da.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
da.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
if (DDI_FM_ACC_ERR_CAP(mlxp->mlx_fm_caps)) {
@@ -1434,6 +1380,26 @@ mlxcx_bufs_cache_destr(void *arg, void *cookie)
list_destroy(&b->mlb_tx_chain);
}

/*
 * kmem cache constructor for mlxcx_buf_return_mblk_t: objects start
 * out fully zeroed. The cache cookie (the mlxcx_t) and kmflags are
 * not needed here.
 */
static int
mlxcx_mbrm_cache_constr(void *arg, void *cookie, int kmflags)
{
	mlxcx_buf_return_mblk_t *mbrm = arg;

	(void)cookie;
	(void)kmflags;
	bzero(mbrm, sizeof (*mbrm));
	return (0);
}

/*
 * kmem cache destructor for mlxcx_buf_return_mblk_t: verify the object
 * has been returned to its constructed state -- no mblk still attached
 * and not linked on any list -- before the memory is released.
 */
static void
mlxcx_mbrm_cache_destr(void *arg, void *cookie)
{
mlxcx_t *mlxp = cookie;
mlxcx_buf_return_mblk_t *mbrm = arg;
(void)mlxp;
VERIFY3P(mbrm->mbrm_mp, ==, NULL);
VERIFY(!list_link_active(&mbrm->mbrm_entry));
}

mlxcx_buf_shard_t *
mlxcx_mlbs_create(mlxcx_t *mlxp)
{
@@ -1467,6 +1433,12 @@ mlxcx_setup_bufs(mlxcx_t *mlxp)
sizeof (mlxcx_buffer_t), sizeof (uint64_t),
mlxcx_bufs_cache_constr, mlxcx_bufs_cache_destr,
NULL, mlxp, NULL, 0);
(void) snprintf(namebuf, KSTAT_STRLEN, "mlxcx%d_mbrm_cache",
ddi_get_instance(mlxp->mlx_dip));
mlxp->mlx_mbrm_cache = kmem_cache_create(namebuf,
sizeof (mlxcx_buf_return_mblk_t), sizeof (uint64_t),
mlxcx_mbrm_cache_constr, mlxcx_mbrm_cache_destr,
NULL, mlxp, NULL, 0);

list_create(&mlxp->mlx_buf_shards, sizeof (mlxcx_buf_shard_t),
offsetof(mlxcx_buf_shard_t, mlbs_entry));
@@ -1518,11 +1490,12 @@ mlxcx_eq_check(void *arg)
{
mlxcx_t *mlxp = (mlxcx_t *)arg;
mlxcx_event_queue_t *eq;
mlxcx_eventq_ctx_t ctx;
mlxcx_eventq_ctx_t *ctx;
const char *str;

uint_t i;

ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);

for (i = 0; i < mlxp->mlx_intr_count; ++i) {
eq = &mlxp->mlx_eqs[i];

@@ -1536,26 +1509,26 @@ mlxcx_eq_check(void *arg)
*/
ASSERT0(eq->mleq_state & MLXCX_EQ_DESTROYED);

if (!mlxcx_cmd_query_eq(mlxp, eq, &ctx))
if (!mlxcx_cmd_query_eq(mlxp, eq, ctx))
continue;

str = "???";
switch (ctx.mleqc_status) {
switch (ctx->mleqc_status) {
case MLXCX_EQ_STATUS_OK:
break;
case MLXCX_EQ_STATUS_WRITE_FAILURE:
str = "WRITE_FAILURE";
break;
}

if (ctx.mleqc_status != MLXCX_EQ_STATUS_OK) {
if (ctx->mleqc_status != MLXCX_EQ_STATUS_OK) {
mlxcx_fm_qstate_ereport(mlxp, "event",
eq->mleq_num, str, ctx.mleqc_status);
eq->mleq_num, str, ctx->mleqc_status);
mlxcx_warn(mlxp, "EQ %u is in bad status: %x (%s)",
eq->mleq_intr_index, ctx.mleqc_status, str);
eq->mleq_intr_index, ctx->mleqc_status, str);
}

if (ctx.mleqc_state != MLXCX_EQ_ST_ARMED &&
if (ctx->mleqc_state != MLXCX_EQ_ST_ARMED &&
(eq->mleq_state & MLXCX_EQ_ARMED)) {
if (eq->mleq_cc == eq->mleq_check_disarm_cc &&
++eq->mleq_check_disarm_cnt >= 3) {
@@ -1569,17 +1542,21 @@ mlxcx_eq_check(void *arg)
eq->mleq_check_disarm_cnt = 0;
}
}

kmem_free(ctx, sizeof (*ctx));
}

static void
mlxcx_cq_check(void *arg)
{
mlxcx_t *mlxp = (mlxcx_t *)arg;
mlxcx_completion_queue_t *cq;
mlxcx_completionq_ctx_t ctx;
mlxcx_completionq_ctx_t *ctx;
const char *str, *type;
uint_t v;

ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);

for (cq = list_head(&mlxp->mlx_cqs); cq != NULL;
cq = list_next(&mlxp->mlx_cqs, cq)) {

@@ -1597,7 +1574,7 @@ mlxcx_cq_check(void *arg)
if (cq->mlcq_fm_repd_qstate)
continue;

if (!mlxcx_cmd_query_cq(mlxp, cq, &ctx))
if (!mlxcx_cmd_query_cq(mlxp, cq, ctx))
continue;

if (cq->mlcq_wq != NULL) {
@@ -1613,7 +1590,7 @@ mlxcx_cq_check(void *arg)
}

str = "???";
v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATUS);
v = get_bits32(ctx->mlcqc_flags, MLXCX_CQ_CTX_STATUS);
switch (v) {
case MLXCX_CQC_STATUS_OK:
break;
@@ -1636,7 +1613,7 @@ mlxcx_cq_check(void *arg)
cq->mlcq_fm_repd_qstate = B_TRUE;
}

v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATE);
v = get_bits32(ctx->mlcqc_flags, MLXCX_CQ_CTX_STATE);
if (v != MLXCX_CQC_STATE_ARMED &&
(cq->mlcq_state & MLXCX_CQ_ARMED) &&
!(cq->mlcq_state & MLXCX_CQ_POLLING)) {
@@ -1652,19 +1629,25 @@ mlxcx_cq_check(void *arg)
cq->mlcq_check_disarm_cc = 0;
}
}

kmem_free(ctx, sizeof (*ctx));
}

void
mlxcx_check_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *sq)
{
mlxcx_sq_ctx_t ctx;
mlxcx_sq_ctx_t *ctx;
mlxcx_sq_state_t state;

if (!mlxcx_cmd_query_sq(mlxp, sq, &ctx))
ctx = kmem_zalloc(sizeof (mlxcx_sq_ctx_t), KM_SLEEP);

if (!mlxcx_cmd_query_sq(mlxp, sq, ctx)) {
kmem_free(ctx, sizeof (*ctx));
return;
}

ASSERT3U(from_be24(ctx.mlsqc_cqn), ==, sq->mlwq_cq->mlcq_num);
state = get_bits32(ctx.mlsqc_flags, MLXCX_SQ_STATE);
ASSERT3U(from_be24(ctx->mlsqc_cqn), ==, sq->mlwq_cq->mlcq_num);
state = get_bits32(ctx->mlsqc_flags, MLXCX_SQ_STATE);
switch (state) {
case MLXCX_SQ_STATE_RST:
if (sq->mlwq_state & MLXCX_WQ_STARTED) {
@@ -1691,20 +1674,25 @@ mlxcx_check_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *sq)
sq->mlwq_fm_repd_qstate = B_TRUE;
break;
}

kmem_free(ctx, sizeof (mlxcx_sq_ctx_t));
}

void
mlxcx_check_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *rq)
{
mlxcx_rq_ctx_t ctx;
mlxcx_rq_ctx_t *ctx;
mlxcx_rq_state_t state;

ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);

if (!mlxcx_cmd_query_rq(mlxp, rq, &ctx))
if (!mlxcx_cmd_query_rq(mlxp, rq, ctx)) {
kmem_free(ctx, sizeof (*ctx));
return;
}

ASSERT3U(from_be24(ctx.mlrqc_cqn), ==, rq->mlwq_cq->mlcq_num);
state = get_bits32(ctx.mlrqc_flags, MLXCX_RQ_STATE);
ASSERT3U(from_be24(ctx->mlrqc_cqn), ==, rq->mlwq_cq->mlcq_num);
state = get_bits32(ctx->mlrqc_flags, MLXCX_RQ_STATE);
switch (state) {
case MLXCX_RQ_STATE_RST:
if (rq->mlwq_state & MLXCX_WQ_STARTED) {
@@ -1731,6 +1719,8 @@ mlxcx_check_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *rq)
rq->mlwq_fm_repd_qstate = B_TRUE;
break;
}

kmem_free(ctx, sizeof (*ctx));
}

static void
176 changes: 149 additions & 27 deletions usr/src/uts/common/io/mlxcx/mlxcx.h
Original file line number Diff line number Diff line change
@@ -167,7 +167,7 @@ extern "C" {
* How big does an mblk have to be before we dma_bind() it instead of
* bcopying?
*/
#define MLXCX_TX_BIND_THRESHOLD_DFLT 2048
#define MLXCX_TX_BIND_THRESHOLD_DFLT 512

/*
* How often to check the status of completion queues for overflow and
@@ -246,6 +246,21 @@ extern uint_t mlxcx_stuck_intr_count;
*/
#define MLXCX_FUNC_ID_MAX 0

#if defined(DEBUG)
#define MLXCX_PERF_TIMERS
#endif

/*
 * Lightweight performance timers (enabled on DEBUG builds via
 * MLXCX_PERF_TIMERS above). mlxcx_ptimer() records the current
 * hrtime into slot `idx` of a caller-provided timestamp array;
 * MLXCX_PTIMER() compiles away to nothing when timers are disabled.
 */
#if defined(MLXCX_PERF_TIMERS)
static inline void
mlxcx_ptimer(hrtime_t *arr, uint idx)
{
arr[idx] = gethrtime();
}
#define MLXCX_PTIMER(A, I) mlxcx_ptimer(A, I)
#else
#define MLXCX_PTIMER(A, I)
#endif

/*
* Forwards
*/
@@ -318,12 +333,7 @@ typedef struct mlxcx_cmd_queue {
uint8_t mcmd_size_l2;
uint8_t mcmd_stride_l2;
uint_t mcmd_size;
/*
* The mask has a bit for each command slot, there are a maximum
* of 32 slots. When the bit is set in the mask, it indicates
* the slot is available.
*/
uint32_t mcmd_mask;
uint8_t mcmd_next; /* next command slot */

mlxcx_cmd_t *mcmd_active[MLXCX_CMD_MAX];

@@ -552,6 +562,25 @@ typedef struct mlxcx_buf_shard {
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;

/*
 * Indices into the per-buffer timestamp array (mlxcx_buffer_t.mlb_t),
 * recorded at successive stages of the TX path when MLXCX_PERF_TIMERS
 * is enabled. Names describe the point at which the sample is taken.
 */
typedef enum {
MLXCX_BUF_TIMER_PRE_RING_TX,
MLXCX_BUF_TIMER_POST_OFFLOAD_INFO,
MLXCX_BUF_TIMER_POST_INLINE_BCOPY,
MLXCX_BUF_TIMER_POST_BUF_BIND_COPY,
MLXCX_BUF_TIMER_POST_SQE_BUF,
MLXCX_BUF_TIMER_POST_PREPARE_SQE_INLINE,
MLXCX_BUF_TIMER_POST_PREPARE_SQE,
MLXCX_BUF_TIMER_POST_WQ_MTX,
MLXCX_BUF_TIMER_POST_SQE_IN_RING,
MLXCX_BUF_TIMER_POST_SQ_ADD_BUF,
MLXCX_BUF_TIMER_PRE_TX_COMP,
MLXCX_BUF_TIMER_PRE_STEP2,
MLXCX_BUF_TIMER_COPY_TOTAL,
MLXCX_BUF_TIMER_TAKE_FOREIGN_TOTAL,
MLXCX_BUF_TIMER_BIND_MBLK_TOTAL,
/* Not a timer: the number of slots in the array. */
MLXCX_BUF_TIMER_MAX
} mlxcx_buf_timer_t;

typedef struct mlxcx_buffer {
mlxcx_buf_shard_t *mlb_shard;
list_node_t mlb_entry;
@@ -576,6 +605,18 @@ typedef struct mlxcx_buffer {
mlxcx_dma_buffer_t mlb_dma;
mblk_t *mlb_mp;
frtn_t mlb_frtn;

/* spooled up sendq entries ready to push into the ring */
union {
mlxcx_sendq_ent_t *mlb_sqe;
mlxcx_sendq_extra_ent_t *mlb_esqe;
};
size_t mlb_sqe_size;
uint_t mlb_sqe_count;

#if defined(MLXCX_PERF_TIMERS)
hrtime_t mlb_t[MLXCX_BUF_TIMER_MAX];
#endif
} mlxcx_buffer_t;

typedef enum {
@@ -629,6 +670,7 @@ typedef struct mlxcx_completion_queue {
list_t mlcq_buffers;
kmutex_t mlcq_bufbmtx;
list_t mlcq_buffers_b;
uint64_t mlcq_bufbgen;

uint_t mlcq_check_disarm_cnt;
uint64_t mlcq_check_disarm_cc;
@@ -643,14 +685,15 @@ typedef struct mlxcx_completion_queue {
} mlxcx_completion_queue_t;

typedef enum {
MLXCX_WQ_ALLOC = 1 << 0,
MLXCX_WQ_CREATED = 1 << 1,
MLXCX_WQ_STARTED = 1 << 2,
MLXCX_WQ_DESTROYED = 1 << 3,
MLXCX_WQ_TEARDOWN = 1 << 4,
MLXCX_WQ_BUFFERS = 1 << 5,
MLXCX_WQ_REFILLING = 1 << 6,
MLXCX_WQ_BLOCKED_MAC = 1 << 7
MLXCX_WQ_INIT = 1 << 0,
MLXCX_WQ_ALLOC = 1 << 1,
MLXCX_WQ_CREATED = 1 << 2,
MLXCX_WQ_STARTED = 1 << 3,
MLXCX_WQ_DESTROYED = 1 << 4,
MLXCX_WQ_TEARDOWN = 1 << 5,
MLXCX_WQ_BUFFERS = 1 << 6,
MLXCX_WQ_REFILLING = 1 << 7,
MLXCX_WQ_BLOCKED_MAC = 1 << 8
} mlxcx_workq_state_t;

typedef enum {
@@ -891,6 +934,8 @@ typedef enum {
MLXCX_TIRS_PER_GROUP
} mlxcx_tir_role_t;

#define MLXCX_TIS_PER_GROUP 8

typedef struct {
avl_node_t mlgm_group_entry;
list_node_t mlgm_fe_entry;
@@ -915,7 +960,7 @@ struct mlxcx_ring_group {
mac_group_handle_t mlg_mac_hdl;

union {
mlxcx_tis_t mlg_tis;
mlxcx_tis_t mlg_tis[MLXCX_TIS_PER_GROUP];
mlxcx_tir_t mlg_tir[MLXCX_TIRS_PER_GROUP];
};
mlxcx_port_t *mlg_port;
@@ -1230,6 +1275,7 @@ struct mlxcx {
mlxcx_ring_group_t *mlx_tx_groups;

kmem_cache_t *mlx_bufs_cache;
kmem_cache_t *mlx_mbrm_cache;
list_t mlx_buf_shards;

ddi_periodic_t mlx_eq_checktimer;
@@ -1243,18 +1289,83 @@ struct mlxcx {
mlxcx_temp_sensor_t *mlx_temp_sensors;
};

/*
 * Holds an mblk (mbrm_mp) awaiting release as part of a batched buffer
 * return; linked onto a batch list via mbrm_entry. Allocated from
 * mlx_mbrm_cache.
 */
typedef struct mlxcx_buf_return_mblk {
list_node_t mbrm_entry;
mblk_t *mbrm_mp;
} mlxcx_buf_return_mblk_t;

#define MLXCX_BRB_SHARDS 4
#define MLXCX_BRB_INLINE_MBLKS 8
/*
 * A batch of buffers being returned to their shards, accumulated while
 * processing a CQ and released by mlxcx_buf_return_batch_flush(). Up
 * to MLXCX_BRB_SHARDS distinct shards are tracked, each with its own
 * buffer list and count. The first MLXCX_BRB_INLINE_MBLKS mblks are
 * stashed in mbrb_inline_mblk[]; mbrb_mblks presumably takes the
 * overflow (as mlxcx_buf_return_mblk_t entries) -- confirm against
 * the flush implementation.
 */
typedef struct mlxcx_buf_return_batch {
uint mbrb_n[MLXCX_BRB_SHARDS];
mlxcx_buf_shard_t *mbrb_shard[MLXCX_BRB_SHARDS];
list_t mbrb_list[MLXCX_BRB_SHARDS];
list_t mbrb_mblks;
mblk_t *mbrb_inline_mblk[MLXCX_BRB_INLINE_MBLKS];
uint mbrb_inline_mblks;
} mlxcx_buf_return_batch_t;

extern void mlxcx_buf_return_batch_init(mlxcx_buf_return_batch_t *);
extern void mlxcx_buf_return_batch_flush(mlxcx_t *, mlxcx_buf_return_batch_t *);


/*
* Register access
* Register access. Use static inlines.
*/
extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t);
extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t);
extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t);
static inline uint16_t
mlxcx_get16(mlxcx_t *mlxp, uintptr_t off)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
return (ddi_get16(mlxp->mlx_regs_handle, (void *)addr));
}

extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t);
extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t);
static inline uint32_t
mlxcx_get32(mlxcx_t *mlxp, uintptr_t off)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
return (ddi_get32(mlxp->mlx_regs_handle, (void *)addr));
}

static inline uint64_t
mlxcx_get64(mlxcx_t *mlxp, uintptr_t off)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
return (ddi_get64(mlxp->mlx_regs_handle, (void *)addr));
}

static inline void
mlxcx_put32(mlxcx_t *mlxp, uintptr_t off, uint32_t val)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
}

static inline void
mlxcx_put64(mlxcx_t *mlxp, uintptr_t off, uint64_t val)
{
uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
}

extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t);
extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t);
static inline void
mlxcx_uar_put32(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint32_t val)
{
/*
* The UAR is always inside the first BAR, which we mapped as
* mlx_regs
*/
uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
(uintptr_t)mlxp->mlx_regs_base;
ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
}

static inline void
mlxcx_uar_put64(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint64_t val)
{
uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
(uintptr_t)mlxp->mlx_regs_base;
ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
}

/*
* Logging functions.
@@ -1343,7 +1454,7 @@ extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);

extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
mblk_t *, size_t, mlxcx_buffer_t **);
mblk_t *, mblk_t *, size_t, mlxcx_buffer_t **);

extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
@@ -1359,18 +1470,29 @@ extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *,
mlxcx_buffer_t **, size_t);
extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
uint8_t *, size_t, uint32_t, mlxcx_buffer_t *);
mlxcx_buffer_t *);
extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *);

/*
 * Per-packet TX state gathered before SQE construction: the header
 * bytes to inline into the SQE (and their length), checksum offload
 * flags from mac_hcksum_get(), and LSO parameters (MSS and flags)
 * from mac_lso_get().
 */
typedef struct mlxcx_tx_ctx {
uint8_t mtc_inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN];
size_t mtc_inline_hdrlen;
uint32_t mtc_chkflags;
uint32_t mtc_mss;
uint32_t mtc_lsoflags;
} mlxcx_tx_ctx_t;

extern boolean_t mlxcx_buf_prepare_sqe(mlxcx_t *, mlxcx_work_queue_t *,
mlxcx_buffer_t *, const mlxcx_tx_ctx_t *);

extern void mlxcx_teardown_groups(mlxcx_t *);
extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *);
extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *);
extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *);

extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
mlxcx_completionq_ent_t *, mlxcx_buffer_t *, mlxcx_buf_return_batch_t *);
extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
mlxcx_completionq_ent_t *, mlxcx_buffer_t *);

179 changes: 108 additions & 71 deletions usr/src/uts/common/io/mlxcx/mlxcx_cmd.c

Large diffs are not rendered by default.

243 changes: 218 additions & 25 deletions usr/src/uts/common/io/mlxcx/mlxcx_gld.c
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
* Copyright (c) 2021, the University of Queensland
* Copyright 2020 RackTop Systems, Inc.
* Copyright 2023 MNX Cloud, Inc.
* Copyright 2023 Oxide Computer Company
*/

/*
@@ -29,6 +30,7 @@
#include <sys/dlpi.h>

#include <sys/mac_provider.h>
#include <sys/mac_ether.h>

/* Need these for mac_vlan_header_info() */
#include <sys/mac_client.h>
@@ -200,6 +202,119 @@ mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
return (B_TRUE);
}

/*
 * Map the port's operational status and negotiated protocol onto a MAC
 * framework media type (mac_ether_media_t), used for MAC_PROP_MEDIA
 * and ETHER_STAT_XCVR_INUSE. The legacy protocol field is consulted
 * first, then the extended protocol field; anything unrecognized
 * reports ETHER_MEDIA_UNKNOWN.
 */
static mac_ether_media_t
mlxcx_mac_media(mlxcx_port_t *port)
{
switch (port->mlp_oper_status) {
case MLXCX_PORT_STATUS_UP:
case MLXCX_PORT_STATUS_UP_ONCE:
break;
case MLXCX_PORT_STATUS_DOWN:
return (ETHER_MEDIA_NONE);
case MLXCX_PORT_STATUS_DISABLED:
return (ETHER_MEDIA_UNKNOWN);
}

switch (port->mlp_oper_proto) {
case MLXCX_PROTO_SGMII:
return (ETHER_MEDIA_1000_SGMII);
case MLXCX_PROTO_1000BASE_KX:
return (ETHER_MEDIA_1000BASE_KX);
case MLXCX_PROTO_10GBASE_CX4:
return (ETHER_MEDIA_10GBASE_CX4);
case MLXCX_PROTO_10GBASE_KX4:
return (ETHER_MEDIA_10GBASE_KX4);
case MLXCX_PROTO_10GBASE_KR:
return (ETHER_MEDIA_10GBASE_KR);
case MLXCX_PROTO_40GBASE_CR4:
return (ETHER_MEDIA_40GBASE_CR4);
case MLXCX_PROTO_40GBASE_KR4:
return (ETHER_MEDIA_40GBASE_KR4);
case MLXCX_PROTO_SGMII_100BASE:
return (ETHER_MEDIA_100_SGMII);
case MLXCX_PROTO_10GBASE_CR:
return (ETHER_MEDIA_10GBASE_CR);
case MLXCX_PROTO_10GBASE_SR:
return (ETHER_MEDIA_10GBASE_SR);
case MLXCX_PROTO_10GBASE_ER_LR:
return (ETHER_MEDIA_10GBASE_LR);
case MLXCX_PROTO_40GBASE_SR4:
return (ETHER_MEDIA_40GBASE_SR4);
case MLXCX_PROTO_40GBASE_LR4_ER4:
return (ETHER_MEDIA_40GBASE_LR4);
case MLXCX_PROTO_50GBASE_SR2:
return (ETHER_MEDIA_50GBASE_SR2);
case MLXCX_PROTO_100GBASE_CR4:
return (ETHER_MEDIA_100GBASE_CR4);
case MLXCX_PROTO_100GBASE_SR4:
return (ETHER_MEDIA_100GBASE_SR4);
case MLXCX_PROTO_100GBASE_KR4:
return (ETHER_MEDIA_100GBASE_KR4);
case MLXCX_PROTO_25GBASE_CR:
return (ETHER_MEDIA_25GBASE_CR);
case MLXCX_PROTO_25GBASE_KR:
return (ETHER_MEDIA_25GBASE_KR);
case MLXCX_PROTO_25GBASE_SR:
return (ETHER_MEDIA_25GBASE_SR);
case MLXCX_PROTO_50GBASE_CR2:
return (ETHER_MEDIA_50GBASE_CR2);
case MLXCX_PROTO_50GBASE_KR2:
return (ETHER_MEDIA_50GBASE_KR2);
default:
/* Not a recognized legacy proto; try the extended protos. */
break;
}

switch (port->mlp_ext_oper_proto) {
case MLXCX_EXTPROTO_SGMII_100BASE:
return (ETHER_MEDIA_100_SGMII);
case MLXCX_EXTPROTO_1000BASE_X_SGMII:
return (ETHER_MEDIA_1000_SGMII);
case MLXCX_EXTPROTO_5GBASE_R:
/* NOTE(review): no dedicated 5GBASE-R type; _KR is closest -- confirm */
return (ETHER_MEDIA_5000BASE_KR);
case MLXCX_EXTPROTO_10GBASE_XFI_XAUI_1:
return (ETHER_MEDIA_10G_XAUI);
case MLXCX_EXTPROTO_40GBASE_XLAUI_4_XLPPI_4:
return (ETHER_MEDIA_40G_XLPPI);
case MLXCX_EXTPROTO_25GAUI_1_25GBASE_CR_KR:
return (ETHER_MEDIA_25G_AUI);
case MLXCX_EXTPROTO_50GAUI_2_LAUI_2_50GBASE_CR2_KR2:
case MLXCX_EXTPROTO_50GAUI_1_LAUI_1_50GBASE_CR_KR:
/* No type for 50G AUI as far as I can see. */
return (ETHER_MEDIA_UNKNOWN);
case MLXCX_EXTPROTO_CAUI_4_100GBASE_CR4_KR4:
return (ETHER_MEDIA_100GBASE_CAUI4);
case MLXCX_EXTPROTO_100GAUI_2_100GBASE_CR2_KR2:
case MLXCX_EXTPROTO_100GAUI_1_100GBASE_CR_KR:
/* No type for 100G AUI as far as I can see. */
return (ETHER_MEDIA_UNKNOWN);
/*
 * NOTE: These report unsupported but keeping them in active code for
 * detection purposes.
 */
case MLXCX_EXTPROTO_200GAUI_4_200GBASE_CR4_KR4:
return (ETHER_MEDIA_200GAUI_4);
case MLXCX_EXTPROTO_200GAUI_2_200GBASE_CR2_KR2:
return (ETHER_MEDIA_200GAUI_2);
case MLXCX_EXTPROTO_400GAUI_8_400GBASE_CR8:
return (ETHER_MEDIA_400GAUI_8);
case MLXCX_EXTPROTO_400GAUI_4_400GBASE_CR4:
return (ETHER_MEDIA_400GAUI_4);
default:
/*
 * There ARE legitimate single-bit values we don't support,
 * and should just return 0 immediately. We will ASSERT()
 * that it's a single-bit value, however.
 */
/* This power-of-two check also accepts 0. */
ASSERT0((uint32_t)port->mlp_ext_oper_proto &
((uint32_t)port->mlp_ext_oper_proto - 1U));
break;
}

return (ETHER_MEDIA_UNKNOWN);
}

static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
uint64_t *val)
@@ -340,6 +455,9 @@ mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val)
case MAC_STAT_NORCVBUF:
*val = port->mlp_stats.mlps_rx_drops;
break;
case ETHER_STAT_XCVR_INUSE:
*val = (uint64_t)mlxcx_mac_media(port);
break;
default:
ret = ENOTSUP;
}
@@ -509,30 +627,71 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
mlxcx_t *mlxp = sq->mlwq_mlx;
mlxcx_completion_queue_t *cq;
mlxcx_buffer_t *b;
mac_header_info_t mhi;
mblk_t *kmp, *nmp;
uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN];
size_t inline_hdrlen, rem, off;
uint32_t chkflags = 0;
mac_ether_offload_info_t meoi;
mblk_t *kmp;
size_t rem, off;
boolean_t ok;
size_t take = 0;
uint_t bcount;
mlxcx_tx_ctx_t ctx;
#if defined(MLXCX_PERF_TIMERS)
hrtime_t times[MLXCX_BUF_TIMER_MAX];
uint i;
#endif

VERIFY(mp->b_next == NULL);

mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags);
#if defined(MLXCX_PERF_TIMERS)
bzero(times, sizeof (times));
times[MLXCX_BUF_TIMER_PRE_RING_TX] = gethrtime();
#endif

mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &ctx.mtc_chkflags);
mac_lso_get(mp, &ctx.mtc_mss, &ctx.mtc_lsoflags);

if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) {
if (mac_ether_offload_info(mp, &meoi) != 0 ||
(meoi.meoi_flags & MEOI_L2INFO_SET) == 0) {
/*
* We got given a frame without a valid L2 header on it. We
* can't really transmit that (mlx parts don't like it), so
* we will just drop it on the floor.
*/
mlxcx_warn(mlxp, "!tried to tx packet with no valid L2 header;"
" dropping it on the floor");
freemsg(mp);
return (NULL);
}

#if defined(MLXCX_PERF_TIMERS)
times[MLXCX_BUF_TIMER_POST_OFFLOAD_INFO] = gethrtime();
#endif

ctx.mtc_inline_hdrlen = meoi.meoi_l2hlen;

/*
* If we're doing LSO, we need to find the end of the TCP header, and
* inline up to that point.
*/
if (ctx.mtc_lsoflags & HW_LSO) {
if ((meoi.meoi_flags & MEOI_L3INFO_SET) == 0 ||
(meoi.meoi_flags & MEOI_L4INFO_SET) == 0) {
mlxcx_warn(mlxp, "!tried to tx LSO packet with no "
"valid L3/L4 headers; dropping it on the floor");
freemsg(mp);
return (NULL);
}
ctx.mtc_inline_hdrlen += meoi.meoi_l3hlen + meoi.meoi_l4hlen;
}

if (ctx.mtc_inline_hdrlen > MLXCX_MAX_INLINE_HEADERLEN) {
mlxcx_warn(mlxp, "!tried to tx LSO packet with headers that "
"are too long (%u bytes, max is %u); dropping it on the "
"floor", ctx.mtc_inline_hdrlen, MLXCX_MAX_INLINE_HEADERLEN);
freemsg(mp);
return (NULL);
}

inline_hdrlen = rem = mhi.mhi_hdrsize;
rem = ctx.mtc_inline_hdrlen;

kmp = mp;
off = 0;
@@ -543,7 +702,7 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
take = sz;
if (take > rem)
take = rem;
bcopy(kmp->b_rptr, inline_hdrs + off, take);
bcopy(kmp->b_rptr, ctx.mtc_inline_hdrs + off, take);
rem -= take;
off += take;
if (take == sz) {
@@ -552,16 +711,37 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
}
}

bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
MLXCX_PTIMER(times, MLXCX_BUF_TIMER_POST_INLINE_BCOPY);

bcount = mlxcx_buf_bind_or_copy(mlxp, sq, mp, kmp, take, &b);
if (bcount == 0) {
atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
return (mp);
}

MLXCX_PTIMER(times, MLXCX_BUF_TIMER_POST_BUF_BIND_COPY);

#if defined(MLXCX_PERF_TIMERS)
/* Copy our temporary timers over to the buffer_t */
for (i = 0; i <= MLXCX_BUF_TIMER_POST_BUF_BIND_COPY; ++i)
b->mlb_t[i] = times[i];
#endif

if (!mlxcx_buf_prepare_sqe(mlxp, sq, b, &ctx)) {
mlxcx_warn(mlxp, "!tried to tx packet that couldn't fit in "
"an SQE, dropping");
freemsg(mp);
return (NULL);
}

MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_PREPARE_SQE);

mutex_enter(&sq->mlwq_mtx);
VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
cq = sq->mlwq_cq;

MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_WQ_MTX);

/*
* state is a single int, so read-only access without the CQ lock
* should be fine.
@@ -595,24 +775,15 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
goto blocked;
}

ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen,
chkflags, b);
ok = mlxcx_sq_add_buffer(mlxp, sq, b);
if (!ok) {
atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
goto blocked;
}

/*
* Now that we've successfully enqueued the rest of the packet,
* free any mblks that we cut off while inlining headers.
*/
for (; mp != kmp; mp = nmp) {
nmp = mp->b_cont;
freeb(mp);
}

mutex_exit(&sq->mlwq_mtx);
MLXCX_PTIMER(b->mlb_t, MLXCX_BUF_TIMER_POST_SQ_ADD_BUF);

return (NULL);

@@ -1126,6 +1297,7 @@ mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
mac_capab_rings_t *cap_rings;
mac_capab_led_t *cap_leds;
mac_capab_transceiver_t *cap_txr;
mac_capab_lso_t *cap_lso;
uint_t i, n = 0;

switch (cap) {
@@ -1158,10 +1330,10 @@ mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
break;

case MAC_CAPAB_HCKSUM:
if (mlxp->mlx_caps->mlc_checksum) {
*(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
}
if (!mlxp->mlx_caps->mlc_checksum)
return (B_FALSE);
*(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
break;

case MAC_CAPAB_LED:
@@ -1182,6 +1354,24 @@ mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
cap_txr->mct_read = mlxcx_mac_txr_read;
break;

case MAC_CAPAB_LSO:
cap_lso = cap_data;

if (!mlxp->mlx_caps->mlc_lso)
return (B_FALSE);

cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 |
LSO_TX_BASIC_TCP_IPV6;
/*
* Cap LSO sends at 64k due to limitations in the TCP stack
* (full length needs to fit in an IP header apparently)
*/
cap_lso->lso_basic_tcp_ipv4.lso_max =
MIN(mlxp->mlx_caps->mlc_max_lso_size, UINT16_MAX);
cap_lso->lso_basic_tcp_ipv6.lso_max =
MIN(mlxp->mlx_caps->mlc_max_lso_size, UINT16_MAX);
break;

default:
return (B_FALSE);
}
@@ -1453,6 +1643,9 @@ mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
*(link_state_t *)pr_val = LINK_STATE_UNKNOWN;
}
break;
case MAC_PROP_MEDIA:
*(mac_ether_media_t *)pr_val = mlxcx_mac_media(port);
break;
case MAC_PROP_AUTONEG:
if (pr_valsize < sizeof (uint8_t)) {
ret = EOVERFLOW;
11 changes: 10 additions & 1 deletion usr/src/uts/common/io/mlxcx/mlxcx_intr.c
Original file line number Diff line number Diff line change
@@ -874,6 +874,7 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
uint_t rx_frames = 0;
uint_t comp_cnt = 0;
int64_t wqebbs, bufcnt;
mlxcx_buf_return_batch_t rbatch;

*mpp = NULL;

@@ -886,6 +887,8 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,

nmp = cmp = mp = NULL;

mlxcx_buf_return_batch_init(&rbatch);

wqebbs = 0;
bufcnt = 0;
for (cent = mlxcx_cq_next(mlcq); cent != NULL;
@@ -939,6 +942,7 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
list_move_tail(&mlcq->mlcq_buffers,
&mlcq->mlcq_buffers_b);
added = B_TRUE;
++mlcq->mlcq_bufbgen;
}
mutex_exit(&mlcq->mlcq_bufbmtx);
if (added)
@@ -977,9 +981,11 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
list_remove(&mlcq->mlcq_buffers, buf);
bufcnt++;

MLXCX_PTIMER(buf->mlb_t, MLXCX_BUF_TIMER_PRE_TX_COMP);

switch (mlcq->mlcq_wq->mlwq_type) {
case MLXCX_WQ_TYPE_SENDQ:
mlxcx_tx_completion(mlxp, mlcq, cent, buf);
mlxcx_tx_completion(mlxp, mlcq, cent, buf, &rbatch);
break;
case MLXCX_WQ_TYPE_RECVQ:
nmp = mlxcx_rx_completion(mlxp, mlcq, cent, buf);
@@ -1006,6 +1012,7 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
* high->low water mark.
*/
if (bufcnt > (MLXCX_CQ_LWM_GAP - MLXCX_CQ_HWM_GAP)) {
mlxcx_buf_return_batch_flush(mlxp, &rbatch);
mlxcx_update_cqci(mlxp, mlcq);
/*
* Both these variables are incremented using
@@ -1024,6 +1031,8 @@ mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
break;
}

mlxcx_buf_return_batch_flush(mlxp, &rbatch);

if (comp_cnt > 0) {
mlxcx_update_cqci(mlxp, mlcq);
atomic_add_64(&mlcq->mlcq_bufcnt, -bufcnt);
18 changes: 10 additions & 8 deletions usr/src/uts/common/io/mlxcx/mlxcx_reg.h
Original file line number Diff line number Diff line change
@@ -71,8 +71,8 @@
#define MLXCX_UAR_EQ_NOARM 0x0048

/* Number of blue flame reg pairs per UAR */
#define MLXCX_BF_PER_UAR 2
#define MLXCX_BF_PER_UAR_MASK 0x1
#define MLXCX_BF_PER_UAR 4
#define MLXCX_BF_PER_UAR_MASK (MLXCX_BF_PER_UAR - 1)
#define MLXCX_BF_SIZE 0x100
#define MLXCX_BF_BASE 0x0800

@@ -404,6 +404,8 @@ typedef enum {

#define MLXCX_WQE_OCTOWORD 16
#define MLXCX_SQE_MAX_DS ((1 << 6) - 1)

#define MLXCX_SQE_BUF 16
/*
* Calculate the max number of address pointers in a single ethernet
* send message. This is the remainder from MLXCX_SQE_MAX_DS
@@ -456,16 +458,16 @@ typedef enum {
/* CSTYLED */
#define MLXCX_SQE_ETH_INLINE_HDR_SZ (bitdef_t){0, 0x03ff}
#define MLXCX_SQE_ETH_SZFLAG_VLAN (1 << 15)
#define MLXCX_MAX_INLINE_HEADERLEN 64
#define MLXCX_MAX_INLINE_HEADERLEN (2 + MLXCX_WQE_OCTOWORD * 12)

typedef struct {
uint8_t mles_rsvd[4];
bits8_t mles_csflags;
uint8_t mles_rsvd2[1];
uint16_t mles_mss;
uint16be_t mles_mss;
uint8_t mles_rsvd3[4];
bits16_t mles_szflags;
uint8_t mles_inline_headers[18];
uint8_t mles_inline_headers[2];
} mlxcx_wqe_eth_seg_t;

typedef struct {
@@ -479,7 +481,7 @@ typedef struct {
typedef struct {
mlxcx_wqe_control_seg_t mlsqe_control;
mlxcx_wqe_eth_seg_t mlsqe_eth;
mlxcx_wqe_data_seg_t mlsqe_data[1];
mlxcx_wqe_data_seg_t mlsqe_data[2];
} mlxcx_sendq_ent_t;

typedef struct {
@@ -640,7 +642,7 @@ typedef enum {
.bit_shift = 25, \
.bit_mask = 0x06000000 }

#define MLXCX_WORKQ_CTX_MAX_ADDRESSES 128
#define MLXCX_WORKQ_CTX_MAX_ADDRESSES 1024

typedef struct mlxcx_workq_ctx {
bits32_t mlwqc_flags;
@@ -1588,7 +1590,7 @@ typedef struct {
/*
* This is an artificial limit that we're imposing on our actions.
*/
#define MLXCX_CREATE_QUEUE_MAX_PAGES 128
#define MLXCX_CREATE_QUEUE_MAX_PAGES 1024

typedef struct {
mlxcx_cmd_in_t mlxi_create_eq_head;
693 changes: 567 additions & 126 deletions usr/src/uts/common/io/mlxcx/mlxcx_ring.c

Large diffs are not rendered by default.