Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce kni isolate rx queue support #695

Open
wants to merge 2 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions conf/dpvs.conf.items
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ netif_defs {
! mtu 1500 <1500,0-9000>
! promisc_mode <disable>
! kni_name dpdk0.kni <char[32]>
! kni_isolate on <on, on/off>
! kni_ipaddress {
! ipv4 <lan link ipv4 address>
! ipv6 <lan link ipv6 address>
! }
}

<init> device dpdk1 {
Expand All @@ -59,6 +64,11 @@ netif_defs {
! mtu 1500
! promisc_mode
! kni_name dpdk1.kni
! kni_isolate on <on, on/off>
! kni_ipaddress {
! ipv4 <wan link ipv4 address>
! ipv6 <wan link ipv6 address>
! }
}

<init> device bond0 {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is rx_queue_id needed to config in kni worker type?

Expand Down
10 changes: 10 additions & 0 deletions conf/dpvs.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ netif_defs {
! mtu 1500
! promisc_mode
kni_name dpdk0.kni
kni_isolate on
kni_ipaddress {
ipv4 192.168.0.1
ipv6 0000:0000:0000:0000:0000:FFFF:C0A8:0001
}
}

<init> device dpdk1 {
Expand All @@ -61,6 +66,11 @@ netif_defs {
! mtu 1500
! promisc_mode
kni_name dpdk1.kni
kni_isolate on
kni_ipaddress {
ipv4 192.168.0.2
ipv6 0000:0000:0000:0000:0000:FFFF:C0A8:0002
}
}

! <init> bonding bond0 {
Expand Down
5 changes: 5 additions & 0 deletions conf/dpvs.conf.single-nic.sample
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ netif_defs {
! mtu 1500
! promisc_mode
kni_name dpdk0.kni
kni_isolate on
kni_ipaddress {
ipv4 192.168.0.2
ipv6 0000:0000:0000:0000:0000:FFFF:C0A8:0002
}
}
}

Expand Down
44 changes: 44 additions & 0 deletions include/kni.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@
#include <stdbool.h>
#include "netif.h"

/*
* NOTE:
* 1. local ip filter will make input set fixed on ixgbe/i40e.
* 2. dip filter is not supported by ixgbe and i40e under the
* premise of local ip filter.
* 3. use dip + dport + dst_port_mask filters to cover port range
* [0-65535] to replace dip filter on ixgbe/i40e.
* 4. kni fdir filter support tcp and udp, icmp not supported.
* 5. if (fdir_conf.mask.dst_port_mask & pkt.dport) equals an
* element of port_base_array, the pkt matches the kni fdir
* filter and is redirected to the kni rx queue.
Comment on lines +34 to +42
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a solution to support both tcp/udp and IP protocols? Somebody may prefer OSPF (IP protocol 89) to BGP(TCP) for ECMP routes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flow支持ip粒度或者tcp/udp的,这里的注释是解释的ixgbe/i40e,intel卡有input set,一种filter/flow会固定input set,其他input set的filter/flow可能就不生效。

* 6. the rss rte_flow targeting a specific rss queue region should
* have a lower priority than the lip and kni fdir filters.
*/
/*
 * Bookkeeping for the kni fdir filters: the port mask, the set of port
 * bases a masked destination port is compared against, and the filter
 * soft ids.
 */
typedef struct kni_fdir {
bool init_success; /* kni fdir init flag */
uint16_t filter_mask; /* kni filter's port mask */
uint16_t port_base_num; /* kni port_base num; presumably the number of used port_base_array entries - confirm */
__be16 port_base_array[DPVS_MAX_LCORE]; /* kni port_base set (big-endian per the __be16 type) */
/* NOTE(review): presumably the fdir soft ids indexed by [lcore][proto], judging by the dimensions - confirm */
uint32_t soft_id_array[DPVS_MAX_LCORE][MAX_FDIR_PROTO];
} dp_vs_kni_fdir;

/*
* @dev - real device kni attach to.
* @kniname - optional, kni device name or auto generate.
Expand All @@ -38,6 +60,11 @@ int kni_add_dev(struct netif_port *dev, const char *kniname);
int kni_del_dev(struct netif_port *dev);
int kni_init(void);

/*
 * kni fdir setup entry point.
 * NOTE(review): the definition is not visible here; the return
 * convention (presumably a status code) should be confirmed.
 */
int kni_fdir_init(void);
/*
 * Add a kni fdir filter on a device.
 * @dev    - device the filter is added on.
 * @kni_ip - address the filter is built from (read-only).
 * @af     - address family of @kni_ip; presumably AF_INET or AF_INET6 - confirm.
 */
int kni_fdir_filter_add(struct netif_port *dev,
const union inet_addr *kni_ip,
int af);

static inline bool kni_dev_exist(const struct netif_port *dev)
{
return dev->kni.kni ? true : false;
Expand All @@ -51,4 +78,21 @@ static inline void kni_handle_request(const struct netif_port *dev)
rte_kni_handle_request(dev->kni.kni);
}

static inline bool kni_fwd_valid(const struct netif_port *dev,
kni_fwd_mode_t fwd_mode)
{
if (fwd_mode == KNI_FWD_MODE_DEFAULT) {
return true;
}

if ((fwd_mode == KNI_FWD_MODE_ISOLATE_RX)
&& (dev->kni.fwd_mode == fwd_mode)
&& (dev->kni.rx_queue_id != NETIF_QUEUE_ID_INVALID))
{
Comment on lines +90 to +91
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

&& (dev->kni.rx_queue_id != NETIF_QUEUE_ID_INVALID)) {

return true;
}

return false;
}

#endif /* __DPVS_KNI_H__ */
163 changes: 163 additions & 0 deletions include/netdev_flow.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* DPVS is a software load balancer (Virtual Server) based on DPDK.
*
* Copyright (C) 2020 ByteDance (www.bytedance.com).
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Copyright (C) 2020 ByteDance (www.bytedance.com).
* All Rights Reserved.
*
* [email protected], 12/2020.
*/
#ifndef __NETDEV_FLOW_H__
#define __NETDEV_FLOW_H__

#include <assert.h>
#include <rte_flow.h>

#include "conf/common.h"
#include "netif.h"

#ifndef NETDEV
#define NETDEV
#define RTE_LOGTYPE_NETDEV RTE_LOGTYPE_USER1
#endif

#define DEFAULT_MAX_PATTERNS 6
#define DEFAULT_MAX_ACTIONS 6

#define NETDEV_FLOW_DEFAULT_MARK_ID 1
#define NETDEV_FLOW_DEFAULT_RSS_LEVEL 0

/* fuzzy match level with signature mode */
#define DEFAULT_FUZZY_SPEC 2
#define DEFAULT_FUZZY_LAST 0xfffffff0
#define DEFAULT_FUZZY_MASK 0xffffffff

#define NETDEV_IXGBE_DRIVER_NAME "ixgbe"
#define NETDEV_I40E_DRIVER_NAME "i40e"
#define NETDEV_MLNX_DRIVER_NAME "net_mlx5"

/* flags for netdev flow */
#define NETDEV_FLOW_F_SIP_FIELD (1 << 0)
#define NETDEV_FLOW_F_DIP_FIELD (1 << 1)
#define NETDEV_FLOW_F_SPORT_FIELD (1 << 2)
#define NETDEV_FLOW_F_DPORT_FIELD (1 << 3)
#define NETDEV_FLOW_F_L3_PROTO_FIELD (1 << 4)
#define NETDEV_FLOW_F_L4_PROTO_FIELD (1 << 5)

/*
 * Static priorities assigned to the different kinds of flow.
 * On Mellanox NICs a smaller value means a higher priority.
 */
enum netdev_flow_priority {
    NETDEV_FLOW_PRIORITY_NONE   = 0,
    NETDEV_FLOW_PRIORITY_FILTER = 1,
    NETDEV_FLOW_PRIORITY_VXLAN  = 2,
    NETDEV_FLOW_PRIORITY_RSS    = 3,
};

/*
 * Advance an actions index by one. The assert fires when the index is
 * already at the last slot (DEFAULT_MAX_ACTIONS - 1) or beyond.
 * The argument is evaluated more than once.
 */
#define get_next_acts_index(index) do {                 \
    assert((index) < DEFAULT_MAX_ACTIONS - 1);          \
    ++(index);                                          \
} while (0)

/*
 * Advance a patterns index by one. The assert fires when the index is
 * already at the last slot (DEFAULT_MAX_PATTERNS - 1) or beyond.
 * The argument is evaluated more than once.
 */
#define get_next_patts_index(index) do {                \
    assert((index) < DEFAULT_MAX_PATTERNS - 1);         \
    ++(index);                                          \
} while (0)

/*
 * Initialise a netdev flow as an rss flow on a port.
 * @flow - pointer to the struct netdev_flow to initialise.
 * @port - pointer to the port; its id is copied into the flow.
 *
 * Sets the type to NETDEV_FLOW_TYPE_RSS, clears the hardware handle and
 * the offloaded flag, and finally stores netdev_flow_hash(flow) as the
 * flow id. The hash is taken last, after the other fields are written.
 *
 * Both parameters are parenthesized so that any pointer expression
 * (e.g. &flow_obj) can be passed; note they are evaluated more than once.
 */
#define NETDEV_RSS_FLOW_INIT(flow, port) do {           \
    (flow)->type = NETDEV_FLOW_TYPE_RSS;                \
    (flow)->port_id = (port)->id;                       \
    (flow)->flow_handle = NULL;                         \
    (flow)->hw_offloaded = false;                       \
    (flow)->flow_id = netdev_flow_hash(flow);           \
} while (0)

/* kinds of netdev flow; NETDEV_FLOW_TYPE_MAX is the number of kinds */
enum netdev_flow_type {
    NETDEV_FLOW_TYPE_RSS    = 0,
    NETDEV_FLOW_TYPE_FILTER = 1,
    NETDEV_FLOW_TYPE_MAX    = 2
};

/*
 * Query argument of netdev_flow_query(); one of several rte_flow
 * structures sharing the same storage.
 * NOTE(review): which member is valid for a given query is not visible
 * in this header - confirm against the implementation.
 */
union netdev_flow_query {
struct rte_flow_query_count count;
struct rte_flow_action_queue queue;
struct rte_flow_action_rss rss_conf;
};

/* per-flow counters, embedded in struct netdev_flow as 'stats' */
struct netdev_flow_stats {
uint64_t n_pkts; /* presumably the number of packets - confirm */
uint64_t n_bytes; /* presumably the number of bytes - confirm */
};

/*
 * Software description of one flow on a port: its kind, the port it
 * belongs to, the rss or filter parameters, an id, the hardware handle
 * and per-flow counters.
 */
struct netdev_flow {
enum netdev_flow_type type; /* presumably selects which member of 'data' is meaningful - confirm */
portid_t port_id;

/* flow meta data: rss parameters or filter parameters */
union {
struct {
queueid_t rss_queues[NETIF_MAX_QUEUES];
uint32_t rss_queue_num; /* presumably the number of used rss_queues entries - confirm */
} rss_info;
struct {
queueid_t queue_id;
/* NOTE(review): byte order of sport/dport is not stated here - confirm */
uint16_t sport;
uint16_t dport;
uint8_t l3_proto;
uint8_t l4_proto;
union inet_addr saddr;
union inet_addr daddr;
} filter_info;
} data;

/* NOTE(review): presumably a bitmask of NETDEV_FLOW_F_* - confirm */
uint32_t flags;
/* unique flow id */
uint32_t flow_id;

/* pointer to rte flow in hardware */
struct rte_flow *flow_handle;
bool hw_offloaded;
struct list_head list; /* list linkage; presumably into a port's flow list - confirm */
struct netdev_flow_stats stats;
};

/* l4_proto used by i40e only */
int netdev_flow_add_kni_filter(struct netif_port *port,
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
const union inet_addr *kni_ip,
queueid_t kni_queue_id,
uint8_t l3_proto,
uint8_t l4_proto);
/* called during dpvs initialization */
int netdev_flow_add_rss_filter(struct netif_port *port);

/*
 * NOTE: netdev flow API. Flows are operated on at initialization or
 * termination time; a lock is needed around the rte_flow_* calls in
 * case they may run concurrently.
 *
 * NOTE(review): only the declarations are visible here; the int return
 * convention (presumably a status code) should be confirmed against the
 * implementation.
 */
int netdev_flow_init(struct netif_port *port);
int netdev_flow_add(struct netif_port *port,
struct netdev_flow *netdev_flow);
int netdev_flow_del(struct netif_port *port,
struct netdev_flow *netdev_flow);
int netdev_flow_query(struct netif_port *port,
struct netdev_flow *netdev_flow,
union netdev_flow_query *query);
int netdev_flow_flush(struct netif_port *port);

#endif /* __NETDEV_FLOW_H__ */
37 changes: 37 additions & 0 deletions include/netif.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ enum {

/* max tx/rx queue number for each nic */
#define NETIF_MAX_QUEUES 16
/*
 * invalid queue id for initial val.
 * Cast to queueid_t so that a queueid_t variable initialised with this
 * value compares equal to it. With a bare -1 (type int) and an unsigned
 * queueid_t narrower than int, the stored value is promoted to a positive
 * int and "id != NETIF_QUEUE_ID_INVALID" is always true. The expansion is
 * fully parenthesized so it is safe inside larger expressions.
 * NOTE(review): queueid_t is defined outside this view - confirm it is in
 * scope wherever the macro is expanded, and that no caller relies on the
 * value being a negative int.
 */
#define NETIF_QUEUE_ID_INVALID ((queueid_t)-1)
/* max addr count on kni interface */
#define NETIF_KNI_ADDR_MAX_NUM 32
/* max nic number used in the program */
#define NETIF_MAX_PORTS 4096
/* maximum pkt number at a single burst */
Expand All @@ -73,6 +77,8 @@ enum {

#define NETIF_LCORE_ID_INVALID 0xFF

#define MAX_FDIR_PROTO 2

/************************* lcore conf ***************************/
struct rx_partner;

Expand Down Expand Up @@ -165,13 +171,28 @@ typedef enum {
PORT_TYPE_INVAL,
} port_type_t;

/* kni forward mode; KNI_FWD_MODE_MAX is the number of modes */
typedef enum {
    KNI_FWD_MODE_DEFAULT    = 0,
    KNI_FWD_MODE_ISOLATE_RX = 1,
    KNI_FWD_MODE_MAX        = 2,
} kni_fwd_mode_t;

/* one address of a kni interface together with its address family */
struct kni_addr {
int af; /* address family of 'addr'; presumably AF_INET or AF_INET6 - confirm */
union inet_addr addr;
} __rte_cache_aligned;

struct netif_kni {
char name[IFNAMSIZ];
struct rte_kni *kni;
struct ether_addr addr;
struct dpvs_timer kni_rtnl_timer;
int ip_addr_cnt; /* total count of kni addrs */
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
int kni_rtnl_fd;
struct rte_ring *rx_ring;
struct kni_addr ip[NETIF_KNI_ADDR_MAX_NUM]; /* ipv4 or ipv6 */
queueid_t rx_queue_id; /* only one kni queue supported by default */
kni_fwd_mode_t fwd_mode; /* kni fwd mode: default or isolated rx */
} __rte_cache_aligned;

union netif_bond {
Expand Down Expand Up @@ -227,6 +248,12 @@ struct netif_hw_addr_list {
int count;
};

/* per-port rte flow bookkeeping: the flow list and two counters */
struct flow_info {
struct list_head flow_list; /* store rte flow related on port */
int flow_cnt; /* current flow count */
int flow_err; /* error flow count */
};

struct netif_port {
char name[IFNAMSIZ]; /* device name */
portid_t id; /* device id */
Expand Down Expand Up @@ -254,6 +281,9 @@ struct netif_port {
struct vlan_info *vlan_info; /* VLANs info for real device */
struct netif_tc tc; /* traffic control */
struct netif_ops *netif_ops;
int rss_queue_num;
queueid_t rss_queues[NETIF_MAX_QUEUES];
struct flow_info hw_flow_info; /* hardware rte flow on port */
} __rte_cache_aligned;

/**************************** lcore API *******************************/
Expand Down Expand Up @@ -316,6 +346,7 @@ int netif_ctrl_term(void); /* netif ctrl plane cleanup */
void netif_cfgfile_init(void);
void netif_keyword_value_init(void);
void install_netif_keywords(void);
lcoreid_t netif_get_kni_lcore_id(void);
void kni_ingress(struct rte_mbuf *mbuf, struct netif_port *dev);

static inline void *netif_priv(struct netif_port *dev)
Expand All @@ -339,4 +370,10 @@ static inline uint16_t dpvs_rte_eth_dev_count(void)

extern bool dp_vs_fdir_filter_enable;

extern bool dp_vs_kni_isolate_rx_enable;

/*
 * Signature of a filter add/remove operation.
 * @af        - address family of @dip.
 * @dev       - device the filter applies to.
 * @cid       - lcore id.
 * @dip       - destination address (read-only).
 * @dport     - destination port, big-endian per the __be16 type.
 * @filter_id - array of filter ids.
 *              NOTE(review): its length is not stated here; presumably
 *              MAX_FDIR_PROTO entries - confirm.
 * @add       - presumably true to add the filter and false to remove it - confirm.
 */
typedef int (* netif_filter_op_func)(int af, struct netif_port *dev, lcoreid_t cid,
const union inet_addr *dip, __be16 dport,
uint32_t filter_id[], bool add);

#endif /* __DPVS_NETIF_H__ */
Loading