From df1460dec015d133a94c22ba480731920695c328 Mon Sep 17 00:00:00 2001 From: zhuangyan Date: Mon, 10 Aug 2020 19:44:02 +0800 Subject: [PATCH] dpvs: split all the members of dpvs connection into two groups for its initialization and it's for the benefit of CPS. a) Remove the operation of bezeroing all the members of dpvs connection within dp_vs_conn_alloc(). b) Each member of dpvs connection is initialized ONCE via dp_vs_conn_pre_init() or dp_vs_conn_new(). - dp_vs_conn_pre_init() initializes all the members of one group as 0, but ack_mbuf. - dp_vs_conn_new() initializes all the members of the other group with the specific values. --- include/ipvs/conn.h | 169 +++++++++++++++++++++++------------------- src/ipvs/ip_vs_conn.c | 20 ++++- 2 files changed, 111 insertions(+), 78 deletions(-) diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h index 3aee76b00..2dc53acd6 100644 --- a/include/ipvs/conn.h +++ b/include/ipvs/conn.h @@ -51,27 +51,26 @@ enum { #define DPVS_CONN_F_NOFASTXMIT IP_VS_CONN_F_NOFASTXMIT struct dp_vs_conn_param { - int af; - uint16_t proto; - const union inet_addr *caddr; - const union inet_addr *vaddr; - uint16_t cport; - uint16_t vport; - uint16_t ct_dport; /* RS port for template connection */ - bool outwall; + uint8_t af; + bool outwall; + uint16_t proto; + uint16_t cport; + uint16_t vport; + const union inet_addr *caddr; + const union inet_addr *vaddr; + uint16_t ct_dport; /* RS port for template connection */ }; struct conn_tuple_hash { struct list_head list; - int direct; /* inbound/outbound */ - - /* tuple info */ - int af; - uint16_t proto; - union inet_addr saddr; /* pkt's source addr */ - union inet_addr daddr; /* pkt's dest addr */ + uint8_t af; + uint8_t proto; + uint8_t direct; /* inbound/outbound */ + uint8_t pad; uint16_t sport; uint16_t dport; + union inet_addr saddr; /* pkt's source addr */ + union inet_addr daddr; /* pkt's dest addr */ } __rte_cache_aligned; struct dp_vs_conn_stats { @@ -83,89 +82,109 @@ struct dp_vs_conn_stats { struct dp_vs_proto; +/* + * All the members of dp_vs_conn are classified into two groups, A and B. + * And a new member must be added to either of them. + */ struct dp_vs_conn { - int af; + RTE_MARKER cacheline0; + /* + * Group A: the below members are initialized in dp_vs_conn_new(). + */ + struct conn_tuple_hash tuplehash[DPVS_CONN_DIR_MAX]; + + RTE_MARKER cacheline2 __rte_cache_min_aligned; + union inet_addr caddr; /* Client address */ + union inet_addr vaddr; /* Virtual address */ + union inet_addr laddr; /* director Local address */ + union inet_addr daddr; /* Destination (RS) address */ + + RTE_MARKER cacheline3 __rte_cache_min_aligned; + uint8_t af; uint8_t proto; - union inet_addr caddr; /* Client address */ - union inet_addr vaddr; /* Virtual address */ - union inet_addr laddr; /* director Local address */ - union inet_addr daddr; /* Destination (RS) address */ + lcoreid_t lcore; + bool outwall; /* flag for gfwip */ + rte_atomic32_t refcnt; + uint16_t cport; uint16_t vport; uint16_t lport; uint16_t dport; - struct rte_mempool *connpool; - struct conn_tuple_hash tuplehash[DPVS_CONN_DIR_MAX]; - rte_atomic32_t refcnt; - struct dpvs_timer timer; + struct rte_mempool *connpool; + struct dp_vs_dest *dest; /* real server */ struct timeval timeout; - lcoreid_t lcore; - struct dp_vs_dest *dest; /* real server */ - void *prot_data; /* protocol specific data */ - - /* for FNAT */ - struct dp_vs_laddr *local; /* local address */ - struct dp_vs_seq fnat_seq; - - /* save last SEQ/ACK from RS for RST when conn expire*/ - uint32_t rs_end_seq; - uint32_t rs_end_ack; - int (*packet_xmit)(struct dp_vs_proto *prot, - struct dp_vs_conn *conn, - struct rte_mbuf *mbuf); + struct dp_vs_conn *conn, struct rte_mbuf *mbuf); int (*packet_out_xmit)(struct dp_vs_proto *prot, - struct dp_vs_conn *conn, - struct rte_mbuf *mbuf); + struct dp_vs_conn *conn, struct rte_mbuf *mbuf); + + RTE_MARKER cacheline4 __rte_cache_min_aligned; + struct dp_vs_laddr *local; /* local address in fnat mode */ + volatile uint32_t flags; + + /* + * Group B: the below members are initialized in dp_vs_conn_pre_init(). + */ + /* state transition */ + uint16_t pad1; + volatile uint8_t state; + volatile uint8_t old_state; /* used for state transition + triggered synchronization */ + /* route for neigbour */ + struct netif_port *in_dev; /* inside to rs */ + struct netif_port *out_dev; /* outside to client */ + + union inet_addr in_nexthop; /* to rs*/ + union inet_addr out_nexthop; /* to client*/ + RTE_MARKER cacheline5 __rte_cache_min_aligned; /* L2 fast xmit */ struct rte_ether_addr in_smac; struct rte_ether_addr in_dmac; struct rte_ether_addr out_smac; struct rte_ether_addr out_dmac; - /* route for neigbour */ - struct netif_port *in_dev; /* inside to rs*/ - struct netif_port *out_dev; /* outside to client*/ - union inet_addr in_nexthop; /* to rs*/ - union inet_addr out_nexthop; /* to client*/ - -#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG - /* statistics */ - struct dp_vs_conn_stats stats; -#endif + /* save last SEQ/ACK from RS for RST when conn expire */ + uint32_t rs_end_seq; + uint32_t rs_end_ack; - /* synproxy related members */ - struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */ - struct list_head ack_mbuf; /* ack mbuf saved in step2 */ - uint32_t ack_num; /* ack mbuf number stored */ - struct rte_mbuf *syn_mbuf; /* saved rs syn packet for retransmition */ - rte_atomic32_t syn_retry_max; /* syn retransmition max packets */ + /* for FNAT */ + struct dp_vs_seq fnat_seq; /* add for stopping ack storm */ - uint32_t last_seq; /* seq of the last ack packet */ - uint32_t last_ack_seq; /* ack seq of the last ack packet */ - rte_atomic32_t dup_ack_cnt; /* count of repeated ack packets */ - - /* flags and state transition */ - volatile uint16_t flags; - volatile uint16_t state; - volatile uint16_t old_state; /* old state, to be used for state transition - triggered synchronization */ - /* controll members */ - struct dp_vs_conn *control; /* master who controlls me */ - rte_atomic32_t n_control; /* number of connections controlled by me*/ -#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG - uint64_t ctime; /* create time */ -#endif + uint32_t last_seq; /* seq of the last ack packet */ + uint32_t last_ack_seq; /* ack seq of the last ack packet */ + rte_atomic32_t dup_ack_cnt; /* count of repeated ack packets */ + uint32_t pad2; - /* connection redirect in fnat/snat/nat modes */ - struct dp_vs_redirect *redirect; + RTE_MARKER cacheline6 __rte_cache_min_aligned; + /* synproxy related members */ + struct list_head ack_mbuf; /* ack mbuf saved in step2 */ + struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */ + struct rte_mbuf *syn_mbuf; /* saved rs syn packet for retransmition */ + uint32_t ack_num; /* ack mbuf number stored */ + rte_atomic32_t syn_retry_max; /* syn retransmition max packets */ + + /* control members */ + RTE_MARKER cacheline7 __rte_cache_min_aligned; + struct dpvs_timer timer; + struct dp_vs_conn *control; /* master who controlls me */ + void *prot_data; /* protocol specific data */ - /* flag for gfwip */ - bool outwall; + rte_atomic32_t n_control; /* number of connections controlled + by me */ + uint32_t pad3; + RTE_MARKER cacheline8 __rte_cache_min_aligned; +#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG + uint64_t ctime; /* create time */ + struct dp_vs_conn_stats stats; /* statistics */ +#endif + /* + * the below member is initialized in dp_vs_conn_alloc(). + */ + struct dp_vs_redirect *redirect; /* used in fnat/snat/nat modes */ } __rte_cache_aligned; /* for syn-proxy to save all ack packet in conn before rs's syn-ack arrives */ diff --git a/src/ipvs/ip_vs_conn.c b/src/ipvs/ip_vs_conn.c index 68a74608b..b6821526c 100644 --- a/src/ipvs/ip_vs_conn.c +++ b/src/ipvs/ip_vs_conn.c @@ -99,14 +99,13 @@ static struct dp_vs_conn *dp_vs_conn_alloc(enum dpvs_fwd_mode fwdmode, return NULL; } - conn->connpool = this_conn_cache; this_conn_count++; /* no need to create redirect for the global template connection */ if (likely((flags & DPVS_CONN_F_TEMPLATE) == 0)) r = dp_vs_redirect_alloc(fwdmode); - conn->redirect = r; + conn->redirect = r; return conn; } @@ -800,6 +799,18 @@ static void conn_flush(void) #endif } +static inline void dp_vs_conn_pre_init(struct dp_vs_conn *new) +{ + size_t len; + + len = offsetof(struct dp_vs_conn, redirect) + - offsetof(struct dp_vs_conn, pad1); + + memset(&new->pad1, 0, len); + + INIT_LIST_HEAD(&new->ack_mbuf); +} + struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf, const struct dp_vs_iphdr *iph, struct dp_vs_conn_param *param, @@ -817,7 +828,10 @@ struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf, if (unlikely(!new)) return NULL; - new->flags = flags; + dp_vs_conn_pre_init(new); + + new->connpool = this_conn_cache; + new->flags = flags; /* set proper RS port */ if (dp_vs_conn_is_template(new) || param->ct_dport != 0)