Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dpvs session synchronization #476

Open
wants to merge 4 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conf/dpvs.bond.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -371,4 +371,5 @@ ipvs_defs {
! sa_pool config
sa_pool {
pool_hash_size 16
pool_mode laddr_lcore_mapping
}
1 change: 1 addition & 0 deletions conf/dpvs.conf.items
Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,5 @@ ipvs_defs {

sa_pool {
<init> pool_hash_size 16 <16, 1-128>
<init> pool_mode laddr_lcore_mapping
}
17 changes: 17 additions & 0 deletions conf/dpvs.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,21 @@ ipvs_defs {
! sa_pool config
sa_pool {
pool_hash_size 16
pool_mode laddr_lcore_mapping
}

session_sync {
sync_session_enable
sync_session_elapse 2 ! seconds elapsed since the connection is established
sync_buff_delay 2
laddr_ifname dpdk0
sync_id 10

socket {
mcast_addr 224.0.1.100
mcast_port 8088
mcast_ttl 20
mtu 1500
unicast_port 8089
}
}
1 change: 1 addition & 0 deletions conf/dpvs.conf.single-bond.sample
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,5 @@ ipvs_defs {
! sa_pool config
sa_pool {
pool_hash_size 16
pool_mode laddr_lcore_mapping
}
1 change: 1 addition & 0 deletions conf/dpvs.conf.single-nic.sample
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,5 @@ ipvs_defs {
! sa_pool config
sa_pool {
pool_hash_size 16
pool_mode laddr_lcore_mapping
}
Binary file added doc/pics/synchronization-principle.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
67 changes: 67 additions & 0 deletions doc/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ DPVS Tutorial
- [KNI for virtual device](#vdev-kni)
* [UDP Option of Address (UOA)](#uoa)
* [Launch DPVS in Virtual Machine (Ubuntu)](#Ubuntu16.04)
* [Session synchronization](#session-sync)

> To compile and launch DPVS, pls check *README.md* for this project.

Expand Down Expand Up @@ -1120,3 +1121,69 @@ worker_defs {
}

```
<a id='session-sync'/>

# Session synchronization

Session synchronization reduces the number of connections that break when a DPVS node in the cluster fails.
There are two synchronization modes, full and incremental:
* incremental synchronization is used for new sessions
* full synchronization is used for existing sessions

The basic synchronization principle is shown below:
![synchronization-principle.png](pics/synchronization-principle.png)

After adding a new DPVS node to the cluster, run the following command on it to select the nodes to synchronize with and to synchronize the existing sessions.

```bash
$ ipvsadm --conn-sync
```
weiyanhua100 marked this conversation as resolved.
Show resolved Hide resolved
If you would like to use session synchronization, add the following lines into the device configs of `dpvs.conf`:

```
! worker config (lcores)
worker_defs {
<init> worker cpu0 {
type master
cpu_id 0
}

<init> worker cpu1 {
type slave
cpu_id 1
port dpdk0 {
rx_queue_ids 0
tx_queue_ids 0
! isol_rx_cpu_ids 9
! isol_rxq_ring_sz 1048576
}
}
.......

<init> worker cpu10 {
type sync-tx
cpu_id 10
}

<init> worker cpu11 {
type sync-rx
cpu_id 11
}
}

session_sync {
sync_session_enable
sync_session_elapse 2 ! seconds elapsed since the connection is established
sync_buff_delay 2
laddr_ifname dpdk0
sync_id 8

socket {
mcast_addr 224.0.1.100
mcast_port 8088
mcast_ttl 20
mtu 1500
unicast_port 8089
}
}
```
1 change: 1 addition & 0 deletions include/conf/conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct ip_vs_conn_entry {
uint16_t in_af;
uint16_t out_af;
uint16_t proto;
uint16_t syncid;
union inet_addr caddr;
union inet_addr vaddr;
union inet_addr laddr;
Expand Down
12 changes: 12 additions & 0 deletions include/ipvs/conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum {
};

enum {
DPVS_CONN_F_SYNCED = 0x0020,
DPVS_CONN_F_HASHED = 0x0040,
DPVS_CONN_F_REDIRECT_HASHED = 0x0080,
DPVS_CONN_F_INACTIVE = 0x0100,
Expand Down Expand Up @@ -95,6 +96,12 @@ struct dp_vs_conn {
rte_atomic32_t refcnt;
struct dpvs_timer timer;
struct timeval timeout;

struct dpvs_timer conn_sync_timer;
struct timeval conn_sync_timeout;
uint16_t syncid;
queueid_t qid; /* used in session synchronization*/

lcoreid_t lcore;
struct dp_vs_dest *dest; /* real server */
void *prot_data; /* protocol specific data */
Expand Down Expand Up @@ -180,6 +187,11 @@ dp_vs_conn_new(struct rte_mbuf *mbuf,
uint32_t flags);
int dp_vs_conn_del(struct dp_vs_conn *conn);

/*
 * Create a connection from a synchronized connection record and bind it to
 * the given destination.
 * NOTE(review): sync_conn is untyped in this prototype; presumably it points
 * to a struct dp_vs_sync_conn -- confirm against the definition.
 * NOTE(review): presumably returns NULL on failure -- confirm.
 */
struct dp_vs_conn * dp_vs_conn_copy_from_sync(void *sync_conn,
struct dp_vs_dest *dest);

/*
 * Per-lcore transmit hook for connections, taking the lcore id.
 * NOTE(review): exact work done and return convention are not visible in
 * this header -- confirm against the definition.
 */
int dp_vs_conn_lcore_tx(lcoreid_t cid);

struct dp_vs_conn *
dp_vs_conn_get(int af, uint16_t proto,
const union inet_addr *saddr,
Expand Down
4 changes: 4 additions & 0 deletions include/ipvs/dest.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ int dp_vs_new_dest(struct dp_vs_service *svc, struct dp_vs_dest_conf *udest,
struct dp_vs_dest *dp_vs_lookup_dest(int af, struct dp_vs_service *svc,
const union inet_addr *daddr, uint16_t dport);

/*
 * Find a destination (real server) from its address/port together with the
 * virtual address/port and protocol, without the caller supplying a
 * struct dp_vs_service pointer.
 * NOTE(review): presumably returns NULL when nothing matches, and the port
 * byte order is presumably network order -- confirm against the definition.
 */
struct dp_vs_dest *dp_vs_find_dest(int af, const union inet_addr *daddr,
uint16_t dport, const union inet_addr *vaddr,
uint16_t vport, uint16_t protocol);

struct dp_vs_dest *dp_vs_trash_get_dest(struct dp_vs_service *svc,
const union inet_addr *daddr, uint16_t dport);

Expand Down
12 changes: 11 additions & 1 deletion include/ipvs/service.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@

rte_rwlock_t __dp_vs_svc_lock;

/*
 * Local address (LIP) pool state kept once per lcore: struct dp_vs_service
 * embeds an array of these indexed by lcore id.
 * NOTE(review): "pre_lcore" presumably means "per_lcore" -- confirm before
 * any rename, since the tag is part of the public header.
 */
struct laddr_list_pre_lcore {
struct list_head laddr_list; /* local address (LIP) pool */
/* NOTE(review): presumably the cursor for the next address to hand out -- confirm */
struct list_head *laddr_curr;
/* NOTE(review): presumably the number of entries on laddr_list -- confirm */
uint32_t num_laddrs;
};

/* virtual service */
struct dp_vs_service {
struct list_head s_list; /* node for normal service table */
Expand Down Expand Up @@ -88,6 +94,9 @@ struct dp_vs_service {
rte_rwlock_t laddr_lock;
uint32_t num_laddrs;

struct laddr_list_pre_lcore pre_list[RTE_MAX_LCORE];
#define this_pre_list pre_list[rte_lcore_id()]

/* ... flags, timer ... */
} __rte_cache_aligned;
#endif
Expand Down Expand Up @@ -232,6 +241,7 @@ enum{
DPVS_SO_SET_EDITDEST,
DPVS_SO_SET_DELDEST,
DPVS_SO_SET_GRATARP,
DPVS_SO_SET_CONN_SYNC,
};

enum{
Expand All @@ -244,7 +254,7 @@ enum{


#define SOCKOPT_SVC_BASE DPVS_SO_SET_FLUSH
#define SOCKOPT_SVC_SET_CMD_MAX DPVS_SO_SET_GRATARP
#define SOCKOPT_SVC_SET_CMD_MAX DPVS_SO_SET_CONN_SYNC
#define SOCKOPT_SVC_GET_CMD_MAX DPVS_SO_GET_DESTS

#define MAX_ARG_LEN (sizeof(struct dp_vs_service_user) + \
Expand Down
177 changes: 177 additions & 0 deletions include/ipvs/sync.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* DPVS is a software load balancer (Virtual Server) based on DPDK.
*
* Copyright (C) 2017 iQIYI (www.iqiyi.com).
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __DPVS_SYNC_H__
#define __DPVS_SYNC_H__

#include "common.h"
#include "list.h"
#include "ipvs/conn.h"

#define RTE_LOGTYPE_SYNC RTE_LOGTYPE_USER1

#define CONN_SYNC_RING_SIZE 2048

/* maximum pkt number at a single burst */
#define CONN_SYNC_MAX_PKT_BURST 32

/*
 * DPVS sync connection entry.
 *
 * Snapshot of one connection as handled by session synchronization:
 * addresses and ports of client, virtual service, director and real server,
 * plus flags/state and the sequence information.
 *
 * NOTE(review): the struct is not declared packed and mixes uint8_t, int,
 * uint16_t and uint32_t members, so compiler-inserted padding is part of its
 * layout. If this is copied verbatim into network messages, both peers must
 * be built with the same ABI -- confirm.
 * NOTE(review): syncid is uint16_t here but uint8_t in struct
 * dp_vs_sync_head -- confirm which width is intended.
 */
struct dp_vs_sync_conn {
uint8_t reserved;

int af; /* address family */
uint8_t proto; /* protocol number */
union inet_addr caddr; /* Client address */
union inet_addr vaddr; /* Virtual address */
union inet_addr laddr; /* director Local address */
union inet_addr daddr; /* Destination (RS) address */
/* Ports matching caddr/vaddr/laddr/daddr above; byte order presumably network order -- confirm */
uint16_t cport;
uint16_t vport;
uint16_t lport;
uint16_t dport;
/* NOTE(review): presumably the queue and lcore that own the connection on the sender -- confirm */
queueid_t qid;
lcoreid_t lcore;

/* Flags and state transition */
uint16_t flags; /* status flags */
uint16_t state; /* state info */

/* The sequence options start here */
struct dp_vs_seq fnat_seq;
uint32_t rs_end_seq;
uint32_t rs_end_ack;
uint16_t syncid; /* NOTE(review): presumably the sync id of the originating node -- confirm */
};

/*
 * Common header placed at the start of struct dp_vs_sync_mesg and
 * struct dp_vs_sync_nego.
 */
struct dp_vs_sync_head {
/* NOTE(review): presumably DP_VS_SYNC_CONN_INFO or DP_VS_SYNC_NEGO_INFO -- confirm */
uint8_t type;
uint8_t syncid; /* 8-bit sync id (dp_vs_sync_conn carries a 16-bit one) */
/* NOTE(review): presumably the message length in bytes; byte order not visible here -- confirm */
uint16_t size;
};

/*
 * Header of a connection sync message; its size is exposed as
 * DP_VS_SYNC_MESG_HEADER_LEN.
 * NOTE(review): presumably followed in the buffer by nr_conns
 * struct dp_vs_sync_conn entries -- confirm.
 */
struct dp_vs_sync_mesg {
struct dp_vs_sync_head head;
uint8_t nr_conns; /* connection count; uint8_t limits it to 255 */
};
weiyanhua100 marked this conversation as resolved.
Show resolved Hide resolved

/*
 * Negotiation message exchanged between sync peers.
 * NOTE(review): not declared packed, so padding between code, peer_syncid
 * and uptime is compiler-defined -- confirm both peers agree on the layout.
 */
struct dp_vs_sync_nego {
struct dp_vs_sync_head head;
/* NOTE(review): presumably one of the DP_VS_SYNC_INFO_*_CODE values -- confirm */
uint32_t code;
uint8_t peer_syncid;
/* NOTE(review): units and reference point of uptime are not visible here -- confirm */
uint64_t uptime;
};

/*
 * Record of a sync peer: its sync id, uptime and IPv4 socket address.
 * NOTE(review): presumably filled in from received negotiation messages --
 * confirm.
 */
struct dp_vs_sync_peer {
uint8_t syncid;
uint64_t uptime;
struct sockaddr_in addr; /* IPv4 only (sockaddr_in) */
};

/*
 * A buffer holding one sync message, linkable into a list.
 */
struct dp_vs_sync_buff {
struct list_head list; /* list linkage */
/* NOTE(review): presumably the time the buffer was first used, compared against sync_buff_delay -- confirm units */
uint64_t firstuse;

/* pointers for the message data */
struct dp_vs_sync_mesg *mesg;
/* NOTE(review): presumably head is the current write position and end the buffer limit -- confirm */
unsigned char *head;
unsigned char *end;
};

/*
 * Full-synchronization progress for one forwarding lcore. The fields are
 * read and written through the DP_VS_SYNC_FULL_* macros.
 */
struct dp_vs_sync_fwd_core {
int cid; /* lcore id */
/* NOTE(review): presumably the last connection-table index already synced -- confirm */
int last_index;
bool start; /* tested by DP_VS_SYNC_FULL_IS_START() */
bool end; /* tested by DP_VS_SYNC_FULL_IS_END() */
};

/*
 * Table of per-lcore full-sync state; the DP_VS_SYNC_FULL_* macros index
 * fwd_core[] directly by lcore id.
 */
struct dp_vs_sync_core {
/* NOTE(review): presumably the number of forwarding lcores taking part -- confirm */
int core_cnt;
struct dp_vs_sync_fwd_core fwd_core[DPVS_MAX_LCORE];
};

/*
 * Transport used for a sync message; passed to dp_vs_sync_conn_enqueue().
 */
typedef enum {
DP_VS_SYNC_MCAST = 0, /* multicast */
DP_VS_SYNC_UNICAST = 1, /* unicast */
DP_VS_SYNC_MAX = 2, /* number of types; not a valid transport itself */
} dp_vs_sync_type;

/*
 * Session synchronization settings; a single global instance is declared
 * as g_dp_vs_sync_conf.
 * NOTE(review): the mapping to the session_sync{} configuration keywords
 * given below is inferred from the names -- confirm against the parser.
 */
struct dp_vs_sync_conf {
lcoreid_t sync_rx_lcore; /* presumably set via dp_vs_sync_set_rx_core() -- confirm */
lcoreid_t sync_tx_lcore; /* presumably set via dp_vs_sync_set_tx_core() -- confirm */
int syncid; /* presumably "sync_id"; note the message header carries only 8 bits */
int sync_enable; /* presumably "sync_session_enable" */
int sync_conn_elapse; /* presumably "sync_session_elapse" */
int sync_buff_delay; /* presumably "sync_buff_delay" */
int sync_per_time_cnt; /* read through DP_VS_SYNC_FULL_CNT_PER_TIME */
int send_mesg_maxlen;
int recv_mesg_maxlen;
char laddr_ifname[IFNAMSIZ]; /* presumably "laddr_ifname" */
};

/* Sizes, in bytes, of one connection entry and of the connection message header. */
#define DP_VS_SYNC_CONN_SIZE (sizeof(struct dp_vs_sync_conn))
#define DP_VS_SYNC_MESG_HEADER_LEN (sizeof(struct dp_vs_sync_mesg))

/* Message kinds. NOTE(review): presumably the values of dp_vs_sync_head.type -- confirm. */
#define DP_VS_SYNC_CONN_INFO (0)
#define DP_VS_SYNC_NEGO_INFO (1)

/* Negotiation codes. NOTE(review): presumably the values of dp_vs_sync_nego.code -- confirm. */
#define DP_VS_SYNC_INFO_PROBE_CODE (0)
#define DP_VS_SYNC_INFO_REPLY_CODE (1)
#define DP_VS_SYNC_INFO_FETCH_CODE (2)
#define DP_VS_SYNC_INFO_DONE_CODE (3)

/* NOTE(review): presumably defaults for sync_buff_delay and sync_per_time_cnt -- confirm. */
#define DP_VS_SYNC_DELAY_SECONDS (2)
#define DP_VS_SYNC_CONN_CNT_PER_TIME (128)

/*
 * MAX(x, y): evaluates to the larger of x and y.
 *
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects (e.g. MAX(i++, j)).
 *
 * Guarded with #ifndef because system headers such as <sys/param.h> may
 * already define MAX; an unconditional definition here would redefine it.
 */
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif

/* Global per-lcore full-sync state; declared here, defined in another translation unit. */
extern struct dp_vs_sync_core g_dp_vs_sync_fwd_core;
/*
 * Accessors for the full-sync state of lcore `cid`.
 * cid is used directly as an index into fwd_core[] with no bounds check,
 * so callers must pass a value below DPVS_MAX_LCORE.
 */
/* Non-zero when the `start` flag of lcore cid is set. */
#define DP_VS_SYNC_FULL_IS_START(cid) \
(g_dp_vs_sync_fwd_core.fwd_core[cid].start == true)

/* Non-zero when the `end` flag of lcore cid is set. */
#define DP_VS_SYNC_FULL_IS_END(cid) \
(g_dp_vs_sync_fwd_core.fwd_core[cid].end == true)

/* Store index as last_index of lcore cid; evaluates to the stored value. */
#define DP_VS_SYNC_FULL_SET_LAST_INDEX(cid, index) \
(g_dp_vs_sync_fwd_core.fwd_core[cid].last_index = index)

/* Read last_index of lcore cid. */
#define DP_VS_SYNC_FULL_GET_LAST_INDEX(cid) \
(g_dp_vs_sync_fwd_core.fwd_core[cid].last_index)

/* Global session-sync configuration; declared here, defined in another translation unit. */
extern struct dp_vs_sync_conf g_dp_vs_sync_conf;
/* Expands to the sync_per_time_cnt member of the global configuration (an lvalue). */
#define DP_VS_SYNC_FULL_CNT_PER_TIME \
g_dp_vs_sync_conf.sync_per_time_cnt

/*
 * Public entry points of the session synchronization module.
 * NOTE(review): only the declarations are visible in this header; return
 * conventions are presumably the usual DPVS error codes -- confirm against
 * the definitions.
 */

/* Module lifecycle and configuration-keyword registration. */
int dp_vs_sync_init(void);
int dp_vs_sync_term(void);
void install_session_sync_keywords(void);

/* Sync lcore assignment and main loop. */
int dp_vs_sync_set_rx_core(lcoreid_t cid);
int dp_vs_sync_set_tx_core(lcoreid_t cid);
void dp_vs_sync_run_loop(lcoreid_t cid);
int dp_vs_sync_lcore_process_rx_msg(lcoreid_t cid);

/* Per-connection synchronization. */
void dp_vs_sync_conn_enqueue(struct dp_vs_conn *cp, dp_vs_sync_type type);
int dp_vs_sync_conn_handler(struct dp_vs_conn *conn, int new_state);
int dp_vs_sync_conn_start(void);

/* Peer negotiation and full synchronization of one lcore. */
int dp_vs_sync_recv_nego(const char *buf, int len, struct sockaddr_in *remote_addr);
int dp_vs_sync_full_start(lcoreid_t cid);
int dp_vs_sync_full_end(lcoreid_t cid);

/* Accessor for the configured local-address interface name. */
char *dp_vs_sync_laddr_ifname(void);

#endif /* __DPVS_SYNC_H__ */
Loading