Skip to content

Commit

Permalink
eBPF based IP masquerading for external network connectivity
Browse files Browse the repository at this point in the history
  • Loading branch information
vinaykul committed Aug 9, 2022
1 parent 0c49929 commit 2b8c69b
Show file tree
Hide file tree
Showing 21 changed files with 368 additions and 40 deletions.
14 changes: 14 additions & 0 deletions mizar/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,17 @@ def get_cluster_vpc_vni():
cluster_vpc_vni = OBJ_DEFAULTS.default_vpc_vni
logger.info("Using default cluster VNI {}.".format(cluster_vpc_vni))
return cluster_vpc_vni

def get_host_cidr():
default_if = get_default_itf()
default_if_ip_cmd = "ip addr show {} | grep 'inet '".format(default_if)
ret, default_if_ip_line = run_cmd(default_if_ip_cmd)
default_if_ip = default_if_ip_line.split()[1]
host_cidr_cmd = "ip route show src {} dev {} proto kernel | cut -d' ' -f1".format(default_if_ip, default_if)
ret, host_cidr = run_cmd(host_cidr_cmd)
return host_cidr

def get_service_cidr():
#TODO: Hard-coded default for now. Fix it to get from kube API.
service_cidr = "10.96.0.0/16"
return service_cidr
16 changes: 15 additions & 1 deletion mizar/daemon/interface_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,21 @@ def update_agent_metadata(self, interface):
"ip": interface.droplet.ip_address,
"mac": interface.droplet.mac,
"iface": default_itf
}
},
"cluster_cidr": {
"host_cidr": {
"ip": interface.host_cidr_ip,
"prefix": interface.host_cidr_prefix,
},
"pod_cidr": {
"ip": interface.pod_cidr_ip,
"prefix": interface.pod_cidr_prefix,
},
"service_cidr": {
"ip": interface.service_cidr_ip,
"prefix": interface.service_cidr_prefix,
},
},
}
jsonconf = json.dumps(jsonconf)
cmd = f'''{self.trn_cli_update_agent_metadata} -i \'{itf}\' -j \'{jsonconf}\''''
Expand Down
10 changes: 9 additions & 1 deletion mizar/dp/mizar/operators/endpoints/endpoints_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def produce_simple_endpoint_interface(self, ep, task):
bouncers.append(SubstrateAddress(
version="4", ip_address=bouncer.ip, mac=bouncer.mac))

host_cidr = get_host_cidr()
service_cidr = get_service_cidr()
interfaces_list = [Interface(
interface_id=interface.interface_id,
interface_type=interface.interface_type,
Expand All @@ -314,7 +316,13 @@ def produce_simple_endpoint_interface(self, ep, task):
pod_network_class=interface.pod_network_class,
pod_network_priority=interface.pod_network_priority,
vpc_ip=ep.vpc_ip,
vpc_prefix=ep.vpc_prefix
vpc_prefix=ep.vpc_prefix,
host_cidr_ip=host_cidr.split('/')[0],
host_cidr_prefix=host_cidr.split('/')[1],
pod_cidr_ip=ep.vpc_ip,
pod_cidr_prefix=ep.vpc_prefix,
service_cidr_ip=service_cidr.split('/')[0],
service_cidr_prefix=service_cidr.split('/')[1],
)]

if ep.type == OBJ_DEFAULTS.ep_type_host:
Expand Down
6 changes: 6 additions & 0 deletions mizar/proto/mizar/proto/interface.proto
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ message Interface {
string pod_network_class = 13;
string vpc_ip = 14;
string vpc_prefix = 15;
string host_cidr_ip=16;
string host_cidr_prefix=17;
string pod_cidr_ip=18;
string pod_cidr_prefix=19;
string service_cidr_ip=20;
string service_cidr_prefix=21;
}

message InterfacesList {
Expand Down
31 changes: 30 additions & 1 deletion pkg/util/netutil/netutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,43 @@ func ActivateInterface(
return err
}

cmdTxt, result, err := executil.Execute("nsenter", "-t", "1", "-m", "-u", "-n", "-i", "ip", "route", "show", "default")
activateIfLog.WriteString(fmt.Sprintf(" + [Cmd: '%s' -> Result: '%s']", cmdTxt, result))
if err != nil {
return err
}
if result == "" {
activateIfLog.WriteString(fmt.Sprintf(" + [Static ARP entry for gateway nexthop not configured]"))
return nil
}
result = strings.TrimSuffix(result,"\n")
default_gw := strings.Fields(result)[2]
arpCmdTxt, arpEntry, err := executil.Execute("nsenter", "-t", "1", "-m", "-u", "-n", "-i", "arp", "-an", default_gw)
activateIfLog.WriteString(fmt.Sprintf(" + [ARPCmd: '%s' -> ARPEntry: '%s']", arpCmdTxt, arpEntry))
if err != nil {
return err
}
if arpEntry == "" {
activateIfLog.WriteString(fmt.Sprintf(" + [Static ARP entry for gateway nexthop not configured]"))
return nil
}
arpEntry = strings.TrimSuffix(arpEntry,"\n")
gw_nexthop_mac := strings.Fields(arpEntry)[3]
activateIfLog.WriteString(fmt.Sprintf(" + [Add static ARP entry '%s' for gateway nexthop]", gw_nexthop_mac))
arpSetCmd, arpSetResult, err := executil.Execute("arp", "-s", gatewayIp, gw_nexthop_mac)
activateIfLog.WriteString(fmt.Sprintf(" + [ARPsetCmd: '%s' -> ARPsetResult: '%s']", arpSetCmd, arpSetResult))
if err != nil {
return err
}

return nil
}); err != nil {
return activateIfLog.String(), err
}

activateIfLog.WriteString(fmt.Sprintf(" + [Disable tso on interface '%s']", ifName))
cmdTxt, result, err := executil.Execute("ip", "netns", "exec", netNSFileName, "ethtool", "-K", ifName, "tso", "off", "gso", "off", "ufo", "off")
activateIfLog.WriteString(fmt.Sprintf(" + [Cmd: '%s -> Result: '%s']", cmdTxt, result))
activateIfLog.WriteString(fmt.Sprintf(" + [Cmd: '%s' -> Result: '%s']", cmdTxt, result))
if err != nil {
return activateIfLog.String(), err
}
Expand Down
6 changes: 3 additions & 3 deletions src/cli/trn_cli_agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ int trn_cli_update_agent_ep_subcmd(CLIENT *clnt, int argc, char *argv[])
rpc_trn_endpoint_t ep;
char rpc[] = "update_agent_ep_1";

char veth[20];
char hosted_itf[20];
uint32_t remote_ips[RPC_TRN_MAX_REMOTE_IPS];
char veth[20] = {0};
char hosted_itf[20] = {0};
uint32_t remote_ips[RPC_TRN_MAX_REMOTE_IPS] = {0};
ep.remote_ips.remote_ips_val = remote_ips;
ep.remote_ips.remote_ips_len = 0;
ep.veth = veth;
Expand Down
49 changes: 45 additions & 4 deletions src/cli/trn_cli_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -605,21 +605,62 @@ int trn_cli_parse_tun_intf(const cJSON *jsonobj, rpc_trn_tun_intf_t *itf)
return 0;
}

int trn_cli_parse_cidr(const cJSON *jsonobj, rpc_trn_ip_cidr_t *cidr)
{
cJSON *ip = cJSON_GetObjectItem(jsonobj, "ip");
cJSON *prefix = cJSON_GetObjectItem(jsonobj, "prefix");

if (ip != NULL && cJSON_IsString(ip)) {
struct sockaddr_in sa;
inet_pton(AF_INET, ip->valuestring, &(sa.sin_addr));
cidr->ip = htonl(sa.sin_addr.s_addr);
} else {
print_err("Error: CIDR IP is missing or non-string\n");
return -EINVAL;
}
if (cJSON_IsString(prefix)) {
uint32_t prefixlen = (uint32_t)atoi(prefix->valuestring);
prefixlen = (prefixlen > 32) ? 32 : prefixlen;
cidr->netmask = (0xFFFFFFFF << (32 - prefixlen)) & 0xFFFFFFFF;
} else {
print_err("Error: CIDR prefix length missing or non-string\n");
return -EINVAL;
}

return 0;
}

int trn_cli_parse_cluster_cidr(const cJSON *jsonobj, rpc_trn_cluster_cidr_t *cluster_cidr)
{
cJSON *host_cidr = cJSON_GetObjectItem(jsonobj, "host_cidr");
cJSON *pod_cidr = cJSON_GetObjectItem(jsonobj, "pod_cidr");
cJSON *service_cidr = cJSON_GetObjectItem(jsonobj, "service_cidr");

int err_host_cidr, err_pod_cidr, err_service_cidr;
err_host_cidr = trn_cli_parse_cidr(host_cidr, &cluster_cidr->host_cidr);
err_pod_cidr = trn_cli_parse_cidr(pod_cidr, &cluster_cidr->pod_cidr);
err_service_cidr = trn_cli_parse_cidr(service_cidr, &cluster_cidr->service_cidr);
if (err_host_cidr || err_pod_cidr || err_service_cidr) {
return -EINVAL;
}
return 0;
}

int trn_cli_parse_agent_md(const cJSON *jsonobj,
rpc_trn_agent_metadata_t *agent_md)
{
cJSON *ep = cJSON_GetObjectItem(jsonobj, "ep");
cJSON *net = cJSON_GetObjectItem(jsonobj, "net");
cJSON *eth = cJSON_GetObjectItem(jsonobj, "eth");
int err_ep, err_net, err_eth;
cJSON *cluster_cidr = cJSON_GetObjectItem(jsonobj, "cluster_cidr");
int err_ep, err_net, err_eth, err_cidr;
err_ep = trn_cli_parse_ep(ep, &agent_md->ep);
err_net = trn_cli_parse_net(net, &agent_md->net);
err_eth = trn_cli_parse_tun_intf(eth, &agent_md->eth);

if (err_ep || err_net || err_eth) {
err_cidr = trn_cli_parse_cluster_cidr(cluster_cidr, &agent_md->cluster_cidr);
if (err_ep || err_net || err_eth || err_cidr) {
return -EINVAL;
}

return 0;
}

Expand Down
6 changes: 3 additions & 3 deletions src/cli/trn_cli_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ int trn_cli_update_ep_subcmd(CLIENT *clnt, int argc, char *argv[])
rpc_trn_endpoint_t ep;
char rpc[] = "update_ep_1";

char veth[20];
char hosted_itf[20];
uint32_t remote_ips[RPC_TRN_MAX_REMOTE_IPS];
char veth[20] = {0};
char hosted_itf[20] = {0};
uint32_t remote_ips[RPC_TRN_MAX_REMOTE_IPS] = {0};
ep.remote_ips.remote_ips_val = remote_ips;
ep.veth = veth;
ep.hosted_interface = hosted_itf;
Expand Down
8 changes: 6 additions & 2 deletions src/dmn/trn_agent_xdp_usr.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,8 @@ int trn_agent_bpf_maps_init(struct agent_user_metadata_t *md)
md->ing_vsip_supp_map = bpf_map__next(md->ing_vsip_ppo_map, md->obj);
md->ing_vsip_except_map = bpf_map__next(md->ing_vsip_supp_map, md->obj);
md->conn_track_cache = bpf_map__next(md->ing_vsip_except_map, md->obj);
md->ing_pod_label_policy_map = bpf_map__next(md->conn_track_cache, md->obj);
md->masquerade_conn_map = bpf_map__next(md->conn_track_cache, md->obj);
md->ing_pod_label_policy_map = bpf_map__next(md->masquerade_conn_map, md->obj);
md->ing_namespace_label_policy_map = bpf_map__next(md->ing_pod_label_policy_map, md->obj);
md->ing_pod_and_namespace_label_policy_map = bpf_map__next(md->ing_namespace_label_policy_map, md->obj);
md->tx_stats_map = bpf_map__next(md->ing_pod_and_namespace_label_policy_map, md->obj);
Expand All @@ -318,7 +319,7 @@ int trn_agent_bpf_maps_init(struct agent_user_metadata_t *md)
!md->eg_vsip_except_map || !md->ing_vsip_enforce_map ||
!md->ing_vsip_prim_map || !md->ing_vsip_ppo_map ||
!md->ing_vsip_supp_map || !md->ing_vsip_except_map ||
!md->conn_track_cache || !md->packet_metadata_map ||
!md->conn_track_cache || !md->masquerade_conn_map || !md->packet_metadata_map ||
!md->ing_pod_label_policy_map || !md->ing_namespace_label_policy_map ||
!md->ing_pod_and_namespace_label_policy_map || !md->tx_stats_map) {
TRN_LOG_ERROR("Failure finding maps objects.");
Expand All @@ -345,6 +346,7 @@ int trn_agent_bpf_maps_init(struct agent_user_metadata_t *md)
md->ing_vsip_supp_map_fd = bpf_map__fd(md->ing_vsip_supp_map);
md->ing_vsip_except_map_fd = bpf_map__fd(md->ing_vsip_except_map);
md->conn_track_cache_fd = bpf_map__fd(md->conn_track_cache);
md->masquerade_conn_map_fd = bpf_map__fd(md->masquerade_conn_map);
md->ing_pod_label_policy_map_fd = bpf_map__fd(md->ing_pod_label_policy_map);
md->ing_namespace_label_policy_map_fd = bpf_map__fd(md->ing_namespace_label_policy_map);
md->ing_pod_and_namespace_label_policy_map_fd = bpf_map__fd(md->ing_pod_and_namespace_label_policy_map);
Expand Down Expand Up @@ -373,6 +375,7 @@ int trn_agent_bpf_maps_init(struct agent_user_metadata_t *md)
bpf_map__pin(md->ing_vsip_supp_map, ing_vsip_supp_map_path);
bpf_map__pin(md->ing_vsip_except_map, ing_vsip_except_map_path);
bpf_map__pin(md->conn_track_cache, conn_track_cache_path);
bpf_map__pin(md->masquerade_conn_map, masquerade_conn_map_path);
bpf_map__pin(md->ing_pod_label_policy_map, ing_pod_label_policy_map_path);
bpf_map__pin(md->ing_namespace_label_policy_map, ing_namespace_label_policy_map_path);
bpf_map__pin(md->ing_pod_and_namespace_label_policy_map, ing_pod_and_namespace_label_policy_map_path);
Expand Down Expand Up @@ -519,6 +522,7 @@ static int _trn_bpf_agent_prog_load_xattr(struct agent_user_metadata_t *md,
_REUSE_MAP_IF_PINNED(ing_vsip_supp_map);
_REUSE_MAP_IF_PINNED(ing_vsip_except_map);
_REUSE_MAP_IF_PINNED(conn_track_cache);
_REUSE_MAP_IF_PINNED(masquerade_conn_map);
_REUSE_MAP_IF_PINNED(ing_pod_label_policy_map);
_REUSE_MAP_IF_PINNED(ing_namespace_label_policy_map);
_REUSE_MAP_IF_PINNED(ing_pod_and_namespace_label_policy_map);
Expand Down
2 changes: 2 additions & 0 deletions src/dmn/trn_agent_xdp_usr.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ struct agent_user_metadata_t {
int ing_vsip_supp_map_fd;
int ing_vsip_except_map_fd;
int conn_track_cache_fd;
int masquerade_conn_map_fd;
int ing_pod_label_policy_map_fd;
int ing_namespace_label_policy_map_fd;
int ing_pod_and_namespace_label_policy_map_fd;
Expand Down Expand Up @@ -111,6 +112,7 @@ struct agent_user_metadata_t {
struct bpf_map *ing_vsip_supp_map;
struct bpf_map *ing_vsip_except_map;
struct bpf_map *conn_track_cache;
struct bpf_map *masquerade_conn_map;
struct bpf_map *ing_pod_label_policy_map;
struct bpf_map *ing_namespace_label_policy_map;
struct bpf_map *ing_pod_and_namespace_label_policy_map;
Expand Down
21 changes: 18 additions & 3 deletions src/dmn/trn_rpc_protocol_handlers_1.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ int *update_ep_1_svc(rpc_trn_endpoint_t *ep, struct svc_req *rqstp)
goto error;
}

memset(&epval, 0, sizeof(epval));
memcpy(epkey.tunip, &ep->tunid, sizeof(ep->tunid));
epkey.tunip[2] = ep->ip;
epval.eptype = ep->eptype;
Expand Down Expand Up @@ -594,7 +595,9 @@ rpc_trn_endpoint_t *get_ep_1_svc(rpc_trn_endpoint_key_t *argp,
result.eptype = epval.eptype;
memcpy(result.mac, epval.mac, sizeof(epval.mac));
result.remote_ips.remote_ips_len = epval.nremote_ips;
result.remote_ips.remote_ips_val = epval.remote_ips;
for (unsigned int i = 0; i < epval.nremote_ips; i++) {
result.remote_ips.remote_ips_val[i] = epval.remote_ips[i];
}
result.veth = ""; // field to be removed
return &result;

Expand Down Expand Up @@ -832,6 +835,7 @@ int *update_agent_ep_1_svc(rpc_trn_endpoint_t *ep, struct svc_req *rqstp)
goto error;
}

memset(&epval, 0, sizeof(epval));
memcpy(epkey.tunip, &ep->tunid, sizeof(ep->tunid));
epkey.tunip[2] = ep->ip;
epval.eptype = ep->eptype;
Expand Down Expand Up @@ -980,7 +984,9 @@ rpc_trn_endpoint_t *get_agent_ep_1_svc(rpc_trn_endpoint_key_t *argp,
result.eptype = epval.eptype;
memcpy(result.mac, epval.mac, sizeof(epval.mac));
result.remote_ips.remote_ips_len = epval.nremote_ips;
result.remote_ips.remote_ips_val = epval.remote_ips;
for (unsigned int i = 0; i < epval.nremote_ips; i++) {
result.remote_ips.remote_ips_val[i] = epval.remote_ips[i];
}
result.veth = ""; //field to be removed
return &result;

Expand Down Expand Up @@ -1159,6 +1165,13 @@ int *update_agent_md_1_svc(rpc_trn_agent_metadata_t *agent_md,
amd.ep.hosted_iface = amd.eth.iface_index;
memcpy(amd.ep.mac, agent_md->ep.mac, 6 * sizeof(amd.ep.mac[0]));

amd.cluster_cidr.host_cidr.ip = agent_md->cluster_cidr.host_cidr.ip;
amd.cluster_cidr.host_cidr.netmask = agent_md->cluster_cidr.host_cidr.netmask;
amd.cluster_cidr.pod_cidr.ip = agent_md->cluster_cidr.pod_cidr.ip;
amd.cluster_cidr.pod_cidr.netmask = agent_md->cluster_cidr.pod_cidr.netmask;
amd.cluster_cidr.service_cidr.ip = agent_md->cluster_cidr.service_cidr.ip;
amd.cluster_cidr.service_cidr.netmask = agent_md->cluster_cidr.service_cidr.netmask;

rc = trn_agent_update_agent_metadata(md, &amd, eth_md);

if (rc != 0) {
Expand Down Expand Up @@ -1278,7 +1291,9 @@ rpc_trn_agent_metadata_t *get_agent_md_1_svc(rpc_intf_t *argp,
memcpy(result.ep.mac, amd.ep.mac, sizeof(amd.ep.mac));
result.ep.veth = "";
result.ep.remote_ips.remote_ips_len = amd.ep.nremote_ips;
result.ep.remote_ips.remote_ips_val = amd.ep.remote_ips;
for (unsigned int i = 0; i < amd.ep.nremote_ips; i++) {
result.ep.remote_ips.remote_ips_val[i] = amd.ep.remote_ips[i];
}

result.ep.hosted_interface = if_indextoname(amd.ep.hosted_iface, buf);
if (result.ep.hosted_interface == NULL) {
Expand Down
Loading

0 comments on commit 2b8c69b

Please sign in to comment.