Skip to content

Latest commit

 

History

History
398 lines (308 loc) · 12.3 KB

2016-11-25-two-way-to-set-vf-mac.md

File metadata and controls

398 lines (308 loc) · 12.3 KB
layout title date categories tags excerpt
post
Two ways to set the MAC address of an SR-IOV VF
2016-11-25 09:00:30 -0800
Linux
network sriov
Two ways to set the MAC address of an SR-IOV VF

1 问题

# ls /sys/class/net/eth1/device/virtfn2/net/
dev8

# ip link show eth1                         
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
    link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
    vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
    vf 2 MAC 14:05:0a:f5:ac:3e, vlan 3
    vf 3 MAC 14:05:0a:f5:ac:42, vlan 3
    vf 4 MAC 14:05:0a:f5:ac:46, vlan 3
    vf 5 MAC 00:00:00:00:00:00
    vf 6 MAC 00:00:00:00:00:00

# ip link show dev8
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
    link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff

直接设置VF设备dev8的MAC返回错误:

# ip link set dev8 address 14:05:00:f5:ac:3e
RTNETLINK answers: Cannot assign requested address

# dmesg
[682286.034307] igb 0000:03:00.0: VF 2 attempted to override administratively set MAC address
[682286.034307] Reload the VF driver to resume operations

通过PF设置VF的MAC没有返回错误:

# ip link set eth1 vf 2 mac 14:05:00:f5:ac:3e
# dmesg
[682350.583348] igb 0000:03:00.0: setting MAC 14:05:00:f5:ac:3e on VF 2
[682350.583351] igb 0000:03:00.0: Reload the VF driver to make this change effective.

# ip link show eth1
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
    link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
    vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
    vf 2 MAC 14:05:00:f5:ac:3e, vlan 3
...

# ip link show dev8
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
    link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff

可以看到,新的MAC地址的确写到了PF的配置,但没有写到VF网络设备。

这里有2个问题:

(1)为什么不能通过第一种方式直接设置VF网络设备的MAC地址?

(2)通过第二种方式设置VF的MAC地址后,为什么不能反映到VF网络设备?

2 原因

先看看两者的区别与实现:

2.1 ip link set dev $VFDEV address $MAC

  • VF端

最终会到VF的驱动igbvf/netdev.c

/**
 * igbvf_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure (used here as a struct sockaddr)
 *
 * Stores the requested address in hw->mac.addr, hands it to the rar_set op,
 * and publishes it in netdev->dev_addr only if hw->mac.addr still holds the
 * requested value afterwards.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if the address is not a valid
 * Ethernet address or if hw->mac.addr no longer matches the request after
 * the rar_set call.
 **/
static int igbvf_set_mac(struct net_device *netdev, void *p)
{
	struct igbvf_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct sockaddr *addr = p;

	/* Reject the request up front if it is not a valid Ethernet address. */
	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	/* Tentatively record the new address; rar_set may overwrite it. */
	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

	/* Per the article's annotation this op resolves to e1000_rar_set_vf. */
	hw->mac.ops.rar_set(hw, hw->mac.addr, 0); ///e1000_rar_set_vf

	/*
	 * rar_set returns nothing, so a refusal is detected indirectly: if
	 * hw->mac.addr no longer equals the requested address, report failure
	 * and leave netdev->dev_addr untouched.
	 */
	if (memcmp(addr->sa_data, hw->mac.addr, 6))
		return -EADDRNOTAVAIL;

	/* Accepted: make the new address visible on the net_device. */
	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 

	return 0;
}

在将MAC地址拷贝到net_device->dev_addr之前,会调用e1000_rar_set_vf,向PF发送E1000_VF_SET_MAC_ADDR消息:

/**
 *  e1000_rar_set_vf - set device MAC address
 *  @hw: pointer to the HW structure
 *  @addr: pointer to the receive address
 *  @index: receive address array register (not referenced in this body)
 *
 *  Sends an E1000_VF_SET_MAC_ADDR mailbox message carrying @addr and reads
 *  the reply. If the reply is a NACK for that message,
 *  e1000_read_mac_addr_vf() is called. Returns nothing; mailbox errors are
 *  not reported to the caller.
 **/
static void e1000_rar_set_vf(struct e1000_hw *hw, u8 * addr, u32 index)
{
	struct e1000_mbx_info *mbx = &hw->mbx;
	u32 msgbuf[3];
	/* The 6 MAC bytes are placed starting at the second message word. */
	u8 *msg_addr = (u8 *)(&msgbuf[1]);
	s32 ret_val;

	/* Zero the whole 3-word (12-byte) message before filling it in. */
	memset(msgbuf, 0, 12);
	msgbuf[0] = E1000_VF_SET_MAC_ADDR;
	memcpy(msg_addr, addr, 6);
	ret_val = mbx->ops.write_posted(hw, msgbuf, 3);

	/* Only wait for a reply if the request was written successfully. */
	if (!ret_val)
		ret_val = mbx->ops.read_posted(hw, msgbuf, 3); ///e1000_read_posted_mbx

	/* Mask off the CTS bit so the comparison below sees only type + ACK/NACK. */
	msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;

	/* if nacked the address was rejected, use "perm_addr" */
	if (!ret_val &&
	    (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK)))
		e1000_read_mac_addr_vf(hw);
}

如果PF返回NACK(E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK),则使用perm_addr:

/**
 *  e1000_read_mac_addr_vf - Read device MAC address
 *  @hw: pointer to the HW structure
 *
 *  Overwrites hw->mac.addr with hw->mac.perm_addr (ETH_ALEN bytes).
 *  Always returns E1000_SUCCESS.
 **/
static s32 e1000_read_mac_addr_vf(struct e1000_hw *hw)
{
	/* No hardware access here: the current address is reset from perm_addr. */
	memcpy(hw->mac.addr, hw->mac.perm_addr, ETH_ALEN);

	return E1000_SUCCESS;
}
  • PF端 当PF收到VF的E1000_VF_SET_MAC_ADDR消息时,如果没有设置过IGB_VF_FLAG_PF_SET_MAC标志,则更新PF驱动保存的有关VF的MAC信息;
/*
 * igb_rcv_msg_from_vf - PF-side handling of a mailbox message from VF @vf.
 *
 * Abridged excerpt: the local declarations (hw, pdev, msgbuf, vf_data,
 * retval), the other switch cases and the end of the switch are elided
 * (marked with "///...").
 *
 * For E1000_VF_SET_MAC_ADDR the request is applied only when the PF has not
 * set IGB_VF_FLAG_PF_SET_MAC for this VF; otherwise it is refused and a
 * warning is logged. The result is sent back to the VF as ACK or NACK.
 */
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
///...
	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	/* The message type is taken from the low 16 bits of the first word. */
	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		/* Default to failure; only cleared if the PF accepts the request. */
		retval = -EINVAL;
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		else
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
				 vf);
		break;

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK; /// non-zero retval: the PF did not update the MAC
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	/* Only the first message word (type + status bits) is written back. */
	igb_write_mbx(hw, msgbuf, 1, vf);
}

当PF更新MAC失败或者标志位IGB_VF_FLAG_PF_SET_MAC设置时,会给VF返回E1000_VT_MSGTYPE_NACK消息。

igb_set_vf_mac_addr直接调用igb_set_vf_mac:

/*
 * igb_set_vf_mac - record and program the MAC address of VF @vf.
 * @adapter: PF adapter structure
 * @vf: VF index
 * @mac_addr: MAC address to assign (ETH_ALEN bytes are copied)
 *
 * Saves the address in adapter->vf_data[vf].vf_mac_addresses and passes it
 * to igb_rar_set_qsel() for the VF's receive-address entry.
 * Always returns 0.
 */
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and moves
	 * towards the first, as a result a collision should not be possible
	 */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	/* Keep the PF driver's per-VF copy of the address up to date. */
	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}

2.2 ip link set dev eth1 vf 2 mac $MAC

当通过PF去设置VF的MAC地址时,内核会调用PF的驱动函数igb_ndo_set_vf_mac:它先设置IGB_VF_FLAG_PF_SET_MAC标志,然后直接调用igb_set_vf_mac,更新PF驱动中保存的VF的MAC信息igb_adapter->vf_data[vf]:

/*
 * igb_ndo_set_vf_mac - set the MAC address of a VF from the PF side.
 * @netdev: PF network device
 * @vf: VF index
 * @mac: MAC address to assign
 *
 * Marks the VF's MAC as administratively set (IGB_VF_FLAG_PF_SET_MAC), logs
 * the change, and programs the address through igb_set_vf_mac().
 *
 * Returns -EINVAL if @mac is not a valid Ethernet address or @vf is not
 * below adapter->vfs_allocated_count; otherwise the result of
 * igb_set_vf_mac().
 */
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	/*
	 * Once this flag is set, igb_rcv_msg_from_vf() refuses
	 * E1000_VF_SET_MAC_ADDR requests coming from this VF.
	 */
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.");
	/* The address is still set below when the PF is down; this only warns. */
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}

到这里基本上明白了第一种方式设置MAC地址失败的原因了:一旦通过第二种方式设置了VF的MAC地址,就会设置IGB_VF_FLAG_PF_SET_MAC标志位,之后就不能再使用第一种方式了。

下面继续讨论第二个问题。从igb_ndo_set_vf_mac的提示可以看到,当我们通过PF去设置VF的MAC的时候,需要Reload the VF driver to make this change effective.

难道一定要重新加载VF驱动吗?如果是这样的话,会对所有的VF都有影响。实际上,VF驱动在加载的时候,的确会从PF的配置读取VF的MAC信息,然后设置VF:

/*
 * igbvf_probe - VF driver probe routine (abridged excerpt).
 *
 * Only the MAC-address part is shown; the declarations (hw, adapter, netdev,
 * err), the surrounding setup and the return path are elided (marked with
 * "///...").
 *
 * After a successful reset_hw the MAC is read through the read_mac_addr op
 * and copied into netdev->dev_addr.
 */
static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
///...
	/*reset the controller to put the device in a known good state */
	err = hw->mac.ops.reset_hw(hw);
	if (err) {
		dev_info(&pdev->dev,
			 "PF still in reset state. Is the PF interface up?\n");
	} else {
		err = hw->mac.ops.read_mac_addr(hw); ///read MAC from PF
		if (err)
			dev_info(&pdev->dev, "Error reading MAC address.\n");
		else if (is_zero_ether_addr(adapter->hw.mac.addr))
			dev_info(&pdev->dev, "MAC address not assigned by administrator.\n");
		/*
		 * Not guarded by the checks above: the copy runs whenever
		 * reset_hw succeeded, even if read_mac_addr reported an error
		 * or the address is all zeros.
		 */
		memcpy(netdev->dev_addr, adapter->hw.mac.addr, ///set MAC address
		       netdev->addr_len);
	}

///...
}

此外,VF驱动函数igbvf_reset也会设置网络设备地址net_device->dev_addr:

/*
 * igbvf_reset - reset the VF and refresh the net_device MAC address.
 * @adapter: VF adapter structure
 *
 * Runs the reset_hw and init_hw ops, then, if hw.mac.addr is a valid
 * Ethernet address, copies it into both netdev->dev_addr and
 * netdev->perm_addr. Finally records the reset time in adapter->last_reset.
 */
static void igbvf_reset(struct igbvf_adapter *adapter)
{
	struct e1000_mac_info *mac = &adapter->hw.mac;
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;

	/* Allow time for pending master requests to run */
	/* A reset_hw failure is only logged; execution continues regardless. */
	if (mac->ops.reset_hw(hw)) ///e1000_reset_hw_vf
		dev_err(&adapter->pdev->dev, "PF still resetting\n");

	mac->ops.init_hw(hw);///e1000_init_hw_vf

	/* Publish the address only if it is valid. */
	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
		memcpy(netdev->dev_addr, adapter->hw.mac.addr, ///set net_device MAC
		       netdev->addr_len);
		memcpy(netdev->perm_addr, adapter->hw.mac.addr,
		       netdev->addr_len);
	}

	adapter->last_reset = jiffies;
}

可以看到,VF驱动会使用adapter->hw.mac.addr的值。那么,该值是从哪里获取的?

实际上reset_hw,即e1000_reset_hw_vf会向PF发送E1000_VF_RESET消息,PF会返回MAC信息,VF读取然后保存在hw->mac.perm_addr

/*
 * e1000_reset_hw_vf - VF hardware reset (abridged excerpt).
 *
 * The local declarations (mbx, msgbuf, addr, timeout, ret_val), the code
 * preceding this point and the end of the function are not shown.
 *
 * Shown part: sends E1000_VF_RESET to the PF and, if the PF ACKs, copies
 * 6 bytes from addr into hw->mac.perm_addr.
 * NOTE(review): addr presumably points at the MAC bytes in the reply
 * buffer; its definition is elided, so confirm against the full source.
 */
static s32 e1000_reset_hw_vf(struct e1000_hw *hw)
{
	if (timeout) {
		/* mailbox timeout can now become active */
		mbx->timeout = E1000_VF_MBX_INIT_TIMEOUT;

		/* notify pf of vf reset completion */
		msgbuf[0] = E1000_VF_RESET;
		mbx->ops.write_posted(hw, msgbuf, 1);

		msleep(10);

		/* set our "perm_addr" based on info provided by PF */
		ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
		if (!ret_val) {
			if (msgbuf[0] == (E1000_VF_RESET | E1000_VT_MSGTYPE_ACK))
				memcpy(hw->mac.perm_addr, addr, 6); /// save the MAC
			else
				ret_val = -E1000_ERR_MAC_INIT;
		}
	}

init_hw,即e1000_init_hw_vf,会尝试直接发送E1000_VF_SET_MAC_ADDR消息;由于已经设置了IGB_VF_FLAG_PF_SET_MAC标志,PF当然返回E1000_VT_MSGTYPE_NACK:

/*
 * e1000_init_hw_vf - VF hardware init.
 * @hw: pointer to the HW structure
 *
 * Passes the current hw->mac.addr to e1000_rar_set_vf().
 * Always returns E1000_SUCCESS.
 */
static s32 e1000_init_hw_vf(struct e1000_hw *hw)
{
	/* attempt to set and restore our mac address */
	e1000_rar_set_vf(hw, hw->mac.addr, 0); /// see e1000_rar_set_vf, analysed earlier in this article

	return E1000_SUCCESS;
}

此时,VF就会使用前面的hw->mac.perm_addr覆盖hw->mac.addr,到这里,hw->mac.addr就保存从PF获取的VF的MAC信息。

最后,最重要的一点,igbvf_reset什么时候会被调用?

实际上,igbvf_down会调用igbvf_reset:

/*
 * igbvf_down - bring the VF interface down (abridged excerpt).
 *
 * The earlier part of the function is elided (marked with "///...").
 * The part shown calls igbvf_reset() and then cleans the TX and RX rings.
 */
void igbvf_down(struct igbvf_adapter *adapter)
{

///...
	/* This reset is what picks up a MAC the PF assigned in the meantime. */
	igbvf_reset(adapter);
	igbvf_clean_tx_ring(adapter->tx_ring);
	igbvf_clean_rx_ring(adapter->rx_ring);
}

这意味着,我们只需要将VF网络设备down一次(ip link set ... down),通过PF给VF设置的MAC信息就会反映到VF网络设备:

# ip link set dev8 up  ##由于VF处于down状态,需要先将其UP
# ip link show dev8    
8: dev8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff

# ip link show eth1    
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
    link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
    vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
    vf 2 MAC 14:05:00:f5:ac:3e, vlan 3
...
# ip link set dev8 down
# ip link show dev8    
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
    link/ether 14:05:00:f5:ac:3e brd ff:ff:ff:ff:ff:ff

可以看到dev8的地址从14:05:0a:f5:ac:3e变成了14:05:00:f5:ac:3e

down对应的dmesg信息:

[699929.948823] igb 0000:03:00.0: VF 2 attempted to override administratively set MAC address
[699929.948823] Reload the VF driver to resume operations
[699929.950056] igb 0000:03:00.0: VF 2 attempted to override administratively set VLAN tag
[699929.950056] Reload the VF driver to resume operations
[699929.950539] igbvf 0000:03:11.0: Failed to remove vlan id 0
[699929.950543] failed to kill vid 0081/0 for device dev8

update at 2019-03-26

实际上第二种方式设置MAC是更安全的方式:

User can use IPROUTE2 utility to assign a unique MAC address to a VF from within the host Operating System. Once the new MAC address is assigned, VM that has this particular VF assigned to will not be able to alter its MAC address. This is called "administratively assigned MAC" and is a security feature.

参考这里.

3 总结

通过PF设置VF的MAC后,需要重启VF网络设备,VF才能同步到PF的MAC信息。