UDP Tunnels
Tunnel creation

For the L2TP, FOU/GUE, GENEVE and VXLAN tunnels, creating a tunnel means opening a new UDP socket inside the kernel; the framework function udp_sock_create4 provides this. Besides creating the socket, it also binds it to the local address and, if the tunnel supplied peer address information, connects it to the peer.

int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
                     struct socket **sockp)
{
    int err;
    struct socket *sock = NULL;
    struct sockaddr_in udp_addr;

    err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
    ...
    udp_addr.sin_family = AF_INET;
    udp_addr.sin_addr = cfg->local_ip;
    udp_addr.sin_port = cfg->local_udp_port;
    err = kernel_bind(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr));
    ...
    if (cfg->peer_udp_port) {
        udp_addr.sin_family = AF_INET;
        udp_addr.sin_addr = cfg->peer_ip;
        udp_addr.sin_port = cfg->peer_udp_port;
        err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
                             sizeof(udp_addr), 0);
    }
    sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
    ...
}
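As a rough illustration of how a tunnel driver uses this helper, the sketch below fills in a struct udp_port_cfg and creates the kernel socket. This is only a minimal sketch: the function name my_tunnel_create_sock and the port number are made up for the example, while udp_sock_create and the udp_port_cfg fields come from include/net/udp_tunnel.h.

/* Hypothetical helper: open the kernel-side UDP socket for a tunnel. */
static struct socket *my_tunnel_create_sock(struct net *net)
{
    struct udp_port_cfg udp_conf;
    struct socket *sock;
    int err;

    memset(&udp_conf, 0, sizeof(udp_conf));
    udp_conf.family = AF_INET;
    udp_conf.local_ip.s_addr = htonl(INADDR_ANY);   /* bind to 0.0.0.0 */
    udp_conf.local_udp_port = htons(4789);          /* e.g. the VXLAN port */
    udp_conf.use_udp_checksums = true;

    /* udp_sock_create() dispatches to udp_sock_create4/6 based on cfg->family */
    err = udp_sock_create(net, &udp_conf, &sock);
    if (err < 0)
        return ERR_PTR(err);

    return sock;
}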
The function setup_udp_tunnel_sock then establishes the socket's tunnel binding: it attaches the tunnel attributes in cfg to the socket passed as the second parameter, so that packets the kernel receives on this socket are handed to the configured callback cfg->encap_rcv. The UDP-based tunnel protocols register the following receive handlers: l2tp_udp_encap_recv for L2TP, vxlan_rcv for VXLAN and geneve_udp_encap_recv for GENEVE; the generic UDP tunnel protocols FOU and GUE register fou_udp_recv and gue_udp_recv respectively.

void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
                           struct udp_tunnel_sock_cfg *cfg)
{
    struct sock *sk = sock->sk;
    ...
    udp_sk(sk)->encap_type = cfg->encap_type;
    udp_sk(sk)->encap_rcv = cfg->encap_rcv;
    udp_sk(sk)->encap_destroy = cfg->encap_destroy;
    udp_sk(sk)->gro_receive = cfg->gro_receive;
    udp_sk(sk)->gro_complete = cfg->gro_complete;

    udp_tunnel_encap_enable(sock);
}
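A hedged sketch of the registration side, loosely modeled on how VXLAN and GENEVE wire up their sockets; my_tunnel_encap_recv and my_tunnel_setup_sock are placeholders, while udp_tunnel_sock_cfg and its fields are the real framework structure:

/* Placeholder receive hook; the real per-protocol handlers are
 * l2tp_udp_encap_recv, vxlan_rcv, geneve_udp_encap_recv, etc. */
static int my_tunnel_encap_recv(struct sock *sk, struct sk_buff *skb);

static void my_tunnel_setup_sock(struct net *net, struct socket *sock,
                                 void *priv)
{
    struct udp_tunnel_sock_cfg tunnel_cfg;

    memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
    tunnel_cfg.sk_user_data = priv;              /* retrieved again in encap_rcv */
    tunnel_cfg.encap_type = 1;                   /* the value VXLAN/GENEVE use */
    tunnel_cfg.encap_rcv = my_tunnel_encap_recv;
    tunnel_cfg.encap_destroy = NULL;

    setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
}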
UDP tunnel receive

On the UDP packet processing path, udp_queue_rcv_skb checks whether UDP encapsulation has been enabled (the udp_encap_needed static key) and whether the socket's encap_type is non-zero; if both hold, it calls the encapsulation callback encap_rcv bound to this socket to handle the packet.

static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
    struct udp_sock *up = udp_sk(sk);
    ...
    if (static_key_false(&udp_encap_needed) && up->encap_type) {
        int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

        encap_rcv = READ_ONCE(up->encap_rcv);
        if (encap_rcv) {
            ...
            ret = encap_rcv(sk, skb);
            ...
        }
    }
    ...
}
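The return value of encap_rcv decides what the UDP layer does next: a positive value means the packet is not tunnel traffic and should continue through normal UDP processing, 0 means the callback consumed (or freed) the skb, and a negative value asks for resubmission as IP protocol -ret. The skeleton below only illustrates the shape of such a callback; my_tunnel_hdr, my_tunnel_id_known and my_tunnel_deliver are made-up placeholders, not kernel APIs.

/* Skeleton encap_rcv callback; the header layout and validation are
 * illustrative only. skb->data still points at the UDP header here. */
struct my_tunnel_hdr {
    __be32 id;
};

static int my_tunnel_encap_recv(struct sock *sk, struct sk_buff *skb)
{
    struct my_tunnel_hdr *hdr;

    /* Make sure the tunnel header is present after the UDP header */
    if (!pskb_may_pull(skb, sizeof(struct udphdr) + sizeof(*hdr)))
        goto pass_up;

    hdr = (struct my_tunnel_hdr *)(udp_hdr(skb) + 1);
    if (!my_tunnel_id_known(sk, hdr->id))
        goto pass_up;               /* let UDP deliver or reject it normally */

    my_tunnel_deliver(sk, skb);     /* strip headers, hand to the stack */
    return 0;                       /* consumed */

pass_up:
    return 1;                       /* fall through to normal UDP */
}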
UDP tunnel transmit

When a packet reaches a UDP tunnel device's transmit routine (ndo_start_xmit), for example the GENEVE transmit function geneve_xmit, the tunnel-specific processing happens there, and the packet is then sent out through the generic UDP tunnel transmit function udp_tunnel_xmit_skb, which builds the outer UDP header and hands the packet to iptunnel_xmit:

void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, ...)
{
    struct udphdr *uh;

    __skb_push(skb, sizeof(*uh));
    skb_reset_transport_header(skb);
    uh = udp_hdr(skb);

    uh->dest = dst_port;
    uh->source = src_port;
    uh->len = htons(skb->len);

    udp_set_csum(nocheck, skb, src, dst, skb->len);

    iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
}
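To make the call site concrete, here is a hedged sketch of a tunnel device's ndo_start_xmit, loosely following the geneve_xmit pattern. Everything prefixed my_ (the private structure and its fields, the route lookup, the header push) is a placeholder; only udp_flow_src_port and udp_tunnel_xmit_skb are framework functions.

static netdev_tx_t my_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct my_tunnel *t = netdev_priv(dev);     /* hypothetical private data */
    struct rtable *rt;
    __be16 sport, dport = t->remote_port;

    rt = my_tunnel_route_lookup(t, skb);        /* placeholder route lookup */
    if (IS_ERR(rt))
        goto drop;

    /* Pick a source port from the flow hash, as VXLAN/GENEVE do */
    sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true);

    my_tunnel_push_header(skb, t);              /* placeholder: add tunnel header */

    udp_tunnel_xmit_skb(rt, t->sock->sk, skb,
                        t->local_ip, t->remote_ip, t->tos, t->ttl,
                        htons(IP_DF), sport, dport,
                        false /* xnet */, !t->use_csum /* nocheck */);
    return NETDEV_TX_OK;

drop:
    dev_kfree_skb(skb);
    dev->stats.tx_dropped++;
    return NETDEV_TX_OK;
}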
UDP tunnel offload

Physical NICs that can offload UDP tunnel (VXLAN/GENEVE) receive processing learn about tunnel UDP ports through the NETDEV_UDP_TUNNEL_PUSH_INFO / NETDEV_UDP_TUNNEL_DROP_INFO netdevice notifier events. The framework functions udp_tunnel_push_rx_port and udp_tunnel_drop_rx_port program and remove an offloaded port on a NIC, provided the driver implements the ndo_udp_tunnel_add / ndo_udp_tunnel_del operations and advertises the NETIF_F_RX_UDP_TUNNEL_PORT feature. The Mellanox mlx5e driver, for example, provides both hooks:

const struct net_device_ops mlx5e_netdev_ops = {
    .ndo_open = mlx5e_open,
    .ndo_stop = mlx5e_close,
    .ndo_start_xmit = mlx5e_xmit,
    .ndo_setup_tc = mlx5e_setup_tc,
    .ndo_select_queue = mlx5e_select_queue,
    .ndo_get_stats64 = mlx5e_get_stats,
    .ndo_set_rx_mode = mlx5e_set_rx_mode,
    .ndo_set_mac_address = mlx5e_set_mac,
    .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
    .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
    .ndo_set_features = mlx5e_set_features,
    .ndo_fix_features = mlx5e_fix_features,
    .ndo_change_mtu = mlx5e_change_nic_mtu,
    .ndo_do_ioctl = mlx5e_ioctl,
    .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
    .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
    .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
    .ndo_features_check = mlx5e_features_check,
    .ndo_tx_timeout = mlx5e_tx_timeout,
    .ndo_bpf = mlx5e_xdp,
    .ndo_xdp_xmit = mlx5e_xdp_xmit,
    .ndo_xsk_wakeup = mlx5e_xsk_wakeup,
#ifdef CONFIG_MLX5_EN_ARFS
    .ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
#ifdef CONFIG_MLX5_ESWITCH
    .ndo_bridge_setlink = mlx5e_bridge_setlink,
    .ndo_bridge_getlink = mlx5e_bridge_getlink,
    /* SRIOV E-Switch NDOs */
    .ndo_set_vf_mac = mlx5e_set_vf_mac,
    .ndo_set_vf_vlan = mlx5e_set_vf_vlan,
    .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
    .ndo_set_vf_trust = mlx5e_set_vf_trust,
    .ndo_set_vf_rate = mlx5e_set_vf_rate,
    .ndo_get_vf_config = mlx5e_get_vf_config,
    .ndo_set_vf_link_state = mlx5e_set_vf_link_state,
    .ndo_get_vf_stats = mlx5e_get_vf_stats,
#endif
};

udp_tunnel_push_rx_port and udp_tunnel_drop_rx_port fill in a struct udp_tunnel_info with the tunnel type, the address family and the UDP port of the tunnel socket, then hand it to the driver's NDO:
void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
                             unsigned short type)
{
    struct sock *sk = sock->sk;
    struct udp_tunnel_info ti;

    if (!dev->netdev_ops->ndo_udp_tunnel_add ||
        !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
        return;

    ti.type = type;
    ti.sa_family = sk->sk_family;
    ti.port = inet_sk(sk)->inet_sport;

    dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);

void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
                             unsigned short type)
{
    struct sock *sk = sock->sk;
    struct udp_tunnel_info ti;

    if (!dev->netdev_ops->ndo_udp_tunnel_del ||
        !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
        return;

    ti.type = type;
    ti.sa_family = sk->sk_family;
    ti.port = inet_sk(sk)->inet_sport;

    dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
VXLAN, for instance, registers a netdevice notifier: when a NIC registers, unregisters or raises the push/drop events, it walks all of its open tunnel sockets and programs or removes each port on that device:

static int vxlan_netdevice_event(struct notifier_block *unused,
                                 unsigned long event, void *ptr)
{
    struct net_device *dev = netdev_notifier_info_to_dev(ptr);
    struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);

    if (event == NETDEV_UNREGISTER) {
        vxlan_offload_rx_ports(dev, false);
        vxlan_handle_lowerdev_unregister(vn, dev);
    } else if (event == NETDEV_REGISTER) {
        vxlan_offload_rx_ports(dev, true);
    } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
               event == NETDEV_UDP_TUNNEL_DROP_INFO) {
        vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
    }

    return NOTIFY_DONE;
}

static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
{
    struct vxlan_sock *vs;
    struct net *net = dev_net(dev);
    struct vxlan_net *vn = net_generic(net, vxlan_net_id);
    unsigned int i;

    spin_lock(&vn->sock_lock);
    for (i = 0; i < PORT_HASH_SIZE; ++i) {
        hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
            unsigned short type;

            if (vs->flags & VXLAN_F_GPE)
                type = UDP_TUNNEL_TYPE_VXLAN_GPE;
            else
                type = UDP_TUNNEL_TYPE_VXLAN;

            if (push)
                udp_tunnel_push_rx_port(dev, vs->sock, type);
            else
                udp_tunnel_drop_rx_port(dev, vs->sock, type);
        }
    }
    spin_unlock(&vn->sock_lock);
}
On the driver side, mlx5e turns the NDO callback into a deferred work item that eventually programs the VXLAN UDP port into the firmware:

static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
{
    struct mlx5e_vxlan_work *vxlan_work;

    vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
    if (!vxlan_work)
        return;

    if (add)
        INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
    else
        INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);

    vxlan_work->priv = priv;
    vxlan_work->port = port;
    queue_work(priv->wq, &vxlan_work->work);
}

void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
    struct mlx5e_priv *priv = netdev_priv(netdev);

    if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
        return;

    if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
        return;

    mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
}

static void mlx5e_vxlan_add_work(struct work_struct *work)
{
    struct mlx5e_vxlan_work *vxlan_work =
        container_of(work, struct mlx5e_vxlan_work, work);
    struct mlx5e_priv *priv = vxlan_work->priv;
    u16 port = vxlan_work->port;

    mutex_lock(&priv->state_lock);
    mlx5_vxlan_add_port(priv->mdev->vxlan, port);
    mutex_unlock(&priv->state_lock);
    kfree(vxlan_work);
}

int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
{
    ...
    ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
    ...
}

static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
{
    u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
    u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};

    MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
             MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
    MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
    return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}
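One detail worth noting on the driver side: the set of offloaded ports has to be replayed when the device is (re)opened, since tunnel sockets may have been created while the NIC was down. The usual pattern, sketched below with a placeholder my_driver_open, is to raise the push event from ndo_open via udp_tunnel_get_rx_info, which simply issues NETDEV_UDP_TUNNEL_PUSH_INFO for the device so that notifiers such as vxlan_netdevice_event above re-push their ports.

static int my_driver_open(struct net_device *netdev)
{
    /* ... bring the hardware up (placeholder) ... */

    /* ndo_open runs under RTNL, which udp_tunnel_get_rx_info() asserts;
     * the call ends up invoking our ndo_udp_tunnel_add for every open
     * VXLAN/GENEVE socket in the system. */
    udp_tunnel_get_rx_info(netdev);

    return 0;
}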
External UDP tunnels

For UDP tunnels that are not created with ip link, i.e. tunnels whose control plane lives in an external system such as the routing subsystem, where the tunnel parameters are specified with ip route ... encap, the route-related tunnel information has to be kept in the route cache. See the framework function udp_tun_rx_dst, which uses a metadata_dst structure to hold both the generic routing information and the tunnel information:

struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
                                    __be16 flags, __be64 tunnel_id, int md_size)
{
    struct metadata_dst *tun_dst;
    struct ip_tunnel_info *info;

    if (family == AF_INET)
        tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size);
    else
        tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size);
    if (!tun_dst)
        return NULL;

    info = &tun_dst->u.tun_info;
    info->key.tp_src = udp_hdr(skb)->source;
    info->key.tp_dst = udp_hdr(skb)->dest;
    if (udp_hdr(skb)->check)
        info->key.tun_flags |= TUNNEL_CSUM;
    return tun_dst;
}
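As a hedged usage sketch (modeled on how collect-metadata mode receive handlers such as VXLAN's and GENEVE's use it; my_tunnel_extract_id and the drop label are placeholders), the encap_rcv handler builds the metadata dst and attaches it to the skb so that later routing, OVS or tc code can match on the tunnel key:

/* Inside an encap_rcv-style handler, for a tunnel running in external
 * (collect-metadata) mode. */
struct metadata_dst *tun_dst;
__be64 tun_id = my_tunnel_extract_id(skb);      /* placeholder: read the VNI/key */

tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, tun_id, 0 /* no option metadata */);
if (!tun_dst)
    goto drop;

/* Attach the tunnel metadata; the inner packet now carries its
 * outer-tunnel key through the rest of the receive path. */
skb_dst_set(skb, &tun_dst->dst);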