mirror of
https://github.com/Telecominfraproject/wlan-ap.git
synced 2025-10-29 09:32:34 +00:00
ipq807x: backport vxlan path mtu changes
Fixes: WIFI-7571 Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
committed by
John Crispin
parent
cf657dbd94
commit
7bab50dd3c
@@ -0,0 +1,72 @@
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Tue, 4 Aug 2020 07:53:42 +0200
|
||||
Subject: [PATCH] ipv4: route: Ignore output interface in FIB lookup for PMTU
|
||||
route
|
||||
|
||||
Currently, processes sending traffic to a local bridge with an
|
||||
encapsulation device as a port don't get ICMP errors if they exceed
|
||||
the PMTU of the encapsulated link.
|
||||
|
||||
David Ahern suggested this as a hack, but it actually looks like
|
||||
the correct solution: when we update the PMTU for a given destination
|
||||
by means of updating or creating a route exception, the encapsulation
|
||||
might trigger this because of PMTU discovery happening either on the
|
||||
encapsulation device itself, or its lower layer. This happens on
|
||||
bridged encapsulations only.
|
||||
|
||||
The output interface shouldn't matter, because we already have a
|
||||
valid destination. Drop the output interface restriction from the
|
||||
associated route lookup.
|
||||
|
||||
For UDP tunnels, we will now have a route exception created for the
|
||||
encapsulation itself, with a MTU value reflecting its headroom, which
|
||||
allows a bridge forwarding IP packets originated locally to deliver
|
||||
errors back to the sending socket.
|
||||
|
||||
The behaviour is now consistent with IPv6 and verified with selftests
|
||||
pmtu_ipv{4,6}_br_{geneve,vxlan}{4,6}_exception introduced later in
|
||||
this series.
|
||||
|
||||
v2:
|
||||
- reset output interface only for bridge ports (David Ahern)
|
||||
- add and use netif_is_any_bridge_port() helper (David Ahern)
|
||||
|
||||
Suggested-by: David Ahern <dsahern@gmail.com>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Ahern <dsahern@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -4015,6 +4015,16 @@ static inline bool netif_is_ovs_master(c
|
||||
return dev->priv_flags & IFF_OPENVSWITCH;
|
||||
}
|
||||
|
||||
+static inline bool netif_is_ovs_port(const struct net_device *dev)
|
||||
+{
|
||||
+ return dev->priv_flags & IFF_OVS_DATAPATH;
|
||||
+}
|
||||
+
|
||||
+static inline bool netif_is_any_bridge_port(const struct net_device *dev)
|
||||
+{
|
||||
+ return netif_is_bridge_port(dev) || netif_is_ovs_port(dev);
|
||||
+}
|
||||
+
|
||||
static inline bool netif_is_ifb_dev(const struct net_device *dev)
|
||||
{
|
||||
return dev->priv_flags_ext & IFF_EXT_IFB;
|
||||
--- a/net/ipv4/route.c
|
||||
+++ b/net/ipv4/route.c
|
||||
@@ -1004,6 +1004,11 @@ static void ip_rt_update_pmtu(struct dst
|
||||
struct flowi4 fl4;
|
||||
|
||||
ip_rt_build_flow_key(&fl4, sk, skb);
|
||||
+
|
||||
+ /* Don't make lookup fail for bridged encapsulations */
|
||||
+ if (skb && netif_is_any_bridge_port(skb->dev))
|
||||
+ fl4.flowi4_oif = 0;
|
||||
+
|
||||
__ip_rt_update_pmtu(rt, &fl4, mtu);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
From: Matteo Croce <mcroce@redhat.com>
|
||||
Date: Sat, 2 Nov 2019 01:12:03 +0100
|
||||
Subject: [PATCH] icmp: add helpers to recognize ICMP error packets
|
||||
|
||||
Add two helper functions, one for IPv4 and one for IPv6, to recognize
|
||||
the ICMP packets which are error responses.
|
||||
This packets are special because they have as payload the original
|
||||
header of the packet which generated it (RFC 792 says at least 8 bytes,
|
||||
but Linux actually includes much more than that).
|
||||
|
||||
Signed-off-by: Matteo Croce <mcroce@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/include/linux/icmp.h
|
||||
+++ b/include/linux/icmp.h
|
||||
@@ -24,4 +24,19 @@ static inline struct icmphdr *icmp_hdr(c
|
||||
{
|
||||
return (struct icmphdr *)skb_transport_header(skb);
|
||||
}
|
||||
+
|
||||
+static inline bool icmp_is_err(int type)
|
||||
+{
|
||||
+ switch (type) {
|
||||
+ case ICMP_DEST_UNREACH:
|
||||
+ case ICMP_SOURCE_QUENCH:
|
||||
+ case ICMP_REDIRECT:
|
||||
+ case ICMP_TIME_EXCEEDED:
|
||||
+ case ICMP_PARAMETERPROB:
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
#endif /* _LINUX_ICMP_H */
|
||||
--- a/include/linux/icmpv6.h
|
||||
+++ b/include/linux/icmpv6.h
|
||||
@@ -42,4 +42,18 @@ extern void icmpv6_flow_init(struct s
|
||||
const struct in6_addr *saddr,
|
||||
const struct in6_addr *daddr,
|
||||
int oif);
|
||||
+
|
||||
+static inline bool icmpv6_is_err(int type)
|
||||
+{
|
||||
+ switch (type) {
|
||||
+ case ICMPV6_DEST_UNREACH:
|
||||
+ case ICMPV6_PKT_TOOBIG:
|
||||
+ case ICMPV6_TIME_EXCEED:
|
||||
+ case ICMPV6_PARAMPROB:
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
@@ -0,0 +1,360 @@
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Tue, 4 Aug 2020 07:53:43 +0200
|
||||
Subject: [PATCH] tunnels: PMTU discovery support for directly bridged IP
|
||||
packets
|
||||
|
||||
It's currently possible to bridge Ethernet tunnels carrying IP
|
||||
packets directly to external interfaces without assigning them
|
||||
addresses and routes on the bridged network itself: this is the case
|
||||
for UDP tunnels bridged with a standard bridge or by Open vSwitch.
|
||||
|
||||
PMTU discovery is currently broken with those configurations, because
|
||||
the encapsulation effectively decreases the MTU of the link, and
|
||||
while we are able to account for this using PMTU discovery on the
|
||||
lower layer, we don't have a way to relay ICMP or ICMPv6 messages
|
||||
needed by the sender, because we don't have valid routes to it.
|
||||
|
||||
On the other hand, as a tunnel endpoint, we can't fragment packets
|
||||
as a general approach: this is for instance clearly forbidden for
|
||||
VXLAN by RFC 7348, section 4.3:
|
||||
|
||||
VTEPs MUST NOT fragment VXLAN packets. Intermediate routers may
|
||||
fragment encapsulated VXLAN packets due to the larger frame size.
|
||||
The destination VTEP MAY silently discard such VXLAN fragments.
|
||||
|
||||
The same paragraph recommends that the MTU over the physical network
|
||||
accomodates for encapsulations, but this isn't a practical option for
|
||||
complex topologies, especially for typical Open vSwitch use cases.
|
||||
|
||||
Further, it states that:
|
||||
|
||||
Other techniques like Path MTU discovery (see [RFC1191] and
|
||||
[RFC1981]) MAY be used to address this requirement as well.
|
||||
|
||||
Now, PMTU discovery already works for routed interfaces, we get
|
||||
route exceptions created by the encapsulation device as they receive
|
||||
ICMP Fragmentation Needed and ICMPv6 Packet Too Big messages, and
|
||||
we already rebuild those messages with the appropriate MTU and route
|
||||
them back to the sender.
|
||||
|
||||
Add the missing bits for bridged cases:
|
||||
|
||||
- checks in skb_tunnel_check_pmtu() to understand if it's appropriate
|
||||
to trigger a reply according to RFC 1122 section 3.2.2 for ICMP and
|
||||
RFC 4443 section 2.4 for ICMPv6. This function is already called by
|
||||
UDP tunnels
|
||||
|
||||
- a new function generating those ICMP or ICMPv6 replies. We can't
|
||||
reuse icmp_send() and icmp6_send() as we don't see the sender as a
|
||||
valid destination. This doesn't need to be generic, as we don't
|
||||
cover any other type of ICMP errors given that we only provide an
|
||||
encapsulation function to the sender
|
||||
|
||||
While at it, make the MTU check in skb_tunnel_check_pmtu() accurate:
|
||||
we might receive GSO buffers here, and the passed headroom already
|
||||
includes the inner MAC length, so we don't have to account for it
|
||||
a second time (that would imply three MAC headers on the wire, but
|
||||
there are just two).
|
||||
|
||||
This issue became visible while bridging IPv6 packets with 4500 bytes
|
||||
of payload over GENEVE using IPv4 with a PMTU of 4000. Given the 50
|
||||
bytes of encapsulation headroom, we would advertise MTU as 3950, and
|
||||
we would reject fragmented IPv6 datagrams of 3958 bytes size on the
|
||||
wire. We're exclusively dealing with network MTU here, though, so we
|
||||
could get Ethernet frames up to 3964 octets in that case.
|
||||
|
||||
v2:
|
||||
- moved skb_tunnel_check_pmtu() to ip_tunnel_core.c (David Ahern)
|
||||
- split IPv4/IPv6 functions (David Ahern)
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Ahern <dsahern@gmail.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/include/net/ip_tunnels.h
|
||||
+++ b/include/net/ip_tunnels.h
|
||||
@@ -279,6 +279,8 @@ int iptunnel_xmit(struct sock *sk, struc
|
||||
u8 tos, u8 ttl, __be16 df, bool xnet);
|
||||
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
|
||||
gfp_t flags);
|
||||
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
|
||||
+ int headroom, bool reply);
|
||||
|
||||
struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
|
||||
int gso_type_mask);
|
||||
--- a/net/ipv4/ip_tunnel_core.c
|
||||
+++ b/net/ipv4/ip_tunnel_core.c
|
||||
@@ -204,6 +204,252 @@ error:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
|
||||
|
||||
+/**
|
||||
+ * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD
|
||||
+ * @skb: Original packet with L2 header
|
||||
+ * @mtu: MTU value for ICMP error
|
||||
+ *
|
||||
+ * Return: length on success, negative error code if message couldn't be built.
|
||||
+ */
|
||||
+static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
|
||||
+{
|
||||
+ const struct iphdr *iph = ip_hdr(skb);
|
||||
+ struct icmphdr *icmph;
|
||||
+ struct iphdr *niph;
|
||||
+ struct ethhdr eh;
|
||||
+ int len, err;
|
||||
+
|
||||
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ skb_copy_bits(skb, skb_mac_header(skb) - skb->data, &eh, ETH_HLEN);
|
||||
+ pskb_pull(skb, ETH_HLEN);
|
||||
+ skb_reset_network_header(skb);
|
||||
+
|
||||
+ err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph));
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ len = skb->len + sizeof(*icmph);
|
||||
+ err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ icmph = (struct icmphdr *)skb_push(skb, sizeof(*icmph));
|
||||
+ *icmph = (struct icmphdr) {
|
||||
+ .type = ICMP_DEST_UNREACH,
|
||||
+ .code = ICMP_FRAG_NEEDED,
|
||||
+ .checksum = 0,
|
||||
+ .un.frag.__unused = 0,
|
||||
+ .un.frag.mtu = ntohs(mtu),
|
||||
+ };
|
||||
+ icmph->checksum = ip_compute_csum(icmph, len);
|
||||
+ skb_reset_transport_header(skb);
|
||||
+
|
||||
+ niph = (struct iphdr *)skb_push(skb, sizeof(*niph));
|
||||
+ *niph = (struct iphdr) {
|
||||
+ .ihl = sizeof(*niph) / 4u,
|
||||
+ .version = 4,
|
||||
+ .tos = 0,
|
||||
+ .tot_len = htons(len + sizeof(*niph)),
|
||||
+ .id = 0,
|
||||
+ .frag_off = htons(IP_DF),
|
||||
+ .ttl = iph->ttl,
|
||||
+ .protocol = IPPROTO_ICMP,
|
||||
+ .saddr = iph->daddr,
|
||||
+ .daddr = iph->saddr,
|
||||
+ };
|
||||
+ ip_send_check(niph);
|
||||
+ skb_reset_network_header(skb);
|
||||
+
|
||||
+ skb->ip_summed = CHECKSUM_NONE;
|
||||
+
|
||||
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
|
||||
+ skb_reset_mac_header(skb);
|
||||
+
|
||||
+ return skb->len;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed
|
||||
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
|
||||
+ * @mtu: Network MTU for path
|
||||
+ *
|
||||
+ * Return: 0 for no ICMP reply, length if built, negative value on error.
|
||||
+ */
|
||||
+static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu)
|
||||
+{
|
||||
+ const struct icmphdr *icmph = icmp_hdr(skb);
|
||||
+ const struct iphdr *iph = ip_hdr(skb);
|
||||
+
|
||||
+ if (mtu <= 576 || iph->frag_off != htons(IP_DF))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) ||
|
||||
+ ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) ||
|
||||
+ ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type))
|
||||
+ return 0;
|
||||
+
|
||||
+ return iptunnel_pmtud_build_icmp(skb, mtu);
|
||||
+}
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_IPV6)
|
||||
+/**
|
||||
+ * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD
|
||||
+ * @skb: Original packet with L2 header
|
||||
+ * @mtu: MTU value for ICMPv6 error
|
||||
+ *
|
||||
+ * Return: length on success, negative error code if message couldn't be built.
|
||||
+ */
|
||||
+static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
|
||||
+{
|
||||
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||
+ struct icmp6hdr *icmp6h;
|
||||
+ struct ipv6hdr *nip6h;
|
||||
+ struct ethhdr eh;
|
||||
+ int len, err;
|
||||
+ __wsum csum;
|
||||
+
|
||||
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ skb_copy_bits(skb, skb_mac_header(skb) - skb->data, &eh, ETH_HLEN);
|
||||
+ pskb_pull(skb, ETH_HLEN);
|
||||
+ skb_reset_network_header(skb);
|
||||
+
|
||||
+ err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h));
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ len = skb->len + sizeof(*icmp6h);
|
||||
+ err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ icmp6h = (struct icmp6hdr *)skb_push(skb, sizeof(*icmp6h));
|
||||
+ *icmp6h = (struct icmp6hdr) {
|
||||
+ .icmp6_type = ICMPV6_PKT_TOOBIG,
|
||||
+ .icmp6_code = 0,
|
||||
+ .icmp6_cksum = 0,
|
||||
+ .icmp6_mtu = htonl(mtu),
|
||||
+ };
|
||||
+ skb_reset_transport_header(skb);
|
||||
+
|
||||
+ nip6h = (struct ipv6hdr *)skb_push(skb, sizeof(*nip6h));
|
||||
+ *nip6h = (struct ipv6hdr) {
|
||||
+ .priority = 0,
|
||||
+ .version = 6,
|
||||
+ .flow_lbl = { 0 },
|
||||
+ .payload_len = htons(len),
|
||||
+ .nexthdr = IPPROTO_ICMPV6,
|
||||
+ .hop_limit = ip6h->hop_limit,
|
||||
+ .saddr = ip6h->daddr,
|
||||
+ .daddr = ip6h->saddr,
|
||||
+ };
|
||||
+ skb_reset_network_header(skb);
|
||||
+
|
||||
+ csum = csum_partial(icmp6h, len, 0);
|
||||
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
|
||||
+ IPPROTO_ICMPV6, csum);
|
||||
+
|
||||
+ skb->ip_summed = CHECKSUM_NONE;
|
||||
+
|
||||
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
|
||||
+ skb_reset_mac_header(skb);
|
||||
+
|
||||
+ return skb->len;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed
|
||||
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
|
||||
+ * @mtu: Network MTU for path
|
||||
+ *
|
||||
+ * Return: 0 for no ICMPv6 reply, length if built, negative value on error.
|
||||
+ */
|
||||
+static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu)
|
||||
+{
|
||||
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
|
||||
+ int stype = ipv6_addr_type(&ip6h->saddr);
|
||||
+ u8 proto = ip6h->nexthdr;
|
||||
+ __be16 frag_off;
|
||||
+ int offset;
|
||||
+
|
||||
+ if (mtu <= IPV6_MIN_MTU)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST ||
|
||||
+ stype == IPV6_ADDR_LOOPBACK)
|
||||
+ return 0;
|
||||
+
|
||||
+ offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto,
|
||||
+ &frag_off);
|
||||
+ if (offset < 0 || (frag_off & htons(~0x7)))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (proto == IPPROTO_ICMPV6) {
|
||||
+ struct icmp6hdr *icmp6h;
|
||||
+
|
||||
+ if (!pskb_may_pull(skb, skb_network_header(skb) +
|
||||
+ offset + 1 - skb->data))
|
||||
+ return 0;
|
||||
+
|
||||
+ icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset);
|
||||
+ if (icmpv6_is_err(icmp6h->icmp6_type) ||
|
||||
+ icmp6h->icmp6_type == NDISC_REDIRECT)
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ return iptunnel_pmtud_build_icmpv6(skb, mtu);
|
||||
+}
|
||||
+#endif /* IS_ENABLED(CONFIG_IPV6) */
|
||||
+
|
||||
+/**
|
||||
+ * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed
|
||||
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
|
||||
+ * @encap_dst: Destination for tunnel encapsulation (outer IP)
|
||||
+ * @headroom: Encapsulation header size, bytes
|
||||
+ * @reply: Build matching ICMP or ICMPv6 message as a result
|
||||
+ *
|
||||
+ * L2 tunnel implementations that can carry IP and can be directly bridged
|
||||
+ * (currently UDP tunnels) can't always rely on IP forwarding paths to handle
|
||||
+ * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built
|
||||
+ * based on payload and sent back by the encapsulation itself.
|
||||
+ *
|
||||
+ * For routable interfaces, we just need to update the PMTU for the destination.
|
||||
+ *
|
||||
+ * Return: 0 if ICMP error not needed, length if built, negative value on error
|
||||
+ */
|
||||
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
|
||||
+ int headroom, bool reply)
|
||||
+{
|
||||
+ struct dst_entry *dst = skb_dst(skb);
|
||||
+ u32 mtu = dst_mtu(encap_dst) - headroom;
|
||||
+
|
||||
+ if ((skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) ||
|
||||
+ (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (dst && dst->ops->update_pmtu)
|
||||
+ dst->ops->update_pmtu(dst, NULL, skb, mtu);
|
||||
+
|
||||
+ if (!reply || skb->pkt_type == PACKET_HOST)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (skb->protocol == htons(ETH_P_IP))
|
||||
+ return iptunnel_pmtud_check_icmp(skb, mtu);
|
||||
+
|
||||
+#if IS_ENABLED(CONFIG_IPV6)
|
||||
+ if (skb->protocol == htons(ETH_P_IPV6))
|
||||
+ return iptunnel_pmtud_check_icmpv6(skb, mtu);
|
||||
+#endif
|
||||
+ return 0;
|
||||
+}
|
||||
+EXPORT_SYMBOL(skb_tunnel_check_pmtu);
|
||||
+
|
||||
/* Often modified stats are per cpu, other are shared (netdev->stats) */
|
||||
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
|
||||
struct rtnl_link_stats64 *tot)
|
||||
--- a/drivers/net/vxlan.c
|
||||
+++ b/drivers/net/vxlan.c
|
||||
@@ -2076,6 +2076,8 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
/* Reset the skb_iif to Tunnels interface index */
|
||||
skb->skb_iif = dev->ifindex;
|
||||
|
||||
+ skb_tunnel_check_pmtu(skb, &rt->dst, VXLAN_HEADROOM, false);
|
||||
+
|
||||
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
|
||||
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
|
||||
err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
|
||||
@@ -2141,6 +2143,8 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
|
||||
}
|
||||
|
||||
+ skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM, false);
|
||||
+
|
||||
ttl = ttl ? : ip6_dst_hoplimit(ndst);
|
||||
err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
|
||||
0, ttl, src_port, dst_port, htonl(vni << 8), md,
|
||||
@@ -0,0 +1,114 @@
|
||||
From: pravin shelar <pshelar@ovn.org>
|
||||
Date: Sun, 13 Nov 2016 20:43:56 -0800
|
||||
Subject: [PATCH] vxlan: simplify RTF_LOCAL handling.
|
||||
|
||||
Avoid code duplicate code for handling RTF_LOCAL routes.
|
||||
|
||||
Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
|
||||
Acked-by: Jiri Benc <jbenc@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/vxlan.c
|
||||
+++ b/drivers/net/vxlan.c
|
||||
@@ -1946,6 +1946,40 @@ static void vxlan_encap_bypass(struct sk
|
||||
}
|
||||
}
|
||||
|
||||
+static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
|
||||
+ struct vxlan_dev *vxlan, union vxlan_addr *daddr,
|
||||
+ __be32 dst_port, __be32 vni, struct dst_entry *dst,
|
||||
+ u32 rt_flags)
|
||||
+{
|
||||
+#if IS_ENABLED(CONFIG_IPV6)
|
||||
+ /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
|
||||
+ * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
|
||||
+ * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
|
||||
+ */
|
||||
+ BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
|
||||
+#endif
|
||||
+ /* Bypass encapsulation if the destination is local */
|
||||
+ if (rt_flags & RTCF_LOCAL &&
|
||||
+ !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
|
||||
+ struct vxlan_dev *dst_vxlan;
|
||||
+
|
||||
+ dst_release(dst);
|
||||
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni,
|
||||
+ daddr->sa.sa_family, dst_port,
|
||||
+ vxlan->flags);
|
||||
+ if (!dst_vxlan) {
|
||||
+ dev->stats.tx_errors++;
|
||||
+ kfree_skb(skb);
|
||||
+
|
||||
+ return -ENOENT;
|
||||
+ }
|
||||
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
||||
struct vxlan_rdst *rdst, bool did_rsc)
|
||||
{
|
||||
@@ -2059,18 +2093,12 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
}
|
||||
|
||||
/* Bypass encapsulation if the destination is local */
|
||||
- if (rt->rt_flags & RTCF_LOCAL &&
|
||||
- !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
|
||||
- struct vxlan_dev *dst_vxlan;
|
||||
-
|
||||
- ip_rt_put(rt);
|
||||
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
|
||||
- dst->sa.sa_family, dst_port,
|
||||
- vxlan->flags);
|
||||
- if (!dst_vxlan)
|
||||
- goto tx_error;
|
||||
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
- return;
|
||||
+ if (!info) {
|
||||
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
|
||||
+ dst_port, vni, &rt->dst,
|
||||
+ rt->rt_flags);
|
||||
+ if (err)
|
||||
+ return;
|
||||
}
|
||||
|
||||
/* Reset the skb_iif to Tunnels interface index */
|
||||
@@ -2096,7 +2124,6 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
} else {
|
||||
struct dst_entry *ndst;
|
||||
struct in6_addr saddr;
|
||||
- u32 rt6i_flags;
|
||||
|
||||
if (!vxlan->vn6_sock)
|
||||
goto drop;
|
||||
@@ -2121,19 +2148,14 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
}
|
||||
|
||||
/* Bypass encapsulation if the destination is local */
|
||||
- rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
|
||||
- if (rt6i_flags & RTF_LOCAL &&
|
||||
- !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
|
||||
- struct vxlan_dev *dst_vxlan;
|
||||
+ if (!info) {
|
||||
+ u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
|
||||
|
||||
- dst_release(ndst);
|
||||
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
|
||||
- dst->sa.sa_family, dst_port,
|
||||
- vxlan->flags);
|
||||
- if (!dst_vxlan)
|
||||
- goto tx_error;
|
||||
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
- return;
|
||||
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
|
||||
+ dst_port, vni, ndst,
|
||||
+ rt6i_flags);
|
||||
+ if (err)
|
||||
+ return;
|
||||
}
|
||||
|
||||
if (info) {
|
||||
@@ -0,0 +1,140 @@
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Tue, 4 Aug 2020 07:53:44 +0200
|
||||
Subject: [PATCH] vxlan: Support for PMTU discovery on directly bridged links
|
||||
|
||||
If the interface is a bridge or Open vSwitch port, and we can't
|
||||
forward a packet because it exceeds the local PMTU estimate,
|
||||
trigger an ICMP or ICMPv6 reply to the sender, using the same
|
||||
interface to forward it back.
|
||||
|
||||
If metadata collection is enabled, reverse destination and source
|
||||
addresses, so that Open vSwitch is able to match this packet against
|
||||
the existing, reverse flow.
|
||||
|
||||
v2: Use netif_is_any_bridge_port() (David Ahern)
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
|
||||
--- a/drivers/net/vxlan.c
|
||||
+++ b/drivers/net/vxlan.c
|
||||
@@ -1903,7 +1903,7 @@ static struct dst_entry *vxlan6_get_rout
|
||||
|
||||
/* Bypass encapsulation if the destination is local */
|
||||
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
|
||||
- struct vxlan_dev *dst_vxlan)
|
||||
+ struct vxlan_dev *dst_vxlan, bool snoop)
|
||||
{
|
||||
struct pcpu_sw_netstats *tx_stats, *rx_stats;
|
||||
union vxlan_addr loopback;
|
||||
@@ -1928,7 +1928,7 @@ static void vxlan_encap_bypass(struct sk
|
||||
#endif
|
||||
}
|
||||
|
||||
- if (dst_vxlan->flags & VXLAN_F_LEARN)
|
||||
+ if ((dst_vxlan->flags & VXLAN_F_LEARN) && snoop)
|
||||
vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source);
|
||||
|
||||
u64_stats_update_begin(&tx_stats->syncp);
|
||||
@@ -1973,7 +1973,7 @@ static int encap_bypass_if_local(struct
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
||||
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1990,7 +1990,7 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
const struct iphdr *old_iph;
|
||||
struct flowi4 fl4;
|
||||
union vxlan_addr *dst;
|
||||
- union vxlan_addr remote_ip;
|
||||
+ union vxlan_addr remote_ip, local_ip;
|
||||
struct vxlan_metadata _md;
|
||||
struct vxlan_metadata *md = &_md;
|
||||
__be16 src_port = 0, dst_port;
|
||||
@@ -2006,6 +2006,7 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
|
||||
vni = rdst->remote_vni;
|
||||
dst = &rdst->remote_ip;
|
||||
+ local_ip = vxlan->cfg.saddr;
|
||||
} else {
|
||||
if (!info) {
|
||||
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
|
||||
@@ -2015,17 +2016,20 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
|
||||
vni = be64_to_cpu(info->key.tun_id);
|
||||
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
|
||||
- if (remote_ip.sa.sa_family == AF_INET)
|
||||
+ if (remote_ip.sa.sa_family == AF_INET) {
|
||||
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
|
||||
- else
|
||||
+ local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
|
||||
+ } else {
|
||||
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
|
||||
+ local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
|
||||
+ }
|
||||
dst = &remote_ip;
|
||||
}
|
||||
|
||||
if (vxlan_addr_any(dst)) {
|
||||
if (did_rsc) {
|
||||
/* short-circuited back to local bridge */
|
||||
- vxlan_encap_bypass(skb, vxlan, vxlan);
|
||||
+ vxlan_encap_bypass(skb, vxlan, vxlan, true);
|
||||
return;
|
||||
}
|
||||
goto drop;
|
||||
@@ -2104,7 +2108,23 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
/* Reset the skb_iif to Tunnels interface index */
|
||||
skb->skb_iif = dev->ifindex;
|
||||
|
||||
- skb_tunnel_check_pmtu(skb, &rt->dst, VXLAN_HEADROOM, false);
|
||||
+ err = skb_tunnel_check_pmtu(skb, &rt->dst, VXLAN_HEADROOM,
|
||||
+ netif_is_any_bridge_port(dev));
|
||||
+ if (err < 0) {
|
||||
+ goto tx_error;
|
||||
+ } else if (err) {
|
||||
+ if (info) {
|
||||
+ struct in_addr src, dst;
|
||||
+
|
||||
+ src = remote_ip.sin.sin_addr;
|
||||
+ dst = local_ip.sin.sin_addr;
|
||||
+ info->key.u.ipv4.src = src.s_addr;
|
||||
+ info->key.u.ipv4.dst = dst.s_addr;
|
||||
+ }
|
||||
+ vxlan_encap_bypass(skb, vxlan, vxlan, false);
|
||||
+ ip_rt_put(rt);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
|
||||
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
|
||||
@@ -2165,7 +2185,24 @@ static void vxlan_xmit_one(struct sk_buf
|
||||
flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
|
||||
}
|
||||
|
||||
- skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM, false);
|
||||
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
|
||||
+ netif_is_any_bridge_port(dev));
|
||||
+ if (err < 0) {
|
||||
+ goto tx_error;
|
||||
+ } else if (err) {
|
||||
+ if (info) {
|
||||
+ struct in6_addr src, dst;
|
||||
+
|
||||
+ src = remote_ip.sin6.sin6_addr;
|
||||
+ dst = local_ip.sin6.sin6_addr;
|
||||
+ info->key.u.ipv6.src = src;
|
||||
+ info->key.u.ipv6.dst = dst;
|
||||
+ }
|
||||
+
|
||||
+ vxlan_encap_bypass(skb, vxlan, vxlan, false);
|
||||
+ ip_rt_put(rt);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
ttl = ttl ? : ip6_dst_hoplimit(ndst);
|
||||
err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
|
||||
Reference in New Issue
Block a user