ipq95xx: make the target work with the full stack

Signed-off-by: John Crispin <john@phrozen.org>
This commit is contained in:
John Crispin
2023-10-02 11:05:39 +02:00
parent bd67a6646a
commit 0080a5fb15
30 changed files with 1335 additions and 979 deletions

View File

@@ -0,0 +1,88 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
# B.A.T.M.A.N meshing protocol
#
# Build-time switch for the B.A.T.M.A.N. V routing algorithm.
# Only offered when kmod-batman-adv is selected; enabled by default.
config BATMAN_ADV_BATMAN_V
bool "B.A.T.M.A.N. V protocol"
depends on PACKAGE_kmod-batman-adv
default y
help
This option enables the B.A.T.M.A.N. V protocol, the successor
of the currently used B.A.T.M.A.N. IV protocol. The main
changes include splitting of the OGM protocol into a neighbor
discovery protocol (Echo Location Protocol, ELP) and a new OGM
Protocol OGMv2 for flooding protocol information through the
network, as well as a throughput based metric.
B.A.T.M.A.N. V is currently considered experimental and not
compatible to B.A.T.M.A.N. IV networks.
# Build-time switch for Bridge Loop Avoidance.
# Enabled by default; selecting it also selects kmod-lib-crc16.
config BATMAN_ADV_BLA
bool "Bridge Loop Avoidance"
depends on PACKAGE_kmod-batman-adv
select PACKAGE_kmod-lib-crc16
default y
help
This option enables BLA (Bridge Loop Avoidance), a mechanism
to avoid Ethernet frames looping when mesh nodes are connected
to both the same LAN and the same mesh. If you will never use
more than one mesh node in the same LAN, you can safely remove
this feature and save some space.
# Build-time switch for the Distributed ARP Table; enabled by default.
config BATMAN_ADV_DAT
bool "Distributed ARP Table"
depends on PACKAGE_kmod-batman-adv
default y
help
This option enables DAT (Distributed ARP Table), a DHT based
mechanism that increases ARP reliability on sparse wireless
mesh networks. If you think that your network does not need
this option you can safely remove it and save some space.
# Build-time switch for network coding.
# No "default" line is given, so it has to be selected explicitly.
config BATMAN_ADV_NC
bool "Network Coding"
depends on PACKAGE_kmod-batman-adv
help
This option enables network coding, a mechanism that aims to
increase the overall network throughput by fusing multiple
packets in one transmission.
Note that interfaces controlled by batman-adv must be manually
configured to have promiscuous mode enabled in order to make
network coding work.
If you think that your network does not need this feature you
can safely disable it and save some space.
# Build-time switch for the multicast optimisation; enabled by default.
config BATMAN_ADV_MCAST
bool "Multicast optimisation"
depends on PACKAGE_kmod-batman-adv
default y
help
This option enables the multicast optimisation which aims to
reduce the air overhead while improving the reliability of
multicast messages.
# Developer option: compile in debug log output.
# No "default" line is given, so it has to be selected explicitly.
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
depends on PACKAGE_kmod-batman-adv
help
This is an option for use by developers; most people should
say N here. This enables compilation of support for
outputting debugging information to the debugfs log or tracing
buffer. The output is controlled via the batadv netdev specific
log_level setting.
# Developer option: kernel tracing support for batman-adv.
# Selecting it also selects KERNEL_FTRACE and KERNEL_ENABLE_DEFAULT_TRACERS.
config BATMAN_ADV_TRACING
bool "B.A.T.M.A.N. tracing support"
depends on PACKAGE_kmod-batman-adv
select KERNEL_FTRACE
select KERNEL_ENABLE_DEFAULT_TRACERS
help
This is an option for use by developers; most people should
say N here. Select this option to gather traces like the debug
messages using the generic tracing infrastructure of the kernel.
BATMAN_ADV_DEBUG must also be selected to get trace events for
batadv_dbg.

View File

@@ -0,0 +1,101 @@
# SPDX-License-Identifier: GPL-2.0-only
# Common OpenWrt build rules.
include $(TOPDIR)/rules.mk
# Package identity and the upstream release tarball (name, URL, checksum).
PKG_NAME:=batman-adv
PKG_VERSION:=2022.0
PKG_RELEASE:=$(AUTORELEASE)
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
PKG_SOURCE_URL:=https://downloads.open-mesh.org/batman/releases/batman-adv-$(PKG_VERSION)
PKG_HASH:=49338705bc207709ac84d766688e702571009c827c0a320788ea51fb887714aa
# Subdirectory of the build tree that holds the external kernel module.
PKG_EXTMOD_SUBDIRS:=net/batman-adv
# Maintainer and licensing metadata.
PKG_MAINTAINER:=Simon Wunderlich <sw@simonwunderlich.de>
PKG_LICENSE:=GPL-2.0-only MIT
PKG_LICENSE_FILES:=LICENSES/preferred/GPL-2.0 LICENSES/preferred/MIT
PKG_BUILD_PARALLEL:=1
# Ties the "configured" stamp to the staged mac80211 backport autoconf.h
# (presumably so the package is reconfigured when the backport changes;
# TODO confirm against the build system).
STAMP_CONFIGURED_DEPENDS := $(STAGING_DIR)/usr/include/mac80211-backport/backport/autoconf.h
# Feature symbols from Config.in that influence how this package is built.
PKG_CONFIG_DEPENDS += \
CONFIG_BATMAN_ADV_BATMAN_V \
CONFIG_BATMAN_ADV_BLA \
CONFIG_BATMAN_ADV_DAT \
CONFIG_BATMAN_ADV_NC \
CONFIG_BATMAN_ADV_MCAST \
CONFIG_BATMAN_ADV_DEBUG \
CONFIG_BATMAN_ADV_TRACING
# Kernel and generic package helpers; included after the PKG_* variables above.
include $(INCLUDE_DIR)/kernel.mk
include $(INCLUDE_DIR)/package.mk
# Kernel package definition for the batman-adv module.
# DEPENDS pulls in kmod-lib-crc16 only when BATMAN_ADV_BLA is enabled;
# kmod-lib-crc32c, kmod-cfg80211 and batctl are always required.
# FILES names the built module and AUTOLOAD registers it via AutoProbe.
define KernelPackage/batman-adv
SUBMENU:=Network Support
TITLE:=B.A.T.M.A.N. Adv
URL:=https://www.open-mesh.org/
DEPENDS:=+BATMAN_ADV_BLA:kmod-lib-crc16 +kmod-lib-crc32c +kmod-cfg80211 +batctl
FILES:=$(PKG_BUILD_DIR)/net/batman-adv/batman-adv.$(LINUX_KMOD_SUFFIX)
AUTOLOAD:=$(call AutoProbe,batman-adv)
endef
# Long description of the package; embeds the packaged version number.
define KernelPackage/batman-adv/description
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
a routing protocol for multi-hop ad-hoc mesh networks. The
networks may be wired or wireless. See
https://www.open-mesh.org/ for more information and user space
tools. This package builds version $(PKG_VERSION) of the kernel
module.
endef
# Pulls the feature switches from the Config.in next to this Makefile.
define KernelPackage/batman-adv/config
source "$(SOURCE)/Config.in"
endef
# Registers /etc/config/batman-adv as a configuration file of kmod-batman-adv.
define Package/kmod-batman-adv/conffiles
/etc/config/batman-adv
endef
# Kconfig symbols handed to the kernel build on the make command line.
# The module itself is always built (=m); every optional feature is forced
# to an explicit y or n depending on whether the matching
# CONFIG_BATMAN_ADV_* symbol is set. The final entry carries no trailing
# backslash, so the list ends here and cannot absorb the line that follows.
PKG_EXTRA_KCONFIG:= \
	CONFIG_BATMAN_ADV=m \
	CONFIG_BATMAN_ADV_DEBUG=$(if $(CONFIG_BATMAN_ADV_DEBUG),y,n) \
	CONFIG_BATMAN_ADV_BLA=$(if $(CONFIG_BATMAN_ADV_BLA),y,n) \
	CONFIG_BATMAN_ADV_DAT=$(if $(CONFIG_BATMAN_ADV_DAT),y,n) \
	CONFIG_BATMAN_ADV_MCAST=$(if $(CONFIG_BATMAN_ADV_MCAST),y,n) \
	CONFIG_BATMAN_ADV_NC=$(if $(CONFIG_BATMAN_ADV_NC),y,n) \
	CONFIG_BATMAN_ADV_BATMAN_V=$(if $(CONFIG_BATMAN_ADV_BATMAN_V),y,n) \
	CONFIG_BATMAN_ADV_TRACING=$(if $(CONFIG_BATMAN_ADV_TRACING),y,n)

# Turns every symbol set to =m or =y above into a -DCONFIG_<name>=1 define;
# symbols set to =n produce nothing. As above, the last entry has no
# trailing backslash.
PKG_EXTRA_CFLAGS:= \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=m,%,$(filter %=m,$(PKG_EXTRA_KCONFIG)))) \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=y,%,$(filter %=y,$(PKG_EXTRA_KCONFIG))))

# Include search path and force-included headers for the module build:
# the kernel's own NOSTDINC flags, the batman-adv sources, the staged
# mac80211 backport headers, the package's include/ directory, the backport
# autoconf/backport headers and compat-hacks.h from the build directory.
# BATADV_SOURCE_VERSION is passed as an escaped string literal built from
# the package version and release.
NOSTDINC_FLAGS = \
	$(KERNEL_NOSTDINC_FLAGS) \
	-I$(PKG_BUILD_DIR)/net/batman-adv \
	-I$(STAGING_DIR)/usr/include/mac80211-backport \
	-I$(STAGING_DIR)/usr/include/mac80211-backport/uapi \
	-I$(STAGING_DIR)/usr/include/mac80211 \
	-I$(STAGING_DIR)/usr/include/mac80211/uapi \
	-I$(PKG_BUILD_DIR)/include/ \
	-include backport/autoconf.h \
	-include backport/backport.h \
	-include $(PKG_BUILD_DIR)/compat-hacks.h \
	-DBATADV_SOURCE_VERSION=\\\"$(PKG_VERSION)-openwrt-$(PKG_RELEASE)\\\"
# Builds the module by running the kernel's own build system in
# $(LINUX_DIR) with M= pointing at the batman-adv sources. The Kconfig
# symbols, extra CFLAGS and NOSTDINC_FLAGS defined earlier in this file are
# passed on the command line; the goal is "modules".
define Build/Compile
$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" \
$(KERNEL_MAKE_FLAGS) \
M="$(PKG_BUILD_DIR)/net/batman-adv" \
$(PKG_EXTRA_KCONFIG) \
EXTRA_CFLAGS="$(PKG_EXTRA_CFLAGS)" \
NOSTDINC_FLAGS="$(NOSTDINC_FLAGS)" \
modules
endef
# Copies the contents of ./files into the package root passed as $(1).
define KernelPackage/batman-adv/install
$(CP) ./files/. $(1)/
endef
# Instantiates the kernel package defined above.
$(eval $(call KernelPackage,batman-adv))

View File

@@ -0,0 +1,97 @@
#!/bin/sh
# This UCI-Defaults script splits legacy batadv proto network interfaces
# into a batadv_hardif proto section and a batadv proto section. The
# configuration options from /etc/config/batman-adv are moved to the latter.
. /lib/functions.sh
# Convert one legacy "batadv" network section into the split layout.
#
# The section itself is turned into a batadv_hardif section (its "mesh"
# option is renamed to "master" and "routing_algo" is removed from it), and
# a section named after the mesh value is created or updated with proto
# batadv, inheriting routing_algo when one was set.
#
# $1 - name of the network section handed in by config_foreach
proto_batadv_to_batadv_hardif() {
	local section="$1"
	local proto
	local mesh
	local routing_algo

	config_get proto "${section}" proto
	config_get mesh "${section}" mesh
	config_get routing_algo "${section}" routing_algo

	# Only sections with proto batadv and a mesh option are migrated.
	# Leave via "return": this function contains no loop of its own, and
	# "continue" without an enclosing loop has unspecified behaviour.
	if [ -z "$mesh" ] || [ "${proto}" != "batadv" ]; then
		return 0
	fi

	uci set network."${section}".proto="batadv_hardif"
	uci rename network."${section}".mesh="master"
	uci delete network."${section}".routing_algo

	# create new section or adjust existing one
	uci set network."${mesh}"=interface
	uci set network."${mesh}".proto=batadv
	[ -n "${routing_algo}" ] && uci set network."${mesh}".routing_algo="${routing_algo}"
}
# Copy the known options of one section of the currently loaded config
# into the network section of the same name. Options that are unset or
# empty in the loaded section are skipped.
#
# $1 - name of the section handed in by config_foreach
mv_batadv_config_section() {
	local section="$1"
	local option
	local value

	for option in \
		aggregated_ogms \
		ap_isolation \
		bonding \
		bridge_loop_avoidance \
		distributed_arp_table \
		fragmentation \
		gw_bandwidth \
		gw_mode \
		gw_sel_class \
		hop_penalty \
		isolation_mark \
		log_level \
		multicast_mode \
		network_coding \
		orig_interval
	do
		config_get value "${section}" "${option}"

		# update section in case it exists
		[ -n "${value}" ] && uci set network."${section}"."${option}"="${value}"
	done
}
# Nothing to migrate when the legacy config file is absent.
[ -f /etc/config/batman-adv ] || exit 0

# Step 1: convert the interface sections of the network config.
config_load network
config_foreach proto_batadv_to_batadv_hardif 'interface'
uci commit network

# Step 2: move the options of every "mesh" section of the legacy config
# into the network config, then remove the legacy file.
config_load batman-adv
config_foreach mv_batadv_config_section 'mesh'
uci commit network
rm -f /etc/config/batman-adv

exit 0

View File

@@ -0,0 +1,123 @@
#!/bin/sh
# Unless INCLUDE_ONLY is set, load the shell helper libraries and hand the
# script arguments to init_proto. With INCLUDE_ONLY set, only the function
# definitions below are evaluated.
[ -n "$INCLUDE_ONLY" ] || {
. /lib/functions.sh
. ../netifd-proto.sh
init_proto "$@"
}
# Declare the options accepted by the "batadv" protocol.
#
# Sets the no_device and available flags (presumably consumed by the netifd
# proto framework when it dumps the handler description; confirm) and then
# registers each option with the proto_config_add_<type> helper of its type.
proto_batadv_init_config() {
	local spec

	no_device=1
	available=1

	# Each entry is "<type>=<option>"; entries are registered in list order.
	for spec in \
		'boolean=aggregated_ogms:bool' \
		'boolean=ap_isolation:bool' \
		'boolean=bonding:bool' \
		'boolean=bridge_loop_avoidance:bool' \
		'boolean=distributed_arp_table:bool' \
		'boolean=fragmentation:bool' \
		'string=gw_bandwidth' \
		'string=gw_mode' \
		'int=gw_sel_class' \
		'int=hop_penalty' \
		'string=isolation_mark' \
		'string=log_level' \
		'int=multicast_fanout' \
		'boolean=multicast_mode:bool' \
		'boolean=network_coding:bool' \
		'int=orig_interval' \
		'string=routing_algo'
	do
		"proto_config_add_${spec%%=*}" "${spec#*=}"
	done
}
# Setup handler of the "batadv" protocol.
#
# Selects the routing algorithm (BATMAN_IV when none is configured),
# creates a mesh interface named after the config section and applies every
# configured option to it through batctl. Options left unset are not
# touched, except gw_mode, which is set to "off" unless it is "server" or
# "client".
#
# $1 - config section name; also used as the mesh interface name
proto_batadv_setup() {
	local config="$1"
	local iface="$config"

	local aggregated_ogms ap_isolation bonding bridge_loop_avoidance
	local distributed_arp_table fragmentation gw_bandwidth gw_mode
	local gw_sel_class hop_penalty isolation_mark log_level
	local multicast_fanout multicast_mode network_coding orig_interval
	local routing_algo

	json_get_vars \
		aggregated_ogms ap_isolation bonding bridge_loop_avoidance \
		distributed_arp_table fragmentation gw_bandwidth gw_mode \
		gw_sel_class hop_penalty isolation_mark log_level \
		multicast_fanout multicast_mode network_coding orig_interval \
		routing_algo

	set_default routing_algo 'BATMAN_IV'

	batctl routing_algo "$routing_algo"
	batctl meshif "$iface" interface create

	# Commands run with stderr closed (2>&-) stay silent on failure
	# (presumably because the feature may be compiled out; confirm).
	[ -z "$aggregated_ogms" ] || batctl meshif "$iface" aggregation "$aggregated_ogms"
	[ -z "$ap_isolation" ] || batctl meshif "$iface" ap_isolation "$ap_isolation"
	[ -z "$bonding" ] || batctl meshif "$iface" bonding "$bonding"
	[ -z "$bridge_loop_avoidance" ] || batctl meshif "$iface" bridge_loop_avoidance "$bridge_loop_avoidance" 2>&-
	[ -z "$distributed_arp_table" ] || batctl meshif "$iface" distributed_arp_table "$distributed_arp_table" 2>&-
	[ -z "$fragmentation" ] || batctl meshif "$iface" fragmentation "$fragmentation"

	# The optional extra argument (bandwidth for server, selection class
	# for client) is appended only when it is non-empty.
	case "$gw_mode" in
	server)
		batctl meshif "$iface" gw_mode "server" ${gw_bandwidth:+"$gw_bandwidth"}
		;;
	client)
		batctl meshif "$iface" gw_mode "client" ${gw_sel_class:+"$gw_sel_class"}
		;;
	*)
		batctl meshif "$iface" gw_mode "off"
		;;
	esac

	[ -z "$hop_penalty" ] || batctl meshif "$iface" hop_penalty "$hop_penalty"
	[ -z "$isolation_mark" ] || batctl meshif "$iface" isolation_mark "$isolation_mark"
	[ -z "$multicast_fanout" ] || batctl meshif "$iface" multicast_fanout "$multicast_fanout"
	[ -z "$multicast_mode" ] || batctl meshif "$iface" multicast_mode "$multicast_mode" 2>&-
	[ -z "$network_coding" ] || batctl meshif "$iface" network_coding "$network_coding" 2>&-
	[ -z "$log_level" ] || batctl meshif "$iface" loglevel "$log_level" 2>&-
	[ -z "$orig_interval" ] || batctl meshif "$iface" orig_interval "$orig_interval"

	# Hand the interface over to netifd.
	proto_init_update "$iface" 1
	proto_send_update "$config"
}
# Teardown handler of the "batadv" protocol: destroys the mesh interface,
# which carries the same name as the config section.
#
# $1 - config section name
proto_batadv_teardown() {
	local iface="$1"

	batctl meshif "$iface" interface destroy
}

# Register the handler under the protocol name "batadv".
add_protocol batadv

View File

@@ -0,0 +1,53 @@
#!/bin/sh
# Unless INCLUDE_ONLY is set, load the shell helper libraries and hand the
# script arguments to init_proto. With INCLUDE_ONLY set, only the function
# definitions below are evaluated.
[ -n "$INCLUDE_ONLY" ] || {
. /lib/functions.sh
. ../netifd-proto.sh
init_proto "$@"
}
# Declare the options accepted by the "batadv_hardif" protocol.
proto_batadv_hardif_init_config() {
	local spec

	# Each entry is "<type>=<option>"; entries are registered in list order
	# with the proto_config_add_<type> helper of their type.
	for spec in \
		'int=elp_interval' \
		'int=hop_penalty' \
		'string=master' \
		'string=throughput_override'
	do
		"proto_config_add_${spec%%=*}" "${spec#*=}"
	done
}
# Setup handler of the "batadv_hardif" protocol.
#
# Adds the given device to the mesh interface named by the "master" option
# and applies the configured per-interface settings through batctl.
#
# $1 - config section name
# $2 - device to add to the mesh
proto_batadv_hardif_setup() {
	local config="$1"
	local iface="$2"
	local elp_interval hop_penalty master throughput_override

	json_get_vars elp_interval hop_penalty master throughput_override

	# Declared inside a subshell, as in the original handler (presumably to
	# keep the helper's effects on shell state contained; confirm).
	( proto_add_host_dependency "$config" '' "$master" )

	batctl meshif "$master" interface -M add "$iface"

	[ -z "$elp_interval" ] || batctl hardif "$iface" elp_interval "$elp_interval"
	[ -z "$hop_penalty" ] || batctl hardif "$iface" hop_penalty "$hop_penalty"
	[ -z "$throughput_override" ] || batctl hardif "$iface" throughput_override "$throughput_override"

	# Hand the interface over to netifd.
	proto_init_update "$iface" 1
	proto_send_update "$config"
}
# Teardown handler of the "batadv_hardif" protocol: removes the device from
# the mesh interface named by the "master" option. A failing removal is
# ignored.
#
# $1 - config section name (unused)
# $2 - device to remove from the mesh
proto_batadv_hardif_teardown() {
	local iface="$2"
	local master

	json_get_vars master

	batctl meshif "$master" interface -M del "$iface" || :
}

# Register the handler under the protocol name "batadv_hardif".
add_protocol batadv_hardif

View File

@@ -0,0 +1,25 @@
#!/bin/sh
# Unless INCLUDE_ONLY is set, load the shell helper libraries and hand the
# script arguments to init_proto. The guard mirrors the other batadv
# protocol handlers, so this file can be sourced for its function
# definitions alone without running init_proto.
[ -n "$INCLUDE_ONLY" ] || {
	. /lib/functions.sh
	. ../netifd-proto.sh
	init_proto "$@"
}
# Declare the options accepted by the "batadv_vlan" protocol:
# a single boolean, ap_isolation.
proto_batadv_vlan_init_config() {
proto_config_add_boolean 'ap_isolation:bool'
}
# Setup handler of the "batadv_vlan" protocol: applies the ap_isolation
# setting to the given VLAN device through batctl when the option is set.
#
# $1 - config section name
# $2 - VLAN device
proto_batadv_vlan_setup() {
	local config="$1"
	local iface="$2"
	local ap_isolation

	# batadv_vlan options
	json_get_vars ap_isolation

	[ -z "$ap_isolation" ] || batctl vlan "$iface" ap_isolation "$ap_isolation"

	# Hand the interface over to netifd.
	proto_init_update "$iface" 1
	proto_send_update "$config"
}

# Register the handler under the protocol name "batadv_vlan".
add_protocol batadv_vlan

View File

@@ -0,0 +1,128 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 24 Oct 2020 22:51:23 +0200
Subject: Revert "batman-adv: genetlink: move to smaller ops wherever possible"
The netlink genl_ops interface was split into two parts for Linux 5.10.
The batman-adv code changed to the new one because it doesn't use the more
complex policy handling of genl_ops. But the backports-5.8-1 version in
OpenWrt doesn't yet support the new genl_small_ops.
This patch must be dropped directly when OpenWrt switches to backports-5.10
or newer - otherwise it will not work as expected.
This reverts commit 725b4ef5be840cfcd0ca33b9393c14dee40c10f7.
--- a/compat-include/net/genetlink.h
+++ b/compat-include/net/genetlink.h
@@ -31,17 +31,15 @@ void batadv_genl_dump_check_consistent(s
#endif /* LINUX_VERSION_IS_LESS(4, 15, 0) */
-#if LINUX_VERSION_IS_LESS(5, 10, 0)
-
#if LINUX_VERSION_IS_LESS(5, 2, 0)
+
enum genl_validate_flags {
GENL_DONT_VALIDATE_STRICT = BIT(0),
GENL_DONT_VALIDATE_DUMP = BIT(1),
GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2),
};
-#endif /* LINUX_VERSION_IS_LESS(5, 2, 0) */
-struct batadv_genl_small_ops {
+struct batadv_genl_ops {
int (*doit)(struct sk_buff *skb,
struct genl_info *info);
int (*dumpit)(struct sk_buff *skb,
@@ -70,9 +68,9 @@ struct batadv_genl_family {
struct genl_info *info);
void (*post_doit)(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info);
- const struct batadv_genl_small_ops *small_ops;
+ const struct batadv_genl_ops *ops;
const struct genl_multicast_group *mcgrps;
- unsigned int n_small_ops;
+ unsigned int n_ops;
unsigned int n_mcgrps;
struct module *module;
@@ -96,32 +94,24 @@ static inline int batadv_genl_register_f
family->family.pre_doit = family->pre_doit;
family->family.post_doit = family->post_doit;
family->family.mcgrps = family->mcgrps;
- family->family.n_ops = family->n_small_ops;
+ family->family.n_ops = family->n_ops;
family->family.n_mcgrps = family->n_mcgrps;
family->family.module = family->module;
- ops = kzalloc(sizeof(*ops) * family->n_small_ops, GFP_KERNEL);
+ ops = kzalloc(sizeof(*ops) * family->n_ops, GFP_KERNEL);
if (!ops)
return -ENOMEM;
for (i = 0; i < family->family.n_ops; i++) {
- ops[i].doit = family->small_ops[i].doit;
- ops[i].dumpit = family->small_ops[i].dumpit;
- ops[i].done = family->small_ops[i].done;
- ops[i].cmd = family->small_ops[i].cmd;
- ops[i].internal_flags = family->small_ops[i].internal_flags;
- ops[i].flags = family->small_ops[i].flags;
-#if LINUX_VERSION_IS_GEQ(5, 2, 0)
- ops[i].validate = family->small_ops[i].validate;
-#else
+ ops[i].doit = family->ops[i].doit;
+ ops[i].dumpit = family->ops[i].dumpit;
+ ops[i].done = family->ops[i].done;
+ ops[i].cmd = family->ops[i].cmd;
+ ops[i].internal_flags = family->ops[i].internal_flags;
+ ops[i].flags = family->ops[i].flags;
ops[i].policy = family->policy;
-#endif
}
-#if LINUX_VERSION_IS_GEQ(5, 2, 0)
- family->family.policy = family->policy;
-#endif
-
family->family.ops = ops;
family->copy_ops = ops;
@@ -136,7 +126,7 @@ typedef struct genl_ops batadv_genl_ops_
#define batadv_post_doit(__x, __y, __z) \
batadv_post_doit(const batadv_genl_ops_old *ops, __y, __z)
-#define genl_small_ops batadv_genl_small_ops
+#define genl_ops batadv_genl_ops
#define genl_family batadv_genl_family
#define genl_register_family(family) \
@@ -160,6 +150,6 @@ batadv_genl_unregister_family(struct bat
genlmsg_multicast_netns(&(_family)->family, _net, _skb, _portid, \
_group, _flags)
-#endif /* LINUX_VERSION_IS_LESS(5, 10, 0) */
+#endif /* LINUX_VERSION_IS_LESS(5, 2, 0) */
#endif /* _NET_BATMAN_ADV_COMPAT_NET_GENETLINK_H_ */
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1357,7 +1357,7 @@ static void batadv_post_doit(const struc
}
}
-static const struct genl_small_ops batadv_netlink_ops[] = {
+static const struct genl_ops batadv_netlink_ops[] = {
{
.cmd = BATADV_CMD_GET_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -1491,8 +1491,8 @@ struct genl_family batadv_netlink_family
.pre_doit = batadv_pre_doit,
.post_doit = batadv_post_doit,
.module = THIS_MODULE,
- .small_ops = batadv_netlink_ops,
- .n_small_ops = ARRAY_SIZE(batadv_netlink_ops),
+ .ops = batadv_netlink_ops,
+ .n_ops = ARRAY_SIZE(batadv_netlink_ops),
.mcgrps = batadv_netlink_mcgrps,
.n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
};

View File

@@ -0,0 +1,116 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Thu, 28 Jan 2021 21:06:51 +0100
Subject: Revert "batman-adv: Add new include for min/max helpers"
The OpenWrt kernel sources and backports sources are currently missing this
header.
This reverts commit 1810de05310d5c5e9140f870ac21052f38bc06b8.
Signed-off-by: Sven Eckelmann <sven@narfation.org>
--- a/compat-include/linux/minmax.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This file contains macros for maintaining compatibility with older versions
- * of the Linux kernel.
- */
-
-#ifndef _NET_BATMAN_ADV_COMPAT_LINUX_MINMAX_H_
-#define _NET_BATMAN_ADV_COMPAT_LINUX_MINMAX_H_
-
-#include <linux/version.h>
-#if LINUX_VERSION_IS_GEQ(5, 10, 0)
-#include_next <linux/minmax.h>
-#else
-#include <linux/kernel.h>
-#endif
-
-#endif /* _NET_BATMAN_ADV_COMPAT_LINUX_MINMAX_H_ */
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -15,7 +15,6 @@
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,7 +18,6 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
-#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
#include <linux/prandom.h>
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -18,7 +18,6 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
-#include <linux/minmax.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/prandom.h>
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -14,8 +14,8 @@
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/lockdep.h>
-#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -17,7 +17,6 @@
#include <linux/kref.h>
#include <linux/limits.h>
#include <linux/list.h>
-#include <linux/minmax.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -23,7 +23,6 @@
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
-#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/printk.h>
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -23,7 +23,6 @@
#include <linux/kthread.h>
#include <linux/limits.h>
#include <linux/list.h>
-#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/param.h>
#include <linux/printk.h>

View File

@@ -0,0 +1,34 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 14 May 2021 19:34:35 +0200
Subject: batman-adv: Fix build of multicast code against Linux < 5.13
Fixes: 007b4c4b031f ("batman-adv: convert ifmcaddr6 to RCU")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -422,9 +422,14 @@ batadv_mcast_mla_softif_get_ipv6(struct
return 0;
}
+#if LINUX_VERSION_IS_LESS(5, 13, 0)
+ read_lock_bh(&in6_dev->lock);
+ for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) {
+#else
for (pmc6 = rcu_dereference(in6_dev->mc_list);
pmc6;
pmc6 = rcu_dereference(pmc6->next)) {
+#endif
if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) <
IPV6_ADDR_SCOPE_LINKLOCAL)
continue;
@@ -453,6 +458,9 @@ batadv_mcast_mla_softif_get_ipv6(struct
hlist_add_head(&new->list, mcast_list);
ret++;
}
+#if LINUX_VERSION_IS_LESS(5, 13, 0)
+ read_unlock_bh(&in6_dev->lock);
+#endif
rcu_read_unlock();
return ret;

View File

@@ -0,0 +1,19 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 14 Sep 2021 21:02:10 +0200
Subject: Revert "batman-adv: Switch to kstrtox.h for kstrtou64"
This header is only available after Linux 5.14
This reverts commit c9a69cb4048ebef3a4d91835669011a26d9b7dab.
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -10,7 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
-#include <linux/kstrtox.h>
+#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/netdevice.h>

View File

@@ -0,0 +1,19 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 14 Sep 2021 21:07:34 +0200
Subject: Revert "batman-adv: use Linux's stdarg.h"
This header is only available since Linux 5.15
This reverts commit 36d059797a14f0e373fdc3c79df7b467435925ad.
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -7,7 +7,7 @@
#include "log.h"
#include "main.h"
-#include <linux/stdarg.h>
+#include <stdarg.h>
#include "trace.h"

View File

@@ -0,0 +1,27 @@
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Mar 2022 20:05:13 +0100
Subject: batman-adv: make mc_forwarding atomic
This fixes minor data-races in ip6_mc_input() and
batadv_mcast_mla_rtr_flags_softif_get_ipv6()
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[sven@narfation.org: Add ugly hack to get it building with old kernels]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/56db7c0540e733a1f063ccd6bab1b537a80857eb
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -134,7 +134,11 @@ static u8 batadv_mcast_mla_rtr_flags_sof
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
+#if LINUX_VERSION_IS_GEQ(5, 18, 0) // UGLY_HACK_NEW
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
+#else // UGLY_HACK_OLD
if (in6_dev && in6_dev->cnf.mc_forwarding)
+#endif // UGLY_HACK_STOP
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;

View File

@@ -0,0 +1,23 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 15 Apr 2022 15:12:45 +0200
Subject: batman-adv: compat: Add atomic mc_fowarding support for stable kernels
Fixes: 56db7c0540e7 ("batman-adv: make mc_forwarding atomic")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/350adcaec82fbaa358a2406343b6130ac8dad126
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -134,7 +134,11 @@ static u8 batadv_mcast_mla_rtr_flags_sof
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
-#if LINUX_VERSION_IS_GEQ(5, 18, 0) // UGLY_HACK_NEW
+#if (LINUX_VERSION_IS_GEQ(5, 4, 189) && LINUX_VERSION_IS_LESS(5, 5, 0)) || /* UGLY_HACK */ \
+ (LINUX_VERSION_IS_GEQ(5, 10, 111) && LINUX_VERSION_IS_LESS(5, 11, 0)) || /* UGLY_HACK */ \
+ (LINUX_VERSION_IS_GEQ(5, 15, 34) && LINUX_VERSION_IS_LESS(5, 16, 0)) || /* UGLY_HACK */ \
+ (LINUX_VERSION_IS_GEQ(5, 16, 20) && LINUX_VERSION_IS_LESS(5, 17, 0)) || /* UGLY_HACK */ \
+ LINUX_VERSION_IS_GEQ(5, 17, 3) // UGLY_HACK_NEW
if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
#else // UGLY_HACK_OLD
if (in6_dev && in6_dev->cnf.mc_forwarding)

View File

@@ -0,0 +1,44 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 2 Mar 2022 19:49:44 +0100
Subject: batman-adv: Request iflink once in batadv-on-batadv check
There is no need to call dev_get_iflink multiple times for the same
net_device in batadv_is_on_batman_iface. And since some of the
.ndo_get_iflink callbacks are dynamic (for example via RCUs like in
vxcan_get_iflink), it could easily happen that the returned values are not
stable. The pre-checks before __dev_get_by_index are then of course bogus.
Fixes: 3d48811b27f5 ("batman-adv: prevent using any virtual device created on batman-adv as hard-interface")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/f6c0c45356fe3ab9c5e01b81d060f8a436658037
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -148,22 +148,23 @@ static bool batadv_is_on_batman_iface(co
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
+ iflink = dev_get_iflink(net_dev);
+
/* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ if (iflink == 0 || iflink == net_dev->ifindex)
return false;
parent_net = batadv_getlink_net(net_dev, net);
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
/* if we got a NULL parent_dev there is something broken.. */
if (!parent_dev) {
pr_err("Cannot find parent device\n");

View File

@@ -0,0 +1,46 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 2 Mar 2022 19:49:45 +0100
Subject: batman-adv: Request iflink once in batadv_get_real_netdevice
There is no need to call dev_get_iflink multiple times for the same
net_device in batadv_get_real_netdevice. And since some of the
ndo_get_iflink callbacks are dynamic (for example via RCUs like in
vxcan_get_iflink), it could easily happen that the returned values are not
stable. The pre-checks before __dev_get_by_index are then of course bogus.
Fixes: 2b45bb6c3aad ("batman-adv: additional checks for virtual interfaces on top of WiFi")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/445f477ec3e805425186f5fbe8ed0fe89cc226b6
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -214,14 +214,16 @@ static struct net_device *batadv_get_rea
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+
+ if (netdev->ifindex == iflink) {
dev_hold(netdev);
return netdev;
}
@@ -231,9 +233,8 @@ static struct net_device *batadv_get_rea
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
batadv_hardif_put(hard_iface);

View File

@@ -0,0 +1,86 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 2 Mar 2022 19:49:46 +0100
Subject: batman-adv: Don't expect inter-netns unique iflink indices
The ifindex doesn't have to be unique for multiple network namespaces on
the same machine.
$ ip netns add test1
$ ip -net test1 link add dummy1 type dummy
$ ip netns add test2
$ ip -net test2 link add dummy2 type dummy
$ ip -net test1 link show dev dummy1
6: dummy1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 96:81:55:1e:dd:85 brd ff:ff:ff:ff:ff:ff
$ ip -net test2 link show dev dummy2
6: dummy2: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 5a:3c:af:35:07:c3 brd ff:ff:ff:ff:ff:ff
But the batman-adv code to walk through the various layers of virtual
interfaces uses this assumption because dev_get_iflink handles it
internally and doesn't return the actual netns of the iflink. And
dev_get_iflink only documents the situation where ifindex == iflink for
physical devices.
But only checking for dev->netdev_ops->ndo_get_iflink is also not an option
because ipoib_get_iflink implements it even when it sometimes returns an
iflink != ifindex and sometimes iflink == ifindex. The caller must
therefore make sure itself to check both netns and iflink + ifindex for
equality. Only when they are equal, a "physical" interface was detected
which should stop the traversal. On the other hand, vxcan_get_iflink can
also return 0 in case there was currently no valid peer. In this case, it
is still necessary to stop.
Fixes: 3d48811b27f5 ("batman-adv: prevent using any virtual device created on batman-adv as hard-interface")
Fixes: 2b45bb6c3aad ("batman-adv: additional checks for virtual interfaces on top of WiFi")
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/0aac7a9fbbbeec25f2f54a9e6d53ea91217ba720
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -156,13 +156,15 @@ static bool batadv_is_on_batman_iface(co
return true;
iflink = dev_get_iflink(net_dev);
-
- /* no more parents..stop recursion */
- if (iflink == 0 || iflink == net_dev->ifindex)
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
/* if we got a NULL parent_dev there is something broken.. */
@@ -222,8 +224,7 @@ static struct net_device *batadv_get_rea
return NULL;
iflink = dev_get_iflink(netdev);
-
- if (netdev->ifindex == iflink) {
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -234,6 +235,14 @@ static struct net_device *batadv_get_rea
net = dev_net(hard_iface->soft_iface);
real_net = batadv_getlink_net(netdev, net);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
real_netdev = dev_get_by_index(real_net, iflink);
out:

View File

@@ -0,0 +1,46 @@
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 16 Apr 2022 14:24:34 +0200
Subject: batman-adv: Don't skb_split skbuffs with frag_list
The receiving interface might have used GRO to receive more fragments than
MAX_SKB_FRAGS fragments. In this case, these will not be stored in
skb_shinfo(skb)->frags but merged into the frag list.
batman-adv relies on the function skb_split to split packets up into
multiple smaller packets which are not larger than the MTU on the outgoing
interface. But this function cannot handle frag_list entries and is only
operating on skb_shinfo(skb)->frags. If it is still trying to split such an
skb and xmit'ing it on an interface without support for NETIF_F_FRAGLIST,
then validate_xmit_skb() will try to linearize it. But this fails due to
inconsistent information. And __pskb_pull_tail will trigger a BUG_ON after
skb_copy_bits() returns an error.
In case of entries in frag_list, just linearize the skb before operating on
it with skb_split().
Reported-by: Felix Kaechele <felix@kaechele.ca>
Tested-by: Felix Kaechele <felix@kaechele.ca>
Fixes: 9de347143505 ("batman-adv: layer2 unicast packet fragmentation")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/d467720acaf1b22b0cee58144eeaf9ef6c5e285c
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_bu
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;

View File

@@ -0,0 +1,179 @@
/* Please avoid adding hacks here - instead add it to mac80211/backports.git */
#undef CONFIG_MODULE_STRIPPED
#include <linux/version.h> /* LINUX_VERSION_CODE */
#include <linux/types.h>
#if LINUX_VERSION_IS_LESS(5, 10, 0)
#include <linux/if_bridge.h>
/* Local stand-in for the bridge code's struct br_ip, only defined for
 * kernels older than 5.10 (see the enclosing version check). It is aliased
 * to the name br_ip by the #define further down, so the member layout here
 * is expected to match the kernel's own structure.
 */
struct batadv_br_ip {
/* destination address; which member is valid depends on proto */
union {
__be32 ip4;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
} dst;
/* ethertype in network byte order (compared against htons(ETH_P_IPV6)) */
__be16 proto;
/* NOTE(review): presumably the VLAN id of the entry - confirm against br_ip */
__u16 vid;
};
/* List wrapper around struct batadv_br_ip; aliased to br_ip_list by the
 * #define further down. Entries of this type are what the
 * br_multicast_has_router_adjacent() fallback in this header walks and frees.
 */
struct batadv_br_ip_list {
/* linkage used with list_for_each_entry_safe()/list_del() */
struct list_head list;
/* the address record carried by this entry */
struct batadv_br_ip addr;
};
#if 0
/* "static" dropped to force compiler to evaluate it as part of multicast.c
* might need to be added again and then called in some kind of dummy
* compat.c in case this header is included in multiple files.
*/
/* Compile-time only: every statement is a BUILD_BUG_ON comparing size and
 * member offsets of the batadv_* stand-ins with the kernel's br_ip and
 * br_ip_list. Currently compiled out by the surrounding "#if 0".
 */
inline void __batadv_br_ip_list_check(void)
{
/* list wrapper: overall size and both member offsets */
BUILD_BUG_ON(sizeof(struct batadv_br_ip_list) != sizeof(struct br_ip_list));
BUILD_BUG_ON(offsetof(struct batadv_br_ip_list, list) != offsetof(struct br_ip_list, list));
BUILD_BUG_ON(offsetof(struct batadv_br_ip_list, addr) != offsetof(struct br_ip_list, addr));
/* address record: overall size and each member offset (dst.* maps to u.*) */
BUILD_BUG_ON(sizeof(struct batadv_br_ip) != sizeof(struct br_ip));
BUILD_BUG_ON(offsetof(struct batadv_br_ip, dst.ip4) != offsetof(struct br_ip, u.ip4));
BUILD_BUG_ON(offsetof(struct batadv_br_ip, dst.ip6) != offsetof(struct br_ip, u.ip6));
BUILD_BUG_ON(offsetof(struct batadv_br_ip, proto) != offsetof(struct br_ip, proto));
BUILD_BUG_ON(offsetof(struct batadv_br_ip, vid) != offsetof(struct br_ip, vid));
}
#endif
#define br_ip batadv_br_ip
#define br_ip_list batadv_br_ip_list
#endif /* LINUX_VERSION_IS_LESS(5, 10, 0) */
#if LINUX_VERSION_IS_LESS(5, 14, 0)
#include <linux/if_bridge.h>
#include <net/addrconf.h>
#if IS_ENABLED(CONFIG_IPV6)
static inline bool
br_multicast_has_router_adjacent(struct net_device *dev, int proto)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
struct br_ip_list *br_ip_entry, *tmp;
int ret;
if (proto != ETH_P_IPV6)
return true;
ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
if (ret < 0)
return true;
ret = false;
list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.dst.ip6))
ret = true;
list_del(&br_ip_entry->list);
kfree(br_ip_entry);
}
return ret;
}
#else
/* Variant used when CONFIG_IPV6 is not enabled (kernels older than 5.14):
 * no lookup is performed and a router is always reported as adjacent.
 */
static inline bool
br_multicast_has_router_adjacent(struct net_device *dev, int proto)
{
return true;
}
#endif
#endif /* LINUX_VERSION_IS_LESS(5, 14, 0) */
#if LINUX_VERSION_IS_LESS(5, 15, 0)
/* Backport helper for kernels older than 5.15 (see the enclosing version
 * check): copies @addr into @dev->dev_addr using ether_addr_copy(). The
 * #define that follows exposes it under the name eth_hw_addr_set.
 */
static inline void batadv_eth_hw_addr_set(struct net_device *dev,
const u8 *addr)
{
ether_addr_copy(dev->dev_addr, addr);
}
#define eth_hw_addr_set batadv_eth_hw_addr_set
#endif /* LINUX_VERSION_IS_LESS(5, 15, 0) */
/* <DECLARE_EWMA> */
#include <linux/version.h>
#include_next <linux/average.h>
#include <linux/bug.h>
#ifdef DECLARE_EWMA
#undef DECLARE_EWMA
#endif /* DECLARE_EWMA */
/*
 * Exponentially weighted moving average (EWMA)
 *
 * This implements a fixed-precision EWMA algorithm, with both the
 * precision and fall-off coefficient determined at compile-time
 * and built into the generated helper functions.
 *
 * The first argument to the macro is the name that will be used
 * for the struct and helper functions.
 *
 * The second argument, the precision, expresses how many bits are
 * used for the fractional part of the fixed-precision values.
 *
 * The third argument, the weight reciprocal, determines how the
 * new values will be weighed vs. the old state, new values will
 * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note
 * that this parameter must be a power of two for efficiency.
 *
 * Generated for a given name:
 *   struct ewma_<name>   - holds the state in one unsigned long
 *   ewma_<name>_init()   - resets the state to 0
 *   ewma_<name>_read()   - returns the state shifted right by the precision
 *   ewma_<name>_add()    - folds in a sample; while the state is 0 the
 *                          sample (shifted left by the precision) simply
 *                          becomes the new state
 *
 * Each helper checks at build time that both macro arguments are
 * compile-time constants, that the precision is at most 30 and that
 * the weight reciprocal is a power of two.
 */
#define DECLARE_EWMA(name, _precision, _weight_rcp) \
struct ewma_##name { \
unsigned long internal; \
}; \
static inline void ewma_##name##_init(struct ewma_##name *e) \
{ \
BUILD_BUG_ON(!__builtin_constant_p(_precision)); \
BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \
/* \
* Even if you want to feed it just 0/1 you should have \
* some bits for the non-fractional part... \
*/ \
BUILD_BUG_ON((_precision) > 30); \
BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \
e->internal = 0; \
} \
static inline unsigned long \
ewma_##name##_read(struct ewma_##name *e) \
{ \
BUILD_BUG_ON(!__builtin_constant_p(_precision)); \
BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \
BUILD_BUG_ON((_precision) > 30); \
BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \
return e->internal >> (_precision); \
} \
static inline void ewma_##name##_add(struct ewma_##name *e, \
unsigned long val) \
{ \
unsigned long internal = READ_ONCE(e->internal); \
unsigned long weight_rcp = ilog2(_weight_rcp); \
unsigned long precision = _precision; \
\
BUILD_BUG_ON(!__builtin_constant_p(_precision)); \
BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \
BUILD_BUG_ON((_precision) > 30); \
BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \
\
WRITE_ONCE(e->internal, internal ? \
(((internal << weight_rcp) - internal) + \
(val << precision)) >> weight_rcp : \
(val << precision)); \
}
/* </DECLARE_EWMA> */

View File

@@ -249,7 +249,7 @@ hostapd_prepare_device_config() {
[ -n "$maxassoc" ] && append base_cfg "iface_max_num_sta=$maxassoc" "$N"
[ "$maxassoc_ignore_probe" -gt 0 ] && append base_cfg "no_probe_resp_if_max_sta=1" "$N"
[ "$rnr_beacon" -gt 0 ] && append base_cfg "rnr_beacon=$rnr_beacon" "$N"
[ "$he_co_locate" -gt 0 ] && append base_cfg "he_co_locate=$he_co_locate" "$N"
# [ "$he_co_locate" -gt 0 ] && append base_cfg "he_co_locate=$he_co_locate" "$N"
[ "$multiple_bssid" -gt 0 ] && append base_cfg "multiple_bssid=$multiple_bssid" "$N"
[ "$ema" -gt 0 ] && append base_cfg "ema=$ema" "$N"
[ "$acs_exclude_dfs" -gt 0 ] && append base_cfg "acs_exclude_dfs=$acs_exclude_dfs" "$N"

View File

@@ -251,6 +251,7 @@ nand_do_upgrade_success() {
[ -f "$conf_tar" ] && nand_restore_config "$conf_tar"
echo "sysupgrade successful"
umount -a
sleep 5
reboot -f
}

View File

@@ -1273,3 +1273,4 @@ CONFIG_INIT_STACK_NONE=y
# CONFIG_INIT_STACK_ALL is not set
# CONFIG_SECCRYPT is not set
CONFIG_HWMON=y
# CONFIG_CORESIGHT_STREAM is not set

View File

@@ -1,306 +0,0 @@
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -338,7 +338,6 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
- struct work_struct work;
};
enum {
@@ -349,7 +348,6 @@ enum {
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
- NAPI_STATE_THREADED, /* Use threaded NAPI */
};
enum {
@@ -360,7 +358,6 @@ enum {
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
- NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
@@ -2320,26 +2317,6 @@ void netif_napi_add(struct net_device *d
int (*poll)(struct napi_struct *, int), int weight);
/**
- * netif_threaded_napi_add - initialize a NAPI context
- * @dev: network device
- * @napi: NAPI context
- * @poll: polling function
- * @weight: default weight
- *
- * This variant of netif_napi_add() should be used from drivers using NAPI
- * with CPU intensive poll functions.
- * This will schedule polling from a high priority workqueue
- */
-static inline void netif_threaded_napi_add(struct net_device *dev,
- struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int),
- int weight)
-{
- set_bit(NAPI_STATE_THREADED, &napi->state);
- netif_napi_add(dev, napi, poll, weight);
-}
-
-/**
* netif_tx_napi_add - initialize a NAPI context
* @dev: network device
* @napi: NAPI context
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -157,7 +157,6 @@ static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
-static struct workqueue_struct *napi_workq __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
@@ -6042,11 +6041,6 @@ void __napi_schedule(struct napi_struct
{
unsigned long flags;
- if (test_bit(NAPI_STATE_THREADED, &n->state)) {
- queue_work(napi_workq, &n->work);
- return;
- }
-
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n);
local_irq_restore(flags);
@@ -6362,84 +6356,6 @@ static void init_gro_hash(struct napi_st
napi->gro_bitmask = 0;
}
-static int __napi_poll(struct napi_struct *n, bool *repoll)
-{
- int work, weight;
-
- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- work = n->poll(n, weight);
- trace_napi_poll(n, work, weight);
- }
-
- WARN_ON_ONCE(work > weight);
-
- if (likely(work < weight))
- return work;
-
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
- */
- if (unlikely(napi_disable_pending(n))) {
- napi_complete(n);
- return work;
- }
-
- if (n->gro_bitmask) {
- /* flush too old packets
- * If HZ < 1000, flush all packets.
- */
- napi_gro_flush(n, HZ >= 1000);
- }
-
- gro_normal_list(n);
-
- *repoll = true;
-
- return work;
-}
-
-static void napi_workfn(struct work_struct *work)
-{
- struct napi_struct *n = container_of(work, struct napi_struct, work);
- void *have;
-
- for (;;) {
- bool repoll = false;
-
- local_bh_disable();
-
- have = netpoll_poll_lock(n);
- __napi_poll(n, &repoll);
- netpoll_poll_unlock(have);
-
- local_bh_enable();
-
- if (!repoll)
- return;
-
- if (!need_resched())
- continue;
-
- /*
- * have to pay for the latency of task switch even if
- * napi is scheduled
- */
- queue_work(napi_workq, work);
- return;
- }
-}
-
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6459,7 +6375,6 @@ void netif_napi_add(struct net_device *d
#ifdef CONFIG_NETPOLL
napi->poll_owner = -1;
#endif
- INIT_WORK(&napi->work, napi_workfn);
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
@@ -6500,7 +6415,6 @@ static void flush_gro_hash(struct napi_s
void netif_napi_del(struct napi_struct *napi)
{
might_sleep();
- cancel_work_sync(&napi->work);
if (napi_hash_del(napi))
synchronize_net();
list_del_init(&napi->dev_list);
@@ -6513,19 +6427,51 @@ EXPORT_SYMBOL(netif_napi_del);
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
- bool do_repoll = false;
void *have;
- int work;
+ int work, weight;
list_del_init(&n->poll_list);
have = netpoll_poll_lock(n);
- work = __napi_poll(n, &do_repoll);
+ weight = n->weight;
- if (!do_repoll)
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n, work, weight);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
goto out_unlock;
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(napi_disable_pending(n))) {
+ napi_complete(n);
+ goto out_unlock;
+ }
+
+ if (n->gro_bitmask) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ napi_gro_flush(n, HZ >= 1000);
+ }
+
+ gro_normal_list(n);
+
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -10501,10 +10447,6 @@ static int __init net_dev_init(void)
sd->backlog.weight = weight_p;
}
- napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
- WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS);
- BUG_ON(!napi_workq);
-
dev_boot_phase = 0;
/* The loopback device is special if any other network devices
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -470,52 +470,6 @@ static ssize_t proto_down_store(struct d
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);
-static int change_napi_threaded(struct net_device *dev, unsigned long val)
-{
- struct napi_struct *napi;
-
- if (list_empty(&dev->napi_list))
- return -EOPNOTSUPP;
-
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (val)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
-
- return 0;
-}
-
-static ssize_t napi_threaded_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
-{
- return netdev_store(dev, attr, buf, len, change_napi_threaded);
-}
-
-static ssize_t napi_threaded_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct net_device *netdev = to_net_dev(dev);
- struct napi_struct *napi;
- bool enabled = false;
-
- if (!rtnl_trylock())
- return restart_syscall();
-
- list_for_each_entry(napi, &netdev->napi_list, dev_list) {
- if (test_bit(NAPI_STATE_THREADED, &napi->state))
- enabled = true;
- }
-
- rtnl_unlock();
-
- return sprintf(buf, fmt_dec, enabled);
-}
-static DEVICE_ATTR_RW(napi_threaded);
-
static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -627,7 +581,6 @@ static struct attribute *net_class_attrs
&dev_attr_flags.attr,
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
- &dev_attr_napi_threaded.attr,
&dev_attr_phys_port_id.attr,
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,

View File

@@ -1,88 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Feb 2021 11:34:08 -0800
Subject: [PATCH] net: extract napi poll functionality to __napi_poll()
This commit introduces a new function __napi_poll() which does the main
logic of the existing napi_poll() function, and will be called by other
functions in later commits.
This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and
is proposed as part of the patch to move napi work to work_queue
context.
This commit by itself is a code restructure.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6425,15 +6425,10 @@ void netif_napi_del(struct napi_struct *
}
EXPORT_SYMBOL(netif_napi_del);
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+static int __napi_poll(struct napi_struct *n, bool *repoll)
{
- void *have;
int work, weight;
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
weight = n->weight;
/* This NAPI_STATE_SCHED test is for avoiding a race
@@ -6451,7 +6446,7 @@ static int napi_poll(struct napi_struct
WARN_ON_ONCE(work > weight);
if (likely(work < weight))
- goto out_unlock;
+ return work;
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
@@ -6460,7 +6455,7 @@ static int napi_poll(struct napi_struct
*/
if (unlikely(napi_disable_pending(n))) {
napi_complete(n);
- goto out_unlock;
+ return work;
}
if (n->gro_bitmask) {
@@ -6478,12 +6473,29 @@ static int napi_poll(struct napi_struct
if (unlikely(!list_empty(&n->poll_list))) {
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
n->dev ? n->dev->name : "backlog");
- goto out_unlock;
+ return work;
}
- list_add_tail(&n->poll_list, repoll);
+ *repoll = true;
+
+ return work;
+}
+
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ bool do_repoll = false;
+ void *have;
+ int work;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ work = __napi_poll(n, &do_repoll);
+
+ if (do_repoll)
+ list_add_tail(&n->poll_list, repoll);
-out_unlock:
netpoll_poll_unlock(have);
return work;

View File

@@ -1,261 +0,0 @@
From: Wei Wang <weiwan@google.com>
Date: Mon, 8 Feb 2021 11:34:09 -0800
Subject: [PATCH] net: implement threaded-able napi poll loop support
This patch allows running each napi poll loop inside its own
kernel thread.
The kthread is created during netif_napi_add() if dev->threaded
is set. And threaded mode is enabled in napi_enable(). We will
provide a way to set dev->threaded and enable threaded mode
without a device up/down in the following patch.
Once that threaded mode is enabled and the kthread is
started, napi_schedule() will wake-up such thread instead
of scheduling the softirq.
The threaded poll loop behaves quite like the net_rx_action,
but it does not have to manipulate local irqs and uses
an explicit scheduling point based on netdev_budget.
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -338,6 +338,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct task_struct *thread;
};
enum {
@@ -348,6 +349,7 @@ enum {
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
};
enum {
@@ -358,6 +360,7 @@ enum {
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};
enum gro_result {
@@ -502,20 +505,7 @@ bool napi_hash_del(struct napi_struct *n
*/
void napi_disable(struct napi_struct *n);
-/**
- * napi_enable - enable NAPI scheduling
- * @n: NAPI context
- *
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
- */
-static inline void napi_enable(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
@@ -1834,6 +1824,8 @@ enum netdev_ml_priv_type {
*
* @wol_enabled: Wake-on-LAN is enabled
*
+ * @threaded: napi threaded mode is enabled
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2137,6 +2129,7 @@ struct net_device {
struct lock_class_key addr_list_lock_key;
bool proto_down;
unsigned wol_enabled:1;
+ unsigned threaded:1;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -1286,6 +1287,27 @@ void netdev_notify_peers(struct net_devi
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+ int err = 0;
+
+ /* Create and wake up the kthread once to put it in
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
+ * warning and work with loadavg.
+ */
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ n->dev->name, n->napi_id);
+ if (IS_ERR(n->thread)) {
+ err = PTR_ERR(n->thread);
+ pr_err("kthread_run failed with err %d\n", err);
+ n->thread = NULL;
+ }
+
+ return err;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -3971,6 +3993,21 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ struct task_struct *thread;
+
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+ /* Paired with smp_mb__before_atomic() in
+ * napi_enable(). Use READ_ONCE() to guarantee
+ * a complete read on napi->thread. Only call
+ * wake_up_process() when it's not NULL.
+ */
+ thread = READ_ONCE(napi->thread);
+ if (thread) {
+ wake_up_process(thread);
+ return;
+ }
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6379,6 +6416,12 @@ void netif_napi_add(struct net_device *d
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ /* Create kthread for this napi if dev->threaded is set.
+ * Clear dev->threaded if kthread creation failed so that
+ * threaded mode will not be enabled in napi_enable().
+ */
+ if (dev->threaded && napi_kthread_create(napi))
+ dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);
@@ -6395,9 +6438,28 @@ void napi_disable(struct napi_struct *n)
hrtimer_cancel(&n->timer);
clear_bit(NAPI_STATE_DISABLE, &n->state);
+ clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
+/**
+ * napi_enable - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ */
+void napi_enable(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ smp_mb__before_atomic();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
+ if (n->dev->threaded && n->thread)
+ set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
static void flush_gro_hash(struct napi_struct *napi)
{
int i;
@@ -6422,6 +6484,11 @@ void netif_napi_del(struct napi_struct *
flush_gro_hash(napi);
napi->gro_bitmask = 0;
+
+ if (napi->thread) {
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+ }
}
EXPORT_SYMBOL(netif_napi_del);
@@ -6501,6 +6568,51 @@ static int napi_poll(struct napi_struct
return work;
}
+static int napi_thread_wait(struct napi_struct *napi)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ WARN_ON(!list_empty(&napi->poll_list));
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+ return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+ void *have;
+
+ while (!napi_thread_wait(napi)) {
+ for (;;) {
+ bool repoll = false;
+
+ local_bh_disable();
+
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ __kfree_skb_flush();
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ cond_resched();
+ }
+ }
+ return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);

View File

@@ -1,177 +0,0 @@
From: Wei Wang <weiwan@google.com>
Date: Mon, 8 Feb 2021 11:34:10 -0800
Subject: [PATCH] net: add sysfs attribute to control napi threaded mode
This patch adds a new sysfs attribute to the network device class.
Said attribute provides a per-device control to enable/disable the
threaded mode for all the napi instances of the given network device,
without the need for a device up/down.
User sets it to 1 or 0 to enable or disable threaded mode.
Note: when switching between threaded and the current softirq based mode
for a napi instance, it will not immediately take effect if the napi is
currently being polled. The mode switch will happen for the next time
napi_schedule() is called.
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -301,3 +301,18 @@ Contact: netdev@vger.kernel.org
Description:
32-bit unsigned integer counting the number of times the link has
been down
+
+What: /sys/class/net/<iface>/threaded
+Date: Jan 2021
+KernelVersion: 5.12
+Contact: netdev@vger.kernel.org
+Description:
+ Boolean value to control the threaded mode per device. User could
+ set this value to enable/disable threaded mode for all napi
+ belonging to this device, without the need to do device up/down.
+
+ Possible values:
+ == ==================================
+ 0 threaded mode disabled for this dev
+ 1 threaded mode enabled for this dev
+ == ==================================
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -496,6 +496,8 @@ static inline bool napi_complete(struct
*/
bool napi_hash_del(struct napi_struct *napi);
+int dev_set_threaded(struct net_device *dev, bool threaded);
+
/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3997,8 +3997,9 @@ static inline void ____napi_schedule(str
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
- * napi_enable(). Use READ_ONCE() to guarantee
- * a complete read on napi->thread. Only call
+ * napi_enable()/dev_set_threaded().
+ * Use READ_ONCE() to guarantee a complete
+ * read on napi->thread. Only call
* wake_up_process() when it's not NULL.
*/
thread = READ_ONCE(napi->thread);
@@ -6393,6 +6394,49 @@ static void init_gro_hash(struct napi_st
napi->gro_bitmask = 0;
}
+int dev_set_threaded(struct net_device *dev, bool threaded)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (dev->threaded == threaded)
+ return 0;
+
+ if (threaded) {
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!napi->thread) {
+ err = napi_kthread_create(napi);
+ if (err) {
+ threaded = false;
+ break;
+ }
+ }
+ }
+ }
+
+ dev->threaded = threaded;
+
+ /* Make sure kthread is created before THREADED bit
+ * is set.
+ */
+ smp_mb__before_atomic();
+
+ /* Setting/unsetting threaded mode on a napi might not immediately
+ * take effect, if the current napi instance is actively being
+ * polled. In this case, the switch between threaded mode and
+ * softirq mode will happen in the next round of napi_schedule().
+ * This should not cause hiccups/stalls to the live traffic.
+ */
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (threaded)
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+ else
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
+ }
+
+ return err;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -557,6 +557,45 @@ static ssize_t phys_switch_id_show(struc
}
static DEVICE_ATTR_RO(phys_switch_id);
+static ssize_t threaded_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev))
+ ret = sprintf(buf, fmt_dec, netdev->threaded);
+
+ rtnl_unlock();
+ return ret;
+}
+
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
+{
+ int ret;
+
+ if (list_empty(&dev->napi_list))
+ return -EOPNOTSUPP;
+
+ if (val != 0 && val != 1)
+ return -EOPNOTSUPP;
+
+ ret = dev_set_threaded(dev, val);
+
+ return ret;
+}
+
+static ssize_t threaded_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
+}
+static DEVICE_ATTR_RW(threaded);
+
static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -587,6 +626,7 @@ static struct attribute *net_class_attrs
&dev_attr_proto_down.attr,
&dev_attr_carrier_up_count.attr,
&dev_attr_carrier_down_count.attr,
+ &dev_attr_threaded.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);

View File

@@ -1,93 +0,0 @@
From: Wei Wang <weiwan@google.com>
Date: Mon, 1 Mar 2021 17:21:13 -0800
Subject: [PATCH] net: fix race between napi kthread mode and busy poll
Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
determine if the kthread owns this napi and could call napi->poll() on
it. However, if socket busy poll is enabled, it is possible that the
busy poll thread grabs this SCHED bit (after the previous napi->poll()
invokes napi_complete_done() and clears SCHED bit) and tries to poll
on the same napi. napi_disable() could grab the SCHED bit as well.
This patch tries to fix this race by adding a new bit
NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
____napi_schedule() if the threaded mode is enabled, and gets cleared
in napi_complete_done(), and we only poll the napi in kthread if this
bit is set. This helps distinguish the ownership of the napi between
kthread and other scenarios and fixes the race issue.
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
Reported-by: Martin Zaharinov <micron10@gmail.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Wei Wang <weiwan@google.com>
Cc: Alexander Duyck <alexanderduyck@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -350,6 +350,7 @@ enum {
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
+ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
};
enum {
@@ -361,6 +362,7 @@ enum {
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
+ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
};
enum gro_result {
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4004,6 +4004,8 @@ static inline void ____napi_schedule(str
*/
thread = READ_ONCE(napi->thread);
if (thread) {
+ if (thread->state != TASK_INTERRUPTIBLE)
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread);
return;
}
@@ -6181,7 +6183,8 @@ bool napi_complete_done(struct napi_stru
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+ NAPIF_STATE_SCHED_THREADED);
/* If STATE_MISSED was set, leave STATE_SCHED set,
* because we will call napi->poll() one more time.
@@ -6614,16 +6617,25 @@ static int napi_poll(struct napi_struct
static int napi_thread_wait(struct napi_struct *napi)
{
+ bool woken = false;
+
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) {
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ /* Testing SCHED_THREADED bit here to make sure the current
+ * kthread owns this napi and could poll on this napi.
+ * Testing SCHED bit is not enough because SCHED bit might be
+ * set by some other busy poll thread or by napi_disable().
+ */
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
return 0;
}
schedule();
+ /* woken being true indicates this thread owns this napi. */
+ woken = true;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);

View File

@@ -1,53 +0,0 @@
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Apr 2021 17:24:17 +0200
Subject: [PATCH] net: fix hangup on napi_disable for threaded napi
napi_disable() is subject to a hangup when the threaded
mode is enabled and the napi is under heavy traffic.
If the relevant napi has been scheduled and the napi_disable()
kicks in before the next napi_thread_wait() completes - so
that the latter quits due to the napi_disable_pending() condition,
the existing code leaves the NAPI_STATE_SCHED bit set and the
napi_disable() loop waiting for such bit will hang.
This patch addresses the issue by dropping the NAPI_STATE_DISABLE
bit test in napi_thread_wait(). The later napi_threaded_poll()
iteration will take care of clearing the NAPI_STATE_SCHED.
This also addresses a related problem reported by Jakub:
before this patch a napi_disable()/napi_enable() pair killed
the napi thread, effectively disabling the threaded mode.
On the patched kernel napi_disable() simply stops scheduling
the relevant thread.
v1 -> v2:
- let the main napi_threaded_poll() loop clear the SCHED bit
Reported-by: Jakub Kicinski <kuba@kernel.org>
Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6621,7 +6621,7 @@ static int napi_thread_wait(struct napi_
set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ while (!kthread_should_stop()) {
/* Testing SCHED_THREADED bit here to make sure the current
* kthread owns this napi and could poll on this napi.
* Testing SCHED bit is not enough because SCHED bit might be
@@ -6639,6 +6639,7 @@ static int napi_thread_wait(struct napi_
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
+
return -1;
}

View File

@@ -0,0 +1,55 @@
#
# Copyright (C) 2016-2019 Jason A. Donenfeld <Jason@zx2c4.com>
# Copyright (C) 2016 Baptiste Jonglez <openwrt@bitsofnetworks.org>
# Copyright (C) 2016-2017 Dan Luedtke <mail@danrl.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
# Package identity. PKG_VERSION is also embedded in the tarball name and
# in the build directory name below.
PKG_NAME:=wireguard-backport
PKG_VERSION:=1.0.20211208
PKG_RELEASE:=1
# Upstream snapshot tarball, where to fetch it, and the hash recorded for it.
PKG_SOURCE:=wireguard-linux-compat-$(PKG_VERSION).tar.xz
PKG_SOURCE_URL:=https://git.zx2c4.com/wireguard-linux-compat/snapshot/
PKG_HASH:=c0e607138a17daac656f508d8e63ea3737b5221fa5d9288191ddeb099f5a3b92
PKG_LICENSE:=GPL-2.0
PKG_LICENSE_FILES:=COPYING
# Build under the kernel build directory, in a directory named after the
# tarball (wireguard-linux-compat-<version>) rather than after PKG_NAME.
PKG_BUILD_DIR:=$(KERNEL_BUILD_DIR)/wireguard-linux-compat-$(PKG_VERSION)
PKG_BUILD_PARALLEL:=1
PKG_USE_MIPS16:=0
# WireGuard's makefile needs this to know where to build the kernel module
export KERNELDIR:=$(LINUX_DIR)
# These includes come after the PKG_* assignments above; keep that order.
include $(INCLUDE_DIR)/package.mk
include $(INCLUDE_DIR)/kernel-defaults.mk
include $(INCLUDE_DIR)/package-defaults.mk
# Build the out-of-tree module from the src/ subdirectory of the unpacked
# tarball using the kernel's make options. EXTRA_CFLAGS switches off the
# compiler's stringop-overread warning for this module
# (NOTE(review): presumably needed for newer GCC releases; confirm).
define Build/Compile
	$(MAKE) $(KERNEL_MAKEOPTS) \
		M="$(PKG_BUILD_DIR)/src" \
		EXTRA_CFLAGS="-Wno-stringop-overread" \
		modules
endef
# Kernel module package definition: menu placement, dependencies, the .ko
# file to ship and the module name passed to AutoProbe.
define KernelPackage/wireguard-backport
  SECTION:=kernel
  CATEGORY:=Kernel modules
  SUBMENU:=Network Support
  TITLE:=WireGuard kernel module
  DEPENDS:= \
	+IPV6:kmod-udptunnel6 \
	+kmod-udptunnel4
  FILES:= $(PKG_BUILD_DIR)/src/wireguard.$(LINUX_KMOD_SUFFIX)
  AUTOLOAD:=$(call AutoProbe,wireguard)
endef
# Long description shown for the package. The text is written out in full
# here because this Makefile does not define a
# Package/wireguard/Default/description template; calling an undefined
# template expands to nothing and would leave the description nearly empty.
define KernelPackage/wireguard-backport/description
  WireGuard is a novel VPN that runs inside the Linux Kernel and utilizes
  state-of-the-art cryptography. It aims to be faster, simpler, leaner, and
  more useful than IPSec, while avoiding the massive headache. It intends to
  be considerably more performant than OpenVPN. WireGuard is designed as a
  general purpose VPN for running on embedded interfaces and super computers
  alike, fit for many different circumstances. It uses UDP.

  This package provides the kernel module for WireGuard.
endef
# Instantiate the KernelPackage template for the wireguard-backport
# definitions above.
$(eval $(call KernelPackage,wireguard-backport))

View File

@@ -0,0 +1,20 @@
Index: wireguard-linux-compat-1.0.20211208/src/compat/compat.h
===================================================================
--- wireguard-linux-compat-1.0.20211208.orig/src/compat/compat.h
+++ wireguard-linux-compat-1.0.20211208/src/compat/compat.h
@@ -686,15 +688,6 @@ struct __compat_dummy_container { char d
#define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family)
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISRHEL7)
-static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len)
-{
- void *tmp = skb_put(skb, len);
- memcpy(tmp, data, len);
- return tmp;
-}
-#endif
-
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7)
#define napi_complete_done(n, work_done) napi_complete(n)
#endif

View File

@@ -7,7 +7,10 @@ image: bin/targets/ipq60xx/generic/openwrt-ipq60xx-edgecore_eap101-squashfs-sysu
feeds:
- name: ipq95xx
path: ../../feeds/ipq95xx
include:
- ucentral-ap
packages:
- ipq95xx
- iperf3
diffconfig: |
CONFIG_KERNEL_IPQ_MEM_PROFILE=0