mirror of
https://github.com/Telecominfraproject/wlan-ap.git
synced 2025-10-29 01:22:25 +00:00
1089 lines
35 KiB
Diff
1089 lines
35 KiB
Diff
From e2bb75821cd023b62bf69bc40d5187ba63237cc2 Mon Sep 17 00:00:00 2001
|
|
From: John Crispin <john@phrozen.org>
|
|
Date: Tue, 14 Dec 2021 09:39:42 +0100
|
|
Subject: [PATCH] kernel: backport napi threading patches
|
|
|
|
Signed-off-by: John Crispin <john@phrozen.org>
|
|
---
|
|
...pi-poll-functionality-to-__napi_poll.patch | 88 +++++
|
|
...threaded-able-napi-poll-loop-support.patch | 261 +++++++++++++
|
|
...tribute-to-control-napi-threaded-mod.patch | 177 +++++++++
|
|
...ween-napi-kthread-mode-and-busy-poll.patch | 93 +++++
|
|
...up-on-napi_disable-for-threaded-napi.patch | 53 +++
|
|
...dd-support-for-threaded-NAPI-polling.patch | 356 ------------------
|
|
6 files changed, 672 insertions(+), 356 deletions(-)
|
|
create mode 100644 target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch
|
|
create mode 100644 target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch
|
|
create mode 100644 target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch
|
|
create mode 100644 target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch
|
|
create mode 100644 target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch
|
|
delete mode 100644 target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch
|
|
|
|
diff --git a/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch b/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch
|
|
new file mode 100644
|
|
index 0000000000..961140aabb
|
|
--- /dev/null
|
|
+++ b/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch
|
|
@@ -0,0 +1,88 @@
|
|
+From: Felix Fietkau <nbd@nbd.name>
|
|
+Date: Mon, 8 Feb 2021 11:34:08 -0800
|
|
+Subject: [PATCH] net: extract napi poll functionality to __napi_poll()
|
|
+
|
|
+This commit introduces a new function __napi_poll() which does the main
|
|
+logic of the existing napi_poll() function, and will be called by other
|
|
+functions in later commits.
|
|
+This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and
|
|
+is proposed as part of the patch to move napi work to work_queue
|
|
+context.
|
|
+This commit by itself is a code restructure.
|
|
+
|
|
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
|
+Signed-off-by: Wei Wang <weiwan@google.com>
|
|
+Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
|
+Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
+---
|
|
+
|
|
+--- a/net/core/dev.c
|
|
++++ b/net/core/dev.c
|
|
+@@ -6322,15 +6322,10 @@ void netif_napi_del(struct napi_struct *
|
|
+ }
|
|
+ EXPORT_SYMBOL(netif_napi_del);
|
|
+
|
|
+-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
|
++static int __napi_poll(struct napi_struct *n, bool *repoll)
|
|
+ {
|
|
+- void *have;
|
|
+ int work, weight;
|
|
+
|
|
+- list_del_init(&n->poll_list);
|
|
+-
|
|
+- have = netpoll_poll_lock(n);
|
|
+-
|
|
+ weight = n->weight;
|
|
+
|
|
+ /* This NAPI_STATE_SCHED test is for avoiding a race
|
|
+@@ -6348,7 +6343,7 @@ static int napi_poll(struct napi_struct
|
|
+ WARN_ON_ONCE(work > weight);
|
|
+
|
|
+ if (likely(work < weight))
|
|
+- goto out_unlock;
|
|
++ return work;
|
|
+
|
|
+ /* Drivers must not modify the NAPI state if they
|
|
+ * consume the entire weight. In such cases this code
|
|
+@@ -6357,7 +6352,7 @@ static int napi_poll(struct napi_struct
|
|
+ */
|
|
+ if (unlikely(napi_disable_pending(n))) {
|
|
+ napi_complete(n);
|
|
+- goto out_unlock;
|
|
++ return work;
|
|
+ }
|
|
+
|
|
+ if (n->gro_bitmask) {
|
|
+@@ -6375,12 +6370,29 @@ static int napi_poll(struct napi_struct
|
|
+ if (unlikely(!list_empty(&n->poll_list))) {
|
|
+ pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
|
|
+ n->dev ? n->dev->name : "backlog");
|
|
+- goto out_unlock;
|
|
++ return work;
|
|
+ }
|
|
+
|
|
+- list_add_tail(&n->poll_list, repoll);
|
|
++ *repoll = true;
|
|
++
|
|
++ return work;
|
|
++}
|
|
++
|
|
++static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
|
++{
|
|
++ bool do_repoll = false;
|
|
++ void *have;
|
|
++ int work;
|
|
++
|
|
++ list_del_init(&n->poll_list);
|
|
++
|
|
++ have = netpoll_poll_lock(n);
|
|
++
|
|
++ work = __napi_poll(n, &do_repoll);
|
|
++
|
|
++ if (do_repoll)
|
|
++ list_add_tail(&n->poll_list, repoll);
|
|
+
|
|
+-out_unlock:
|
|
+ netpoll_poll_unlock(have);
|
|
+
|
|
+ return work;
|
|
diff --git a/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch
|
|
new file mode 100644
|
|
index 0000000000..c9bd4abb53
|
|
--- /dev/null
|
|
+++ b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch
|
|
@@ -0,0 +1,261 @@
|
|
+From: Wei Wang <weiwan@google.com>
|
|
+Date: Mon, 8 Feb 2021 11:34:09 -0800
|
|
+Subject: [PATCH] net: implement threaded-able napi poll loop support
|
|
+
|
|
+This patch allows running each napi poll loop inside its own
|
|
+kernel thread.
|
|
+The kthread is created during netif_napi_add() if dev->threaded
|
|
+is set. And threaded mode is enabled in napi_enable(). We will
|
|
+provide a way to set dev->threaded and enable threaded mode
|
|
+without a device up/down in the following patch.
|
|
+
|
|
+Once that threaded mode is enabled and the kthread is
|
|
+started, napi_schedule() will wake-up such thread instead
|
|
+of scheduling the softirq.
|
|
+
|
|
+The threaded poll loop behaves quite like the net_rx_action,
|
|
+but it does not have to manipulate local irqs and uses
|
|
+an explicit scheduling point based on netdev_budget.
|
|
+
|
|
+Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
|
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
|
+Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
+Co-developed-by: Jakub Kicinski <kuba@kernel.org>
|
|
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
|
+Signed-off-by: Wei Wang <weiwan@google.com>
|
|
+Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
|
+Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
+---
|
|
+
|
|
+--- a/include/linux/netdevice.h
|
|
++++ b/include/linux/netdevice.h
|
|
+@@ -340,6 +340,7 @@ struct napi_struct {
|
|
+ struct list_head dev_list;
|
|
+ struct hlist_node napi_hash_node;
|
|
+ unsigned int napi_id;
|
|
++ struct task_struct *thread;
|
|
+ };
|
|
+
|
|
+ enum {
|
|
+@@ -350,6 +351,7 @@ enum {
|
|
+ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
|
|
+ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
|
+ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
|
++ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
|
+ };
|
|
+
|
|
+ enum {
|
|
+@@ -360,6 +362,7 @@ enum {
|
|
+ NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
|
|
+ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
|
+ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
|
++ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
|
+ };
|
|
+
|
|
+ enum gro_result {
|
|
+@@ -504,20 +507,7 @@ bool napi_hash_del(struct napi_struct *n
|
|
+ */
|
|
+ void napi_disable(struct napi_struct *n);
|
|
+
|
|
+-/**
|
|
+- * napi_enable - enable NAPI scheduling
|
|
+- * @n: NAPI context
|
|
+- *
|
|
+- * Resume NAPI from being scheduled on this context.
|
|
+- * Must be paired with napi_disable.
|
|
+- */
|
|
+-static inline void napi_enable(struct napi_struct *n)
|
|
+-{
|
|
+- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
|
+- smp_mb__before_atomic();
|
|
+- clear_bit(NAPI_STATE_SCHED, &n->state);
|
|
+- clear_bit(NAPI_STATE_NPSVC, &n->state);
|
|
+-}
|
|
++void napi_enable(struct napi_struct *n);
|
|
+
|
|
+ /**
|
|
+ * napi_synchronize - wait until NAPI is not running
|
|
+@@ -1783,6 +1773,8 @@ enum netdev_ml_priv_type {
|
|
+ *
|
|
+ * @wol_enabled: Wake-on-LAN is enabled
|
|
+ *
|
|
++ * @threaded: napi threaded mode is enabled
|
|
++ *
|
|
+ * FIXME: cleanup struct net_device such that network protocol info
|
|
+ * moves out.
|
|
+ */
|
|
+@@ -2075,6 +2067,7 @@ struct net_device {
|
|
+ struct lock_class_key addr_list_lock_key;
|
|
+ bool proto_down;
|
|
+ unsigned wol_enabled:1;
|
|
++ unsigned threaded:1;
|
|
+ };
|
|
+ #define to_net_dev(d) container_of(d, struct net_device, dev)
|
|
+
|
|
+--- a/net/core/dev.c
|
|
++++ b/net/core/dev.c
|
|
+@@ -91,6 +91,7 @@
|
|
+ #include <linux/etherdevice.h>
|
|
+ #include <linux/ethtool.h>
|
|
+ #include <linux/skbuff.h>
|
|
++#include <linux/kthread.h>
|
|
+ #include <linux/bpf.h>
|
|
+ #include <linux/bpf_trace.h>
|
|
+ #include <net/net_namespace.h>
|
|
+@@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi
|
|
+ }
|
|
+ EXPORT_SYMBOL(netdev_notify_peers);
|
|
+
|
|
++static int napi_threaded_poll(void *data);
|
|
++
|
|
++static int napi_kthread_create(struct napi_struct *n)
|
|
++{
|
|
++ int err = 0;
|
|
++
|
|
++ /* Create and wake up the kthread once to put it in
|
|
++ * TASK_INTERRUPTIBLE mode to avoid the blocked task
|
|
++ * warning and work with loadavg.
|
|
++ */
|
|
++ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
|
|
++ n->dev->name, n->napi_id);
|
|
++ if (IS_ERR(n->thread)) {
|
|
++ err = PTR_ERR(n->thread);
|
|
++ pr_err("kthread_run failed with err %d\n", err);
|
|
++ n->thread = NULL;
|
|
++ }
|
|
++
|
|
++ return err;
|
|
++}
|
|
++
|
|
+ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
|
|
+ {
|
|
+ const struct net_device_ops *ops = dev->netdev_ops;
|
|
+@@ -3885,6 +3907,21 @@ int gro_normal_batch __read_mostly = 8;
|
|
+ static inline void ____napi_schedule(struct softnet_data *sd,
|
|
+ struct napi_struct *napi)
|
|
+ {
|
|
++ struct task_struct *thread;
|
|
++
|
|
++ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
|
++ /* Paired with smp_mb__before_atomic() in
|
|
++ * napi_enable(). Use READ_ONCE() to guarantee
|
|
++ * a complete read on napi->thread. Only call
|
|
++ * wake_up_process() when it's not NULL.
|
|
++ */
|
|
++ thread = READ_ONCE(napi->thread);
|
|
++ if (thread) {
|
|
++ wake_up_process(thread);
|
|
++ return;
|
|
++ }
|
|
++ }
|
|
++
|
|
+ list_add_tail(&napi->poll_list, &sd->poll_list);
|
|
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
|
+ }
|
|
+@@ -6276,6 +6313,12 @@ void netif_napi_add(struct net_device *d
|
|
+ set_bit(NAPI_STATE_NPSVC, &napi->state);
|
|
+ list_add_rcu(&napi->dev_list, &dev->napi_list);
|
|
+ napi_hash_add(napi);
|
|
++ /* Create kthread for this napi if dev->threaded is set.
|
|
++ * Clear dev->threaded if kthread creation failed so that
|
|
++ * threaded mode will not be enabled in napi_enable().
|
|
++ */
|
|
++ if (dev->threaded && napi_kthread_create(napi))
|
|
++ dev->threaded = 0;
|
|
+ }
|
|
+ EXPORT_SYMBOL(netif_napi_add);
|
|
+
|
|
+@@ -6292,9 +6335,28 @@ void napi_disable(struct napi_struct *n)
|
|
+ hrtimer_cancel(&n->timer);
|
|
+
|
|
+ clear_bit(NAPI_STATE_DISABLE, &n->state);
|
|
++ clear_bit(NAPI_STATE_THREADED, &n->state);
|
|
+ }
|
|
+ EXPORT_SYMBOL(napi_disable);
|
|
+
|
|
++/**
|
|
++ * napi_enable - enable NAPI scheduling
|
|
++ * @n: NAPI context
|
|
++ *
|
|
++ * Resume NAPI from being scheduled on this context.
|
|
++ * Must be paired with napi_disable.
|
|
++ */
|
|
++void napi_enable(struct napi_struct *n)
|
|
++{
|
|
++ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
|
++ smp_mb__before_atomic();
|
|
++ clear_bit(NAPI_STATE_SCHED, &n->state);
|
|
++ clear_bit(NAPI_STATE_NPSVC, &n->state);
|
|
++ if (n->dev->threaded && n->thread)
|
|
++ set_bit(NAPI_STATE_THREADED, &n->state);
|
|
++}
|
|
++EXPORT_SYMBOL(napi_enable);
|
|
++
|
|
+ static void flush_gro_hash(struct napi_struct *napi)
|
|
+ {
|
|
+ int i;
|
|
+@@ -6319,6 +6381,11 @@ void netif_napi_del(struct napi_struct *
|
|
+
|
|
+ flush_gro_hash(napi);
|
|
+ napi->gro_bitmask = 0;
|
|
++
|
|
++ if (napi->thread) {
|
|
++ kthread_stop(napi->thread);
|
|
++ napi->thread = NULL;
|
|
++ }
|
|
+ }
|
|
+ EXPORT_SYMBOL(netif_napi_del);
|
|
+
|
|
+@@ -6398,6 +6465,51 @@ static int napi_poll(struct napi_struct
|
|
+ return work;
|
|
+ }
|
|
+
|
|
++static int napi_thread_wait(struct napi_struct *napi)
|
|
++{
|
|
++ set_current_state(TASK_INTERRUPTIBLE);
|
|
++
|
|
++ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
|
++ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
|
++ WARN_ON(!list_empty(&napi->poll_list));
|
|
++ __set_current_state(TASK_RUNNING);
|
|
++ return 0;
|
|
++ }
|
|
++
|
|
++ schedule();
|
|
++ set_current_state(TASK_INTERRUPTIBLE);
|
|
++ }
|
|
++ __set_current_state(TASK_RUNNING);
|
|
++ return -1;
|
|
++}
|
|
++
|
|
++static int napi_threaded_poll(void *data)
|
|
++{
|
|
++ struct napi_struct *napi = data;
|
|
++ void *have;
|
|
++
|
|
++ while (!napi_thread_wait(napi)) {
|
|
++ for (;;) {
|
|
++ bool repoll = false;
|
|
++
|
|
++ local_bh_disable();
|
|
++
|
|
++ have = netpoll_poll_lock(napi);
|
|
++ __napi_poll(napi, &repoll);
|
|
++ netpoll_poll_unlock(have);
|
|
++
|
|
++ __kfree_skb_flush();
|
|
++ local_bh_enable();
|
|
++
|
|
++ if (!repoll)
|
|
++ break;
|
|
++
|
|
++ cond_resched();
|
|
++ }
|
|
++ }
|
|
++ return 0;
|
|
++}
|
|
++
|
|
+ static __latent_entropy void net_rx_action(struct softirq_action *h)
|
|
+ {
|
|
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
|
|
diff --git a/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch b/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch
|
|
new file mode 100644
|
|
index 0000000000..d8b932978c
|
|
--- /dev/null
|
|
+++ b/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch
|
|
@@ -0,0 +1,177 @@
|
|
+From: Wei Wang <weiwan@google.com>
|
|
+Date: Mon, 8 Feb 2021 11:34:10 -0800
|
|
+Subject: [PATCH] net: add sysfs attribute to control napi threaded mode
|
|
+
|
|
+This patch adds a new sysfs attribute to the network device class.
|
|
+Said attribute provides a per-device control to enable/disable the
|
|
+threaded mode for all the napi instances of the given network device,
|
|
+without the need for a device up/down.
|
|
+User sets it to 1 or 0 to enable or disable threaded mode.
|
|
+Note: when switching between threaded and the current softirq based mode
|
|
+for a napi instance, it will not immediately take effect if the napi is
|
|
+currently being polled. The mode switch will happen for the next time
|
|
+napi_schedule() is called.
|
|
+
|
|
+Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
|
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
|
+Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
+Co-developed-by: Felix Fietkau <nbd@nbd.name>
|
|
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
|
+Signed-off-by: Wei Wang <weiwan@google.com>
|
|
+Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
|
+Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
+---
|
|
+
|
|
+--- a/Documentation/ABI/testing/sysfs-class-net
|
|
++++ b/Documentation/ABI/testing/sysfs-class-net
|
|
+@@ -301,3 +301,18 @@ Contact: netdev@vger.kernel.org
|
|
+ Description:
|
|
+ 32-bit unsigned integer counting the number of times the link has
|
|
+ been down
|
|
++
|
|
++What: /sys/class/net/<iface>/threaded
|
|
++Date: Jan 2021
|
|
++KernelVersion: 5.12
|
|
++Contact: netdev@vger.kernel.org
|
|
++Description:
|
|
++ Boolean value to control the threaded mode per device. User could
|
|
++ set this value to enable/disable threaded mode for all napi
|
|
++ belonging to this device, without the need to do device up/down.
|
|
++
|
|
++ Possible values:
|
|
++ == ==================================
|
|
++ 0 threaded mode disabled for this dev
|
|
++ 1 threaded mode enabled for this dev
|
|
++ == ==================================
|
|
+--- a/include/linux/netdevice.h
|
|
++++ b/include/linux/netdevice.h
|
|
+@@ -498,6 +498,8 @@ static inline bool napi_complete(struct
|
|
+ */
|
|
+ bool napi_hash_del(struct napi_struct *napi);
|
|
+
|
|
++int dev_set_threaded(struct net_device *dev, bool threaded);
|
|
++
|
|
+ /**
|
|
+ * napi_disable - prevent NAPI from scheduling
|
|
+ * @n: NAPI context
|
|
+--- a/net/core/dev.c
|
|
++++ b/net/core/dev.c
|
|
+@@ -3911,8 +3911,9 @@ static inline void ____napi_schedule(str
|
|
+
|
|
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
|
+ /* Paired with smp_mb__before_atomic() in
|
|
+- * napi_enable(). Use READ_ONCE() to guarantee
|
|
+- * a complete read on napi->thread. Only call
|
|
++ * napi_enable()/dev_set_threaded().
|
|
++ * Use READ_ONCE() to guarantee a complete
|
|
++ * read on napi->thread. Only call
|
|
+ * wake_up_process() when it's not NULL.
|
|
+ */
|
|
+ thread = READ_ONCE(napi->thread);
|
|
+@@ -6290,6 +6291,49 @@ static void init_gro_hash(struct napi_st
|
|
+ napi->gro_bitmask = 0;
|
|
+ }
|
|
+
|
|
++int dev_set_threaded(struct net_device *dev, bool threaded)
|
|
++{
|
|
++ struct napi_struct *napi;
|
|
++ int err = 0;
|
|
++
|
|
++ if (dev->threaded == threaded)
|
|
++ return 0;
|
|
++
|
|
++ if (threaded) {
|
|
++ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
|
++ if (!napi->thread) {
|
|
++ err = napi_kthread_create(napi);
|
|
++ if (err) {
|
|
++ threaded = false;
|
|
++ break;
|
|
++ }
|
|
++ }
|
|
++ }
|
|
++ }
|
|
++
|
|
++ dev->threaded = threaded;
|
|
++
|
|
++ /* Make sure kthread is created before THREADED bit
|
|
++ * is set.
|
|
++ */
|
|
++ smp_mb__before_atomic();
|
|
++
|
|
++ /* Setting/unsetting threaded mode on a napi might not immediately
|
|
++ * take effect, if the current napi instance is actively being
|
|
++ * polled. In this case, the switch between threaded mode and
|
|
++ * softirq mode will happen in the next round of napi_schedule().
|
|
++ * This should not cause hiccups/stalls to the live traffic.
|
|
++ */
|
|
++ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
|
++ if (threaded)
|
|
++ set_bit(NAPI_STATE_THREADED, &napi->state);
|
|
++ else
|
|
++ clear_bit(NAPI_STATE_THREADED, &napi->state);
|
|
++ }
|
|
++
|
|
++ return err;
|
|
++}
|
|
++
|
|
+ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
|
|
+ int (*poll)(struct napi_struct *, int), int weight)
|
|
+ {
|
|
+--- a/net/core/net-sysfs.c
|
|
++++ b/net/core/net-sysfs.c
|
|
+@@ -557,6 +557,45 @@ static ssize_t phys_switch_id_show(struc
|
|
+ }
|
|
+ static DEVICE_ATTR_RO(phys_switch_id);
|
|
+
|
|
++static ssize_t threaded_show(struct device *dev,
|
|
++ struct device_attribute *attr, char *buf)
|
|
++{
|
|
++ struct net_device *netdev = to_net_dev(dev);
|
|
++ ssize_t ret = -EINVAL;
|
|
++
|
|
++ if (!rtnl_trylock())
|
|
++ return restart_syscall();
|
|
++
|
|
++ if (dev_isalive(netdev))
|
|
++ ret = sprintf(buf, fmt_dec, netdev->threaded);
|
|
++
|
|
++ rtnl_unlock();
|
|
++ return ret;
|
|
++}
|
|
++
|
|
++static int modify_napi_threaded(struct net_device *dev, unsigned long val)
|
|
++{
|
|
++ int ret;
|
|
++
|
|
++ if (list_empty(&dev->napi_list))
|
|
++ return -EOPNOTSUPP;
|
|
++
|
|
++ if (val != 0 && val != 1)
|
|
++ return -EOPNOTSUPP;
|
|
++
|
|
++ ret = dev_set_threaded(dev, val);
|
|
++
|
|
++ return ret;
|
|
++}
|
|
++
|
|
++static ssize_t threaded_store(struct device *dev,
|
|
++ struct device_attribute *attr,
|
|
++ const char *buf, size_t len)
|
|
++{
|
|
++ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
|
|
++}
|
|
++static DEVICE_ATTR_RW(threaded);
|
|
++
|
|
+ static struct attribute *net_class_attrs[] __ro_after_init = {
|
|
+ &dev_attr_netdev_group.attr,
|
|
+ &dev_attr_type.attr,
|
|
+@@ -587,6 +626,7 @@ static struct attribute *net_class_attrs
|
|
+ &dev_attr_proto_down.attr,
|
|
+ &dev_attr_carrier_up_count.attr,
|
|
+ &dev_attr_carrier_down_count.attr,
|
|
++ &dev_attr_threaded.attr,
|
|
+ NULL,
|
|
+ };
|
|
+ ATTRIBUTE_GROUPS(net_class);
|
|
diff --git a/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch b/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch
|
|
new file mode 100644
|
|
index 0000000000..19c5a53a27
|
|
--- /dev/null
|
|
+++ b/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch
|
|
@@ -0,0 +1,93 @@
|
|
+From: Wei Wang <weiwan@google.com>
|
|
+Date: Mon, 1 Mar 2021 17:21:13 -0800
|
|
+Subject: [PATCH] net: fix race between napi kthread mode and busy poll
|
|
+
|
|
+Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
|
|
+determine if the kthread owns this napi and could call napi->poll() on
|
|
+it. However, if socket busy poll is enabled, it is possible that the
|
|
+busy poll thread grabs this SCHED bit (after the previous napi->poll()
|
|
+invokes napi_complete_done() and clears SCHED bit) and tries to poll
|
|
+on the same napi. napi_disable() could grab the SCHED bit as well.
|
|
+This patch tries to fix this race by adding a new bit
|
|
+NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
|
|
+____napi_schedule() if the threaded mode is enabled, and gets cleared
|
|
+in napi_complete_done(), and we only poll the napi in kthread if this
|
|
+bit is set. This helps distinguish the ownership of the napi between
|
|
+kthread and other scenarios and fixes the race issue.
|
|
+
|
|
+Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
|
+Reported-by: Martin Zaharinov <micron10@gmail.com>
|
|
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
|
|
+Signed-off-by: Wei Wang <weiwan@google.com>
|
|
+Cc: Alexander Duyck <alexanderduyck@fb.com>
|
|
+Cc: Eric Dumazet <edumazet@google.com>
|
|
+Cc: Paolo Abeni <pabeni@redhat.com>
|
|
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
+---
|
|
+
|
|
+--- a/include/linux/netdevice.h
|
|
++++ b/include/linux/netdevice.h
|
|
+@@ -352,6 +352,7 @@ enum {
|
|
+ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
|
+ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
|
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
|
++ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
|
|
+ };
|
|
+
|
|
+ enum {
|
|
+@@ -363,6 +364,7 @@ enum {
|
|
+ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
|
+ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
|
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
|
++ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
|
|
+ };
|
|
+
|
|
+ enum gro_result {
|
|
+--- a/net/core/dev.c
|
|
++++ b/net/core/dev.c
|
|
+@@ -3918,6 +3918,8 @@ static inline void ____napi_schedule(str
|
|
+ */
|
|
+ thread = READ_ONCE(napi->thread);
|
|
+ if (thread) {
|
|
++ if (thread->state != TASK_INTERRUPTIBLE)
|
|
++ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
|
|
+ wake_up_process(thread);
|
|
+ return;
|
|
+ }
|
|
+@@ -6078,7 +6080,8 @@ bool napi_complete_done(struct napi_stru
|
|
+
|
|
+ WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
|
|
+
|
|
+- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
|
|
++ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
|
|
++ NAPIF_STATE_SCHED_THREADED);
|
|
+
|
|
+ /* If STATE_MISSED was set, leave STATE_SCHED set,
|
|
+ * because we will call napi->poll() one more time.
|
|
+@@ -6511,16 +6514,25 @@ static int napi_poll(struct napi_struct
|
|
+
|
|
+ static int napi_thread_wait(struct napi_struct *napi)
|
|
+ {
|
|
++ bool woken = false;
|
|
++
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+
|
|
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
|
+- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
|
++ /* Testing SCHED_THREADED bit here to make sure the current
|
|
++ * kthread owns this napi and could poll on this napi.
|
|
++ * Testing SCHED bit is not enough because SCHED bit might be
|
|
++ * set by some other busy poll thread or by napi_disable().
|
|
++ */
|
|
++ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
|
|
+ WARN_ON(!list_empty(&napi->poll_list));
|
|
+ __set_current_state(TASK_RUNNING);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ schedule();
|
|
++ /* woken being true indicates this thread owns this napi. */
|
|
++ woken = true;
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+ }
|
|
+ __set_current_state(TASK_RUNNING);
|
|
diff --git a/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch b/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch
|
|
new file mode 100644
|
|
index 0000000000..108cf809f8
|
|
--- /dev/null
|
|
+++ b/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch
|
|
@@ -0,0 +1,53 @@
|
|
+From: Paolo Abeni <pabeni@redhat.com>
|
|
+Date: Fri, 9 Apr 2021 17:24:17 +0200
|
|
+Subject: [PATCH] net: fix hangup on napi_disable for threaded napi
|
|
+
|
|
+napi_disable() is subject to a hangup, when the threaded
|
|
+mode is enabled and the napi is under heavy traffic.
|
|
+
|
|
+If the relevant napi has been scheduled and the napi_disable()
|
|
+kicks in before the next napi_thread_wait() completes - so
|
|
+that the latter quits due to the napi_disable_pending() condition,
|
|
+the existing code leaves the NAPI_STATE_SCHED bit set and the
|
|
+napi_disable() loop waiting for such bit will hang.
|
|
+
|
|
+This patch addresses the issue by dropping the NAPI_STATE_DISABLE
|
|
+bit test in napi_thread_wait(). The later napi_threaded_poll()
|
|
+iteration will take care of clearing the NAPI_STATE_SCHED.
|
|
+
|
|
+This also addresses a related problem reported by Jakub:
|
|
+before this patch a napi_disable()/napi_enable() pair killed
|
|
+the napi thread, effectively disabling the threaded mode.
|
|
+On the patched kernel napi_disable() simply stops scheduling
|
|
+the relevant thread.
|
|
+
|
|
+v1 -> v2:
|
|
+ - let the main napi_threaded_poll() loop clear the SCHED bit
|
|
+
|
|
+Reported-by: Jakub Kicinski <kuba@kernel.org>
|
|
+Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
|
|
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
|
+Reviewed-by: Eric Dumazet <edumazet@google.com>
|
|
+Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com
|
|
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
|
+---
|
|
+
|
|
+--- a/net/core/dev.c
|
|
++++ b/net/core/dev.c
|
|
+@@ -6518,7 +6518,7 @@ static int napi_thread_wait(struct napi_
|
|
+
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+
|
|
+- while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
|
++ while (!kthread_should_stop()) {
|
|
+ /* Testing SCHED_THREADED bit here to make sure the current
|
|
+ * kthread owns this napi and could poll on this napi.
|
|
+ * Testing SCHED bit is not enough because SCHED bit might be
|
|
+@@ -6536,6 +6536,7 @@ static int napi_thread_wait(struct napi_
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+ }
|
|
+ __set_current_state(TASK_RUNNING);
|
|
++
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
diff --git a/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch b/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch
|
|
deleted file mode 100644
|
|
index febec868f4..0000000000
|
|
--- a/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch
|
|
+++ /dev/null
|
|
@@ -1,356 +0,0 @@
|
|
-From: Felix Fietkau <nbd@nbd.name>
|
|
-Date: Sun, 26 Jul 2020 14:03:21 +0200
|
|
-Subject: [PATCH] net: add support for threaded NAPI polling
|
|
-
|
|
-For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI
|
|
-poll function does not perform well. Since NAPI poll is bound to the CPU it
|
|
-was scheduled from, we can easily end up with a few very busy CPUs spending
|
|
-most of their time in softirq/ksoftirqd and some idle ones.
|
|
-
|
|
-Introduce threaded NAPI for such drivers based on a workqueue. The API is the
|
|
-same except for using netif_threaded_napi_add instead of netif_napi_add.
|
|
-
|
|
-In my tests with mt76 on MT7621 using threaded NAPI + a thread for tx scheduling
|
|
-improves LAN->WLAN bridging throughput by 10-50%. Throughput without threaded
|
|
-NAPI is wildly inconsistent, depending on the CPU that runs the tx scheduling
|
|
-thread.
|
|
-
|
|
-With threaded NAPI it seems stable and consistent (and higher than the best
|
|
-results I got without it).
|
|
-
|
|
-Based on a patch by Hillf Danton
|
|
-
|
|
-Cc: Hillf Danton <hdanton@sina.com>
|
|
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
|
----
|
|
-
|
|
---- a/include/linux/netdevice.h
|
|
-+++ b/include/linux/netdevice.h
|
|
-@@ -340,6 +340,7 @@ struct napi_struct {
|
|
- struct list_head dev_list;
|
|
- struct hlist_node napi_hash_node;
|
|
- unsigned int napi_id;
|
|
-+ struct work_struct work;
|
|
- };
|
|
-
|
|
- enum {
|
|
-@@ -350,6 +351,7 @@ enum {
|
|
- NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
|
|
- NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
|
- NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
|
-+ NAPI_STATE_THREADED, /* Use threaded NAPI */
|
|
- };
|
|
-
|
|
- enum {
|
|
-@@ -360,6 +362,7 @@ enum {
|
|
- NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
|
|
- NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
|
- NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
|
-+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
|
- };
|
|
-
|
|
- enum gro_result {
|
|
-@@ -2101,6 +2104,7 @@ struct net_device {
|
|
- struct lock_class_key addr_list_lock_key;
|
|
- bool proto_down;
|
|
- unsigned wol_enabled:1;
|
|
-+ unsigned threaded:1;
|
|
- };
|
|
- #define to_net_dev(d) container_of(d, struct net_device, dev)
|
|
-
|
|
-@@ -2281,6 +2285,26 @@ void netif_napi_add(struct net_device *d
|
|
- int (*poll)(struct napi_struct *, int), int weight);
|
|
-
|
|
- /**
|
|
-+ * netif_threaded_napi_add - initialize a NAPI context
|
|
-+ * @dev: network device
|
|
-+ * @napi: NAPI context
|
|
-+ * @poll: polling function
|
|
-+ * @weight: default weight
|
|
-+ *
|
|
-+ * This variant of netif_napi_add() should be used from drivers using NAPI
|
|
-+ * with CPU intensive poll functions.
|
|
-+ * This will schedule polling from a high priority workqueue
|
|
-+ */
|
|
-+static inline void netif_threaded_napi_add(struct net_device *dev,
|
|
-+ struct napi_struct *napi,
|
|
-+ int (*poll)(struct napi_struct *, int),
|
|
-+ int weight)
|
|
-+{
|
|
-+ set_bit(NAPI_STATE_THREADED, &napi->state);
|
|
-+ netif_napi_add(dev, napi, poll, weight);
|
|
-+}
|
|
-+
|
|
-+/**
|
|
- * netif_tx_napi_add - initialize a NAPI context
|
|
- * @dev: network device
|
|
- * @napi: NAPI context
|
|
---- a/net/core/dev.c
|
|
-+++ b/net/core/dev.c
|
|
-@@ -156,6 +156,7 @@ static DEFINE_SPINLOCK(offload_lock);
|
|
- struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
|
|
- struct list_head ptype_all __read_mostly; /* Taps */
|
|
- static struct list_head offload_base __read_mostly;
|
|
-+static struct workqueue_struct *napi_workq __read_mostly;
|
|
-
|
|
- static int netif_rx_internal(struct sk_buff *skb);
|
|
- static int call_netdevice_notifiers_info(unsigned long val,
|
|
-@@ -5937,6 +5938,11 @@ void __napi_schedule(struct napi_struct
|
|
- {
|
|
- unsigned long flags;
|
|
-
|
|
-+ if (test_bit(NAPI_STATE_THREADED, &n->state)) {
|
|
-+ queue_work(napi_workq, &n->work);
|
|
-+ return;
|
|
-+ }
|
|
-+
|
|
- local_irq_save(flags);
|
|
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
|
|
- local_irq_restore(flags);
|
|
-@@ -5988,6 +5994,11 @@ EXPORT_SYMBOL(napi_schedule_prep);
|
|
- */
|
|
- void __napi_schedule_irqoff(struct napi_struct *n)
|
|
- {
|
|
-+ if (test_bit(NAPI_STATE_THREADED, &n->state)) {
|
|
-+ queue_work(napi_workq, &n->work);
|
|
-+ return;
|
|
-+ }
|
|
-+
|
|
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
|
|
- else
|
|
-@@ -6252,9 +6263,89 @@ static void init_gro_hash(struct napi_st
|
|
- napi->gro_bitmask = 0;
|
|
- }
|
|
-
|
|
-+static int __napi_poll(struct napi_struct *n, bool *repoll)
|
|
-+{
|
|
-+ int work, weight;
|
|
-+
|
|
-+ weight = n->weight;
|
|
-+
|
|
-+ /* This NAPI_STATE_SCHED test is for avoiding a race
|
|
-+ * with netpoll's poll_napi(). Only the entity which
|
|
-+ * obtains the lock and sees NAPI_STATE_SCHED set will
|
|
-+ * actually make the ->poll() call. Therefore we avoid
|
|
-+ * accidentally calling ->poll() when NAPI is not scheduled.
|
|
-+ */
|
|
-+ work = 0;
|
|
-+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
|
|
-+ work = n->poll(n, weight);
|
|
-+ trace_napi_poll(n, work, weight);
|
|
-+ }
|
|
-+
|
|
-+ WARN_ON_ONCE(work > weight);
|
|
-+
|
|
-+ if (likely(work < weight))
|
|
-+ return work;
|
|
-+
|
|
-+ /* Drivers must not modify the NAPI state if they
|
|
-+ * consume the entire weight. In such cases this code
|
|
-+ * still "owns" the NAPI instance and therefore can
|
|
-+ * move the instance around on the list at-will.
|
|
-+ */
|
|
-+ if (unlikely(napi_disable_pending(n))) {
|
|
-+ napi_complete(n);
|
|
-+ return work;
|
|
-+ }
|
|
-+
|
|
-+ if (n->gro_bitmask) {
|
|
-+ /* flush too old packets
|
|
-+ * If HZ < 1000, flush all packets.
|
|
-+ */
|
|
-+ napi_gro_flush(n, HZ >= 1000);
|
|
-+ }
|
|
-+
|
|
-+ gro_normal_list(n);
|
|
-+
|
|
-+ *repoll = true;
|
|
-+
|
|
-+ return work;
|
|
-+}
|
|
-+
|
|
-+static void napi_workfn(struct work_struct *work)
|
|
-+{
|
|
-+ struct napi_struct *n = container_of(work, struct napi_struct, work);
|
|
-+ void *have;
|
|
-+
|
|
-+ for (;;) {
|
|
-+ bool repoll = false;
|
|
-+
|
|
-+ local_bh_disable();
|
|
-+
|
|
-+ have = netpoll_poll_lock(n);
|
|
-+ __napi_poll(n, &repoll);
|
|
-+ netpoll_poll_unlock(have);
|
|
-+
|
|
-+ local_bh_enable();
|
|
-+
|
|
-+ if (!repoll)
|
|
-+ return;
|
|
-+
|
|
-+ if (!need_resched())
|
|
-+ continue;
|
|
-+
|
|
-+ /*
|
|
-+ * have to pay for the latency of task switch even if
|
|
-+ * napi is scheduled
|
|
-+ */
|
|
-+ queue_work(napi_workq, work);
|
|
-+ return;
|
|
-+ }
|
|
-+}
|
|
-+
|
|
- void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
|
|
- int (*poll)(struct napi_struct *, int), int weight)
|
|
- {
|
|
-+ if (dev->threaded)
|
|
-+ set_bit(NAPI_STATE_THREADED, &napi->state);
|
|
- INIT_LIST_HEAD(&napi->poll_list);
|
|
- hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
|
- napi->timer.function = napi_watchdog;
|
|
-@@ -6271,6 +6362,7 @@ void netif_napi_add(struct net_device *d
|
|
- #ifdef CONFIG_NETPOLL
|
|
- napi->poll_owner = -1;
|
|
- #endif
|
|
-+ INIT_WORK(&napi->work, napi_workfn);
|
|
- set_bit(NAPI_STATE_SCHED, &napi->state);
|
|
- set_bit(NAPI_STATE_NPSVC, &napi->state);
|
|
- list_add_rcu(&napi->dev_list, &dev->napi_list);
|
|
-@@ -6311,6 +6403,7 @@ static void flush_gro_hash(struct napi_s
|
|
- void netif_napi_del(struct napi_struct *napi)
|
|
- {
|
|
- might_sleep();
|
|
-+ cancel_work_sync(&napi->work);
|
|
- if (napi_hash_del(napi))
|
|
- synchronize_net();
|
|
- list_del_init(&napi->dev_list);
|
|
-@@ -6323,50 +6416,18 @@ EXPORT_SYMBOL(netif_napi_del);
|
|
-
|
|
- static int napi_poll(struct napi_struct *n, struct list_head *repoll)
|
|
- {
|
|
-+ bool do_repoll = false;
|
|
- void *have;
|
|
-- int work, weight;
|
|
-+ int work;
|
|
-
|
|
- list_del_init(&n->poll_list);
|
|
-
|
|
- have = netpoll_poll_lock(n);
|
|
-
|
|
-- weight = n->weight;
|
|
-+ work = __napi_poll(n, &do_repoll);
|
|
-
|
|
-- /* This NAPI_STATE_SCHED test is for avoiding a race
|
|
-- * with netpoll's poll_napi(). Only the entity which
|
|
-- * obtains the lock and sees NAPI_STATE_SCHED set will
|
|
-- * actually make the ->poll() call. Therefore we avoid
|
|
-- * accidentally calling ->poll() when NAPI is not scheduled.
|
|
-- */
|
|
-- work = 0;
|
|
-- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
|
|
-- work = n->poll(n, weight);
|
|
-- trace_napi_poll(n, work, weight);
|
|
-- }
|
|
--
|
|
-- WARN_ON_ONCE(work > weight);
|
|
--
|
|
-- if (likely(work < weight))
|
|
-- goto out_unlock;
|
|
--
|
|
-- /* Drivers must not modify the NAPI state if they
|
|
-- * consume the entire weight. In such cases this code
|
|
-- * still "owns" the NAPI instance and therefore can
|
|
-- * move the instance around on the list at-will.
|
|
-- */
|
|
-- if (unlikely(napi_disable_pending(n))) {
|
|
-- napi_complete(n);
|
|
-+ if (!do_repoll)
|
|
- goto out_unlock;
|
|
-- }
|
|
--
|
|
-- if (n->gro_bitmask) {
|
|
-- /* flush too old packets
|
|
-- * If HZ < 1000, flush all packets.
|
|
-- */
|
|
-- napi_gro_flush(n, HZ >= 1000);
|
|
-- }
|
|
--
|
|
-- gro_normal_list(n);
|
|
-
|
|
- /* Some drivers may have called napi_schedule
|
|
- * prior to exhausting their budget.
|
|
-@@ -10346,6 +10407,10 @@ static int __init net_dev_init(void)
|
|
- sd->backlog.weight = weight_p;
|
|
- }
|
|
-
|
|
-+ napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
|
|
-+ WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS);
|
|
-+ BUG_ON(!napi_workq);
|
|
-+
|
|
- dev_boot_phase = 0;
|
|
-
|
|
- /* The loopback device is special if any other network devices
|
|
---- a/net/core/net-sysfs.c
|
|
-+++ b/net/core/net-sysfs.c
|
|
-@@ -442,6 +442,52 @@ static ssize_t proto_down_store(struct d
|
|
- }
|
|
- NETDEVICE_SHOW_RW(proto_down, fmt_dec);
|
|
-
|
|
-+static int change_napi_threaded(struct net_device *dev, unsigned long val)
|
|
-+{
|
|
-+ struct napi_struct *napi;
|
|
-+
|
|
-+ if (list_empty(&dev->napi_list))
|
|
-+ return -EOPNOTSUPP;
|
|
-+
|
|
-+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
|
|
-+ if (val)
|
|
-+ set_bit(NAPI_STATE_THREADED, &napi->state);
|
|
-+ else
|
|
-+ clear_bit(NAPI_STATE_THREADED, &napi->state);
|
|
-+ }
|
|
-+
|
|
-+ return 0;
|
|
-+}
|
|
-+
|
|
-+static ssize_t napi_threaded_store(struct device *dev,
|
|
-+ struct device_attribute *attr,
|
|
-+ const char *buf, size_t len)
|
|
-+{
|
|
-+ return netdev_store(dev, attr, buf, len, change_napi_threaded);
|
|
-+}
|
|
-+
|
|
-+static ssize_t napi_threaded_show(struct device *dev,
|
|
-+ struct device_attribute *attr,
|
|
-+ char *buf)
|
|
-+{
|
|
-+ struct net_device *netdev = to_net_dev(dev);
|
|
-+ struct napi_struct *napi;
|
|
-+ bool enabled = false;
|
|
-+
|
|
-+ if (!rtnl_trylock())
|
|
-+ return restart_syscall();
|
|
-+
|
|
-+ list_for_each_entry(napi, &netdev->napi_list, dev_list) {
|
|
-+ if (test_bit(NAPI_STATE_THREADED, &napi->state))
|
|
-+ enabled = true;
|
|
-+ }
|
|
-+
|
|
-+ rtnl_unlock();
|
|
-+
|
|
-+ return sprintf(buf, fmt_dec, enabled);
|
|
-+}
|
|
-+static DEVICE_ATTR_RW(napi_threaded);
|
|
-+
|
|
- static ssize_t phys_port_id_show(struct device *dev,
|
|
- struct device_attribute *attr, char *buf)
|
|
- {
|
|
-@@ -532,6 +578,7 @@ static struct attribute *net_class_attrs
|
|
- &dev_attr_flags.attr,
|
|
- &dev_attr_tx_queue_len.attr,
|
|
- &dev_attr_gro_flush_timeout.attr,
|
|
-+ &dev_attr_napi_threaded.attr,
|
|
- &dev_attr_phys_port_id.attr,
|
|
- &dev_attr_phys_port_name.attr,
|
|
- &dev_attr_phys_switch_id.attr,
|
|
--
|
|
2.25.1
|
|
|