mirror of
https://github.com/Telecominfraproject/wlan-ap.git
synced 2025-10-29 17:42:41 +00:00
Compare commits
2 Commits
release/v4
...
staging-WI
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb911ccd0f | ||
|
|
ce89ff0ffe |
@@ -0,0 +1,53 @@
|
||||
From 375d0d25e6c02991392e44956c81cbac84909f49 Mon Sep 17 00:00:00 2001
|
||||
From: Venkat Chimata <venkat@nearhop.com>
|
||||
Date: Thu, 4 Sep 2025 00:09:17 +0530
|
||||
Subject: [PATCH] wifi: ap: mitigate peer-delete WMI timeout to reduce blind
|
||||
period & prevent peer leaks
|
||||
|
||||
1. When a connected client roams to another AP, the AP is trying to delete the peer
|
||||
but for some reason the WMI command times out and while driver is waiting for
|
||||
the response, we observed that the AP doesn't respond to any frames from STA
|
||||
(probe requests, authentication etc) and once the response times out (3 seconds default)
|
||||
then AP starts responding to the older requests but client has already connected to
|
||||
another AP. As the root cause for the response timing out is in the FW, we added
|
||||
a WAR to reduce the timeout to minimize this blind period, with this AP responds
|
||||
after 100ms and client connects successfully. And 100ms timeout is also reasonable
|
||||
for this internal operation.
|
||||
2. In case of peer deletion timeout, the driver peer database is not cleared, so,
|
||||
if this happens often (which it does) then eventually we hit the max peers in the
|
||||
driver and all subsequent operations fail, so, in case of timeout ignore the failure
|
||||
and proceed with driver peer database cleanup.
|
||||
|
||||
Signed-off-by: Venkat Chimata <venkat@nearhop.com>
|
||||
---
|
||||
drivers/net/wireless/ath/ath11k/peer.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c
|
||||
index 1907067..aefc6ba 100644
|
||||
--- a/drivers/net/wireless/ath/ath11k/peer.c
|
||||
+++ b/drivers/net/wireless/ath/ath11k/peer.c
|
||||
@@ -771,7 +771,7 @@ int ath11k_wait_for_peer_delete_done(struct ath11k *ar, u32 vdev_id,
|
||||
}
|
||||
|
||||
time_left = wait_for_completion_timeout(&ar->peer_delete_done,
|
||||
- 3 * HZ);
|
||||
+ 100 * HZ / 1000);
|
||||
if (time_left == 0) {
|
||||
ath11k_warn(ar->ab, "Timeout in receiving peer delete response\n");
|
||||
return -ETIMEDOUT;
|
||||
@@ -857,7 +857,10 @@ int ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, u8 *addr)
|
||||
}
|
||||
|
||||
ret = ath11k_wait_for_peer_delete_done(ar, vdev_id, addr);
|
||||
- if (ret)
|
||||
+ /* WAR: For the timeout case, proceed to delete the peer anyway, as FW is
|
||||
+ * still functional, without this, driver ends up hitting max peers
|
||||
+ */
|
||||
+ if (ret && ret != -ETIMEDOUT)
|
||||
return ret;
|
||||
|
||||
ATH11K_MEMORY_STATS_DEC(ar->ab, per_peer_object,
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
From: John Crispin <john@phrozen.org>
|
||||
Date: Thu, 2 Oct 2025 09:00:00 +0000
|
||||
Subject: [PATCH] ath11k: fix num_peers counter corruption and add debug
|
||||
logging
|
||||
|
||||
The num_peers counter becomes corrupted during peer deletion due to race
|
||||
conditions between ath11k_peer_delete() and ath11k_peer_unmap_event().
|
||||
The firmware may or may not send unmap events, and the timing varies,
|
||||
causing the counter to either leak (increment without decrement) or
|
||||
underflow (double decrement).
|
||||
|
||||
Root causes:
|
||||
1. ath11k_peer_delete() doesn't decrement num_peers, relying on
|
||||
ath11k_peer_unmap_event() to do it
|
||||
2. Firmware sometimes doesn't send unmap events, leaving num_peers
|
||||
inflated
|
||||
3. When unmap events do arrive, timing races with ath11k_peer_delete()
|
||||
can cause missed decrements
|
||||
4. Cleanup paths may double-decrement if delete_in_progress not checked
|
||||
5. num_peers modified outside proper locking in some paths
|
||||
|
||||
This fix:
|
||||
- Moves num_peers decrement into ath11k_peer_delete() after successful
|
||||
peer deletion wait, ensuring exactly one decrement per deletion
|
||||
- Handles both cases: peer removed by unmap event, or peer still in list
|
||||
- Removes num_peers decrement from ath11k_peer_unmap_event() to prevent
|
||||
double-decrement when unmap event arrives
|
||||
- Adds ath11k_dp_peer_cleanup() call before ath11k_peer_delete() in
|
||||
roaming path to ensure datapath structures properly cleaned up
|
||||
- Adds delete_in_progress checks in cleanup paths to prevent
|
||||
double-delete
|
||||
- Ensures all num_peers modifications happen under base_lock
|
||||
- Adds comprehensive debug logging to track num_peers throughout peer
|
||||
lifecycle
|
||||
|
||||
Signed-off-by: Arif Alam <arif.alam@netexperience.com>
|
||||
Signed-off-by: John Crispin <john@phrozen.org>
|
||||
---
|
||||
--- a/drivers/net/wireless/ath/ath11k/mac.c
|
||||
+++ b/drivers/net/wireless/ath/ath11k/mac.c
|
||||
@@ -5742,14 +5742,22 @@ static int ath11k_mac_op_sta_state(struc
|
||||
mutex_lock(&ar->ab->tbl_mtx_lock);
|
||||
spin_lock_bh(&ar->ab->base_lock);
|
||||
peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
|
||||
- if (peer && peer->sta == sta) {
|
||||
+ /* Skip if peer deletion already in progress to prevent
|
||||
+ * double-delete and num_peers underflow
|
||||
+ */
|
||||
+ if (peer && peer->sta == sta && !peer->delete_in_progress) {
|
||||
ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n",
|
||||
vif->addr, arvif->vdev_id);
|
||||
ath11k_peer_rhash_delete(ar->ab, peer);
|
||||
peer->sta = NULL;
|
||||
+ /* num_peers decrement now happens under base_lock when
|
||||
+ * peer is actually removed from list
|
||||
+ */
|
||||
list_del(&peer->list);
|
||||
kfree(peer);
|
||||
ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer deleted %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, sta->addr, arvif->vdev_id, ar->num_peers);
|
||||
}
|
||||
spin_unlock_bh(&ar->ab->base_lock);
|
||||
mutex_unlock(&ar->ab->tbl_mtx_lock);
|
||||
@@ -7847,6 +7855,8 @@ err_peer_del:
|
||||
goto err_keyid;
|
||||
|
||||
ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s vif peer deleted %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, vif->addr, arvif->vdev_id, ar->num_peers);
|
||||
}
|
||||
|
||||
err_vdev_del:
|
||||
--- a/drivers/net/wireless/ath/ath11k/peer.c
|
||||
+++ b/drivers/net/wireless/ath/ath11k/peer.c
|
||||
@@ -461,6 +461,9 @@ void ath11k_peer_unmap_event(struct ath1
|
||||
ath11k_dbg(ab, ATH11K_DBG_PEER, "peer unmap vdev %d peer %pM id %d\n",
|
||||
peer->vdev_id, peer->addr, peer_id);
|
||||
|
||||
+ /* Don't decrement num_peers here - it's already decremented in
|
||||
+ * ath11k_peer_delete() after successful wait. Just clean up the peer.
|
||||
+ */
|
||||
list_del(&peer->list);
|
||||
kfree(peer);
|
||||
wake_up(&ab->peer_mapping_wq);
|
||||
@@ -726,6 +729,10 @@ void ath11k_peer_cleanup(struct ath11k *
|
||||
if (peer->vdev_id != vdev_id)
|
||||
continue;
|
||||
|
||||
+ /* Skip peers that are being deleted to prevent double-free */
|
||||
+ if (peer->delete_in_progress)
|
||||
+ continue;
|
||||
+
|
||||
ath11k_warn(ab, "removing stale peer %pM from vdev_id %d\n",
|
||||
peer->addr, vdev_id);
|
||||
|
||||
@@ -743,7 +750,10 @@ void ath11k_peer_cleanup(struct ath11k *
|
||||
ath11k_peer_rhash_delete(ab, peer);
|
||||
list_del(&peer->list);
|
||||
kfree(peer);
|
||||
+ /* num_peers decrement happens here under base_lock */
|
||||
ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer cleanup %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, peer->addr, vdev_id, ar->num_peers);
|
||||
}
|
||||
|
||||
spin_unlock_bh(&ab->base_lock);
|
||||
@@ -824,6 +834,12 @@ int ath11k_peer_delete(struct ath11k *ar
|
||||
|
||||
#ifdef CPTCFG_ATH11K_NSS_SUPPORT
|
||||
peer->delete_in_progress = true;
|
||||
+#else
|
||||
+ if (peer)
|
||||
+ peer->delete_in_progress = true;
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CPTCFG_ATH11K_NSS_SUPPORT
|
||||
if (peer->self_ast_entry) {
|
||||
ath11k_peer_del_ast(ar, peer->self_ast_entry);
|
||||
peer->self_ast_entry = NULL;
|
||||
@@ -863,10 +879,51 @@ int ath11k_peer_delete(struct ath11k *ar
|
||||
if (ret && ret != -ETIMEDOUT)
|
||||
return ret;
|
||||
|
||||
- ATH11K_MEMORY_STATS_DEC(ar->ab, per_peer_object,
|
||||
- sizeof(struct ath11k_peer));
|
||||
+ /* If timeout occurred, manually remove peer from list since firmware
|
||||
+ * won't send unmap event. This prevents peer leaks and num_peers corruption.
|
||||
+ */
|
||||
+ if (ret == -ETIMEDOUT) {
|
||||
+ ath11k_warn(ar->ab, "peer delete timeout %pM vdev %d, manually cleaning up\n",
|
||||
+ addr, vdev_id);
|
||||
|
||||
- ar->num_peers--;
|
||||
+ mutex_lock(&ar->ab->tbl_mtx_lock);
|
||||
+ spin_lock_bh(&ar->ab->base_lock);
|
||||
+ peer = ath11k_peer_find(ar->ab, vdev_id, addr);
|
||||
+ if (peer) {
|
||||
+ list_del(&peer->list);
|
||||
+ kfree(peer);
|
||||
+ ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
|
||||
+ "%s peer deleted (timeout) %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, addr, vdev_id, ar->num_peers);
|
||||
+ }
|
||||
+ spin_unlock_bh(&ar->ab->base_lock);
|
||||
+ mutex_unlock(&ar->ab->tbl_mtx_lock);
|
||||
+ } else {
|
||||
+ /* Normal path - but firmware may not send unmap event, so decrement here
|
||||
+ * after successful peer deletion wait
|
||||
+ */
|
||||
+ mutex_lock(&ar->ab->tbl_mtx_lock);
|
||||
+ spin_lock_bh(&ar->ab->base_lock);
|
||||
+ peer = ath11k_peer_find(ar->ab, vdev_id, addr);
|
||||
+ if (peer) {
|
||||
+ /* Peer still in list - firmware didn't send unmap event yet */
|
||||
+ list_del(&peer->list);
|
||||
+ kfree(peer);
|
||||
+ ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
|
||||
+ "%s peer deleted (no unmap event) %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, addr, vdev_id, ar->num_peers);
|
||||
+ } else {
|
||||
+ /* Peer already removed by unmap event - still need to decrement */
|
||||
+ ar->num_peers--;
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
|
||||
+ "%s peer deleted (via unmap event) %pM vdev_id: %d num_peers: %d\n",
|
||||
+ __func__, addr, vdev_id, ar->num_peers);
|
||||
+ }
|
||||
+ spin_unlock_bh(&ar->ab->base_lock);
|
||||
+ mutex_unlock(&ar->ab->tbl_mtx_lock);
|
||||
+ }
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -905,6 +962,7 @@ int ath11k_peer_create(struct ath11k *ar
|
||||
if (vdev_id == param->vdev_id)
|
||||
return -EINVAL;
|
||||
|
||||
+ ath11k_dp_peer_cleanup(ar, vdev_id, param->peer_addr);
|
||||
ath11k_peer_delete(ar, vdev_id, param->peer_addr);
|
||||
}
|
||||
|
||||
@@ -970,7 +1028,8 @@ int ath11k_peer_create(struct ath11k *ar
|
||||
ar->num_peers++;
|
||||
|
||||
if (ath11k_mac_sta_level_info(arvif, sta)) {
|
||||
- ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM\n", param->peer_addr);
|
||||
+ ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM vdev_id: %d num_peers: %d\n",
|
||||
+ param->peer_addr, param->vdev_id, ar->num_peers);
|
||||
peer->peer_logging_enabled = true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user