diff --git a/feeds/ipq807x_v5.4/mac80211/patches/pending/a-102-ath11k-fix-num-peers-counter-and-add-debug-logs.patch b/feeds/ipq807x_v5.4/mac80211/patches/pending/a-102-ath11k-fix-num-peers-counter-and-add-debug-logs.patch
new file mode 100644
index 000000000..70331f7f7
--- /dev/null
+++ b/feeds/ipq807x_v5.4/mac80211/patches/pending/a-102-ath11k-fix-num-peers-counter-and-add-debug-logs.patch
@@ -0,0 +1,195 @@
+From: John Crispin
+Date: Thu, 2 Oct 2025 09:00:00 +0000
+Subject: [PATCH] ath11k: fix num_peers counter corruption and add debug
+ logging
+
+The num_peers counter becomes corrupted during peer deletion due to race
+conditions between ath11k_peer_delete() and ath11k_peer_unmap_event().
+The firmware may or may not send unmap events, and the timing varies,
+causing the counter to either leak (increment without decrement) or
+underflow (double decrement).
+
+Root causes:
+1. ath11k_peer_delete() doesn't decrement num_peers, relying on
+   ath11k_peer_unmap_event() to do it
+2. Firmware sometimes doesn't send unmap events, leaving num_peers
+   inflated
+3. When unmap events do arrive, timing races with ath11k_peer_delete()
+   can cause missed decrements
+4. Cleanup paths may double-decrement if delete_in_progress is not checked
+5. num_peers is modified outside proper locking in some paths
+
+This fix:
+- Moves num_peers decrement into ath11k_peer_delete() after successful
+  peer deletion wait, ensuring exactly one decrement per deletion
+- Handles both cases: peer removed by unmap event, or peer still in list
+- Leaves num_peers untouched in ath11k_peer_unmap_event() (documented in
+  a comment) so a late unmap event cannot cause a double decrement
+- Adds ath11k_dp_peer_cleanup() call before ath11k_peer_delete() in
+  roaming path to ensure datapath structures are properly cleaned up
+- Adds delete_in_progress checks in cleanup paths to prevent
+  double-delete
+- Ensures all num_peers modifications happen under base_lock
+- Adds comprehensive debug logging to track num_peers throughout peer
+  lifecycle
+
+Signed-off-by: Arif Alam
+Signed-off-by: John Crispin
+---
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -5742,14 +5742,22 @@ static int ath11k_mac_op_sta_state(struc
+ 		mutex_lock(&ar->ab->tbl_mtx_lock);
+ 		spin_lock_bh(&ar->ab->base_lock);
+ 		peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr);
+-		if (peer && peer->sta == sta) {
++		/* Skip if peer deletion already in progress to prevent
++		 * double-delete and num_peers underflow
++		 */
++		if (peer && peer->sta == sta && !peer->delete_in_progress) {
+ 			ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n",
+ 				    vif->addr, arvif->vdev_id);
+ 			ath11k_peer_rhash_delete(ar->ab, peer);
+ 			peer->sta = NULL;
++			/* num_peers decrement now happens under base_lock when
++			 * peer is actually removed from list
++			 */
+ 			list_del(&peer->list);
+ 			kfree(peer);
+ 			ar->num_peers--;
++			ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer deleted %pM vdev_id: %d num_peers: %d\n",
++				   __func__, sta->addr, arvif->vdev_id, ar->num_peers);
+ 		}
+ 		spin_unlock_bh(&ar->ab->base_lock);
+ 		mutex_unlock(&ar->ab->tbl_mtx_lock);
+@@ -7847,6 +7855,8 @@ err_peer_del:
+ 			goto err_keyid;
+ 
+ 		ar->num_peers--;
++		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s vif peer deleted %pM vdev_id: %d num_peers: %d\n",
++			   __func__, vif->addr, arvif->vdev_id, ar->num_peers);
+ 	}
+ 
+ err_vdev_del:
+--- a/drivers/net/wireless/ath/ath11k/peer.c
++++ b/drivers/net/wireless/ath/ath11k/peer.c
+@@ -461,6 +461,9 @@ void ath11k_peer_unmap_event(struct ath1
+ 	ath11k_dbg(ab, ATH11K_DBG_PEER, "peer unmap vdev %d peer %pM id %d\n",
+ 		   peer->vdev_id, peer->addr, peer_id);
+ 
++	/* Don't decrement num_peers here - it's already decremented in
++	 * ath11k_peer_delete() after successful wait. Just clean up the peer.
++	 */
+ 	list_del(&peer->list);
+ 	kfree(peer);
+ 	wake_up(&ab->peer_mapping_wq);
+@@ -726,6 +729,10 @@ void ath11k_peer_cleanup(struct ath11k *
+ 		if (peer->vdev_id != vdev_id)
+ 			continue;
+ 
++		/* Skip peers that are being deleted to prevent double-free */
++		if (peer->delete_in_progress)
++			continue;
++
+ 		ath11k_warn(ab, "removing stale peer %pM from vdev_id %d\n",
+ 			    peer->addr, vdev_id);
+ 
+@@ -743,7 +750,11 @@ void ath11k_peer_cleanup(struct ath11k *
+ 		ath11k_peer_rhash_delete(ab, peer);
+ 		list_del(&peer->list);
+-		kfree(peer);
++		/* num_peers decrement happens here under base_lock */
+ 		ar->num_peers--;
++		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer cleanup %pM vdev_id: %d num_peers: %d\n",
++			   __func__, peer->addr, vdev_id, ar->num_peers);
++		/* Free only after logging: the message reads peer->addr */
++		kfree(peer);
+ 	}
+ 
+ 	spin_unlock_bh(&ab->base_lock);
+@@ -824,6 +835,12 @@ int ath11k_peer_delete(struct ath11k *ar
+ 
+ #ifdef CPTCFG_ATH11K_NSS_SUPPORT
+ 	peer->delete_in_progress = true;
++#else
++	if (peer)
++		peer->delete_in_progress = true;
++#endif
++
++#ifdef CPTCFG_ATH11K_NSS_SUPPORT
+ 	if (peer->self_ast_entry) {
+ 		ath11k_peer_del_ast(ar, peer->self_ast_entry);
+ 		peer->self_ast_entry = NULL;
+@@ -863,10 +880,51 @@ int ath11k_peer_delete(struct ath11k *ar
+ 	if (ret && ret != -ETIMEDOUT)
+ 		return ret;
+ 
+-	ATH11K_MEMORY_STATS_DEC(ar->ab, per_peer_object,
+-				sizeof(struct ath11k_peer));
++	/* If timeout occurred, manually remove peer from list since firmware
++	 * won't send unmap event. This prevents peer leaks and num_peers corruption.
++	 */
++	if (ret == -ETIMEDOUT) {
++		ath11k_warn(ar->ab, "peer delete timeout %pM vdev %d, manually cleaning up\n",
++			    addr, vdev_id);
+ 
+-	ar->num_peers--;
++		mutex_lock(&ar->ab->tbl_mtx_lock);
++		spin_lock_bh(&ar->ab->base_lock);
++		peer = ath11k_peer_find(ar->ab, vdev_id, addr);
++		if (peer) {
++			list_del(&peer->list);
++			kfree(peer);
++			ar->num_peers--;
++			ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
++				   "%s peer deleted (timeout) %pM vdev_id: %d num_peers: %d\n",
++				   __func__, addr, vdev_id, ar->num_peers);
++		}
++		spin_unlock_bh(&ar->ab->base_lock);
++		mutex_unlock(&ar->ab->tbl_mtx_lock);
++	} else {
++		/* Normal path - but firmware may not send unmap event, so decrement here
++		 * after successful peer deletion wait
++		 */
++		mutex_lock(&ar->ab->tbl_mtx_lock);
++		spin_lock_bh(&ar->ab->base_lock);
++		peer = ath11k_peer_find(ar->ab, vdev_id, addr);
++		if (peer) {
++			/* Peer still in list - firmware didn't send unmap event yet */
++			list_del(&peer->list);
++			kfree(peer);
++			ar->num_peers--;
++			ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
++				   "%s peer deleted (no unmap event) %pM vdev_id: %d num_peers: %d\n",
++				   __func__, addr, vdev_id, ar->num_peers);
++		} else {
++			/* Peer already removed by unmap event - still need to decrement */
++			ar->num_peers--;
++			ath11k_dbg(ar->ab, ATH11K_DBG_PEER,
++				   "%s peer deleted (via unmap event) %pM vdev_id: %d num_peers: %d\n",
++				   __func__, addr, vdev_id, ar->num_peers);
++		}
++		spin_unlock_bh(&ar->ab->base_lock);
++		mutex_unlock(&ar->ab->tbl_mtx_lock);
++	}
+ 
+ 	return 0;
+ }
+@@ -905,6 +963,7 @@ int ath11k_peer_create(struct ath11k *ar
+ 		if (vdev_id == param->vdev_id)
+ 			return -EINVAL;
+ 
++		ath11k_dp_peer_cleanup(ar, vdev_id, param->peer_addr);
+ 		ath11k_peer_delete(ar, vdev_id, param->peer_addr);
+ 	}
+ 
+@@ -970,7 +1029,8 @@ int ath11k_peer_create(struct ath11k *ar
+ 	ar->num_peers++;
+ 
+ 	if (ath11k_mac_sta_level_info(arvif, sta)) {
+-		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM\n", param->peer_addr);
++		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM vdev_id: %d num_peers: %d\n",
++			   param->peer_addr, param->vdev_id, ar->num_peers);
+ 		peer->peer_logging_enabled = true;
+ 	}
+ 