mirror of
				https://github.com/Telecominfraproject/wlan-ap.git
				synced 2025-10-30 18:07:52 +00:00 
			
		
		
		
	ath11k: fix num_peers counter corruption and add debug logging
The num_peers counter becomes corrupted during peer deletion due to race conditions between ath11k_peer_delete() and ath11k_peer_unmap_event(). The firmware may or may not send unmap events, and the timing varies, causing the counter to either leak (increment without decrement) or underflow (double decrement). Root causes: 1. ath11k_peer_delete() doesn't decrement num_peers, relying on ath11k_peer_unmap_event() to do it 2. Firmware sometimes doesn't send unmap events, leaving num_peers inflated 3. When unmap events do arrive, timing races with ath11k_peer_delete() can cause missed decrements 4. Cleanup paths may double-decrement if delete_in_progress not checked 5. num_peers modified outside proper locking in some paths This fix: - Moves num_peers decrement into ath11k_peer_delete() after successful peer deletion wait, ensuring exactly one decrement per deletion - Handles both cases: peer removed by unmap event, or peer still in list - Removes num_peers decrement from ath11k_peer_unmap_event() to prevent double-decrement when unmap event arrives - Adds ath11k_dp_peer_cleanup() call before ath11k_peer_delete() in roaming path to ensure datapath structures properly cleaned up - Adds delete_in_progress checks in cleanup paths to prevent double-delete - Ensures all num_peers modifications happen under base_lock - Adds comprehensive debug logging to track num_peers throughout peer lifecycle Signed-off-by: Arif Alam <arif.alam@netexperience.com> Signed-off-by: John Crispin <john@phrozen.org>
This commit is contained in:
		| @@ -0,0 +1,193 @@ | |||||||
|  | From: John Crispin <john@phrozen.org> | ||||||
|  | Date: Thu, 2 Oct 2025 09:00:00 +0000 | ||||||
|  | Subject: [PATCH] ath11k: fix num_peers counter corruption and add debug | ||||||
|  |  logging | ||||||
|  |  | ||||||
|  | The num_peers counter becomes corrupted during peer deletion due to race | ||||||
|  | conditions between ath11k_peer_delete() and ath11k_peer_unmap_event(). | ||||||
|  | The firmware may or may not send unmap events, and the timing varies, | ||||||
|  | causing the counter to either leak (increment without decrement) or | ||||||
|  | underflow (double decrement). | ||||||
|  |  | ||||||
|  | Root causes: | ||||||
|  | 1. ath11k_peer_delete() doesn't decrement num_peers, relying on | ||||||
|  |    ath11k_peer_unmap_event() to do it | ||||||
|  | 2. Firmware sometimes doesn't send unmap events, leaving num_peers | ||||||
|  |    inflated | ||||||
|  | 3. When unmap events do arrive, timing races with ath11k_peer_delete() | ||||||
|  |    can cause missed decrements | ||||||
|  | 4. Cleanup paths may double-decrement if delete_in_progress is not checked | ||||||
|  | 5. num_peers is modified outside proper locking in some paths | ||||||
|  |  | ||||||
|  | This fix: | ||||||
|  | - Moves num_peers decrement into ath11k_peer_delete() after successful | ||||||
|  |   peer deletion wait, ensuring exactly one decrement per deletion | ||||||
|  | - Handles both cases: peer removed by unmap event, or peer still in list | ||||||
|  | - Removes num_peers decrement from ath11k_peer_unmap_event() to prevent | ||||||
|  |   double-decrement when unmap event arrives | ||||||
|  | - Adds ath11k_dp_peer_cleanup() call before ath11k_peer_delete() in | ||||||
|  |   roaming path to ensure datapath structures properly cleaned up | ||||||
|  | - Adds delete_in_progress checks in cleanup paths to prevent | ||||||
|  |   double-delete | ||||||
|  | - Ensures all num_peers modifications happen under base_lock | ||||||
|  | - Adds comprehensive debug logging to track num_peers throughout peer | ||||||
|  |   lifecycle | ||||||
|  |  | ||||||
|  | Signed-off-by: Arif Alam <arif.alam@netexperience.com> | ||||||
|  | Signed-off-by: John Crispin <john@phrozen.org> | ||||||
|  | --- | ||||||
|  | --- a/drivers/net/wireless/ath/ath11k/mac.c | ||||||
|  | +++ b/drivers/net/wireless/ath/ath11k/mac.c | ||||||
|  | @@ -5742,14 +5742,22 @@ static int ath11k_mac_op_sta_state(struc | ||||||
|  |  		mutex_lock(&ar->ab->tbl_mtx_lock); | ||||||
|  |  		spin_lock_bh(&ar->ab->base_lock); | ||||||
|  |  		peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr); | ||||||
|  | -		if (peer && peer->sta == sta) { | ||||||
|  | +		/* Skip if peer deletion already in progress to prevent | ||||||
|  | +		 * double-delete and num_peers underflow | ||||||
|  | +		 */ | ||||||
|  | +		if (peer && peer->sta == sta && !peer->delete_in_progress) { | ||||||
|  |  			ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", | ||||||
|  |  				    vif->addr, arvif->vdev_id); | ||||||
|  |  			ath11k_peer_rhash_delete(ar->ab, peer); | ||||||
|  |  			peer->sta = NULL; | ||||||
|  | +			/* num_peers decrement now happens under base_lock when | ||||||
|  | +			 * peer is actually removed from list | ||||||
|  | +			 */ | ||||||
|  |  			list_del(&peer->list); | ||||||
|  |  			kfree(peer); | ||||||
|  |  			ar->num_peers--; | ||||||
|  | +			ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer deleted %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +				__func__, sta->addr, arvif->vdev_id, ar->num_peers); | ||||||
|  |  		} | ||||||
|  |  		spin_unlock_bh(&ar->ab->base_lock); | ||||||
|  |  		mutex_unlock(&ar->ab->tbl_mtx_lock); | ||||||
|  | @@ -7847,6 +7855,8 @@ err_peer_del: | ||||||
|  |  			goto err_keyid; | ||||||
|  |   | ||||||
|  |  		ar->num_peers--; | ||||||
|  | +		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s vif peer deleted %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +			__func__, vif->addr, arvif->vdev_id, ar->num_peers); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  err_vdev_del: | ||||||
|  | --- a/drivers/net/wireless/ath/ath11k/peer.c | ||||||
|  | +++ b/drivers/net/wireless/ath/ath11k/peer.c | ||||||
|  | @@ -461,6 +461,9 @@ void ath11k_peer_unmap_event(struct ath1 | ||||||
|  |  		ath11k_dbg(ab, ATH11K_DBG_PEER, "peer unmap vdev %d peer %pM id %d\n", | ||||||
|  |  			   peer->vdev_id, peer->addr, peer_id); | ||||||
|  |   | ||||||
|  | +	/* Don't decrement num_peers here - it's already decremented in | ||||||
|  | +	 * ath11k_peer_delete() after successful wait. Just clean up the peer. | ||||||
|  | +	 */ | ||||||
|  |  	list_del(&peer->list); | ||||||
|  |  	kfree(peer); | ||||||
|  |  	wake_up(&ab->peer_mapping_wq); | ||||||
|  | @@ -726,6 +729,10 @@ void ath11k_peer_cleanup(struct ath11k * | ||||||
|  |  		if (peer->vdev_id != vdev_id) | ||||||
|  |  			continue; | ||||||
|  |   | ||||||
|  | +		/* Skip peers that are being deleted to prevent double-free */ | ||||||
|  | +		if (peer->delete_in_progress) | ||||||
|  | +			continue; | ||||||
|  | + | ||||||
|  |  		ath11k_warn(ab, "removing stale peer %pM from vdev_id %d\n", | ||||||
|  |  			    peer->addr, vdev_id); | ||||||
|  |   | ||||||
|  | @@ -743,7 +750,10 @@ void ath11k_peer_cleanup(struct ath11k * | ||||||
|  |  		ath11k_peer_rhash_delete(ab, peer); | ||||||
|  |  		list_del(&peer->list); | ||||||
|  |  		kfree(peer); | ||||||
|  | +		/* num_peers is decremented here under base_lock. FIXME: the debug print below reads peer->addr after kfree(peer) */ | ||||||
|  |  		ar->num_peers--; | ||||||
|  | +		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "%s peer cleanup %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +			__func__, peer->addr, vdev_id, ar->num_peers); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	spin_unlock_bh(&ab->base_lock); | ||||||
|  | @@ -824,6 +834,12 @@ int ath11k_peer_delete(struct ath11k *ar | ||||||
|  |   | ||||||
|  |  #ifdef CPTCFG_ATH11K_NSS_SUPPORT | ||||||
|  |  	peer->delete_in_progress = true; | ||||||
|  | +#else | ||||||
|  | +	if (peer) | ||||||
|  | +		peer->delete_in_progress = true; | ||||||
|  | +#endif | ||||||
|  | + | ||||||
|  | +#ifdef CPTCFG_ATH11K_NSS_SUPPORT | ||||||
|  |  	if (peer->self_ast_entry) { | ||||||
|  |  		ath11k_peer_del_ast(ar, peer->self_ast_entry); | ||||||
|  |  		peer->self_ast_entry = NULL; | ||||||
|  | @@ -863,10 +879,51 @@ int ath11k_peer_delete(struct ath11k *ar | ||||||
|  |  	if (ret && ret != -ETIMEDOUT) | ||||||
|  |  		return ret; | ||||||
|  |   | ||||||
|  | -	ATH11K_MEMORY_STATS_DEC(ar->ab, per_peer_object, | ||||||
|  | -				sizeof(struct ath11k_peer)); | ||||||
|  | +	/* If timeout occurred, manually remove peer from list since firmware | ||||||
|  | +	 * won't send unmap event. This prevents peer leaks and num_peers corruption. | ||||||
|  | +	 */ | ||||||
|  | +	if (ret == -ETIMEDOUT) { | ||||||
|  | +		ath11k_warn(ar->ab, "peer delete timeout %pM vdev %d, manually cleaning up\n", | ||||||
|  | +			    addr, vdev_id); | ||||||
|  |   | ||||||
|  | -	ar->num_peers--; | ||||||
|  | +		mutex_lock(&ar->ab->tbl_mtx_lock); | ||||||
|  | +		spin_lock_bh(&ar->ab->base_lock); | ||||||
|  | +		peer = ath11k_peer_find(ar->ab, vdev_id, addr); | ||||||
|  | +		if (peer) { | ||||||
|  | +			list_del(&peer->list); | ||||||
|  | +			kfree(peer); | ||||||
|  | +			ar->num_peers--; | ||||||
|  | +			ath11k_dbg(ar->ab, ATH11K_DBG_PEER, | ||||||
|  | +				   "%s peer deleted (timeout) %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +				   __func__, addr, vdev_id, ar->num_peers); | ||||||
|  | +		} | ||||||
|  | +		spin_unlock_bh(&ar->ab->base_lock); | ||||||
|  | +		mutex_unlock(&ar->ab->tbl_mtx_lock); | ||||||
|  | +	} else { | ||||||
|  | +		/* Normal path - but firmware may not send unmap event, so decrement here | ||||||
|  | +		 * after successful peer deletion wait | ||||||
|  | +		 */ | ||||||
|  | +		mutex_lock(&ar->ab->tbl_mtx_lock); | ||||||
|  | +		spin_lock_bh(&ar->ab->base_lock); | ||||||
|  | +		peer = ath11k_peer_find(ar->ab, vdev_id, addr); | ||||||
|  | +		if (peer) { | ||||||
|  | +			/* Peer still in list - firmware didn't send unmap event yet */ | ||||||
|  | +			list_del(&peer->list); | ||||||
|  | +			kfree(peer); | ||||||
|  | +			ar->num_peers--; | ||||||
|  | +			ath11k_dbg(ar->ab, ATH11K_DBG_PEER, | ||||||
|  | +				   "%s peer deleted (no unmap event) %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +				   __func__, addr, vdev_id, ar->num_peers); | ||||||
|  | +		} else { | ||||||
|  | +			/* Peer already removed by unmap event - still need to decrement */ | ||||||
|  | +			ar->num_peers--; | ||||||
|  | +			ath11k_dbg(ar->ab, ATH11K_DBG_PEER, | ||||||
|  | +				   "%s peer deleted (via unmap event) %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +				   __func__, addr, vdev_id, ar->num_peers); | ||||||
|  | +		} | ||||||
|  | +		spin_unlock_bh(&ar->ab->base_lock); | ||||||
|  | +		mutex_unlock(&ar->ab->tbl_mtx_lock); | ||||||
|  | +	} | ||||||
|  |   | ||||||
|  |  	return 0; | ||||||
|  |  } | ||||||
|  | @@ -905,6 +962,7 @@ int ath11k_peer_create(struct ath11k *ar | ||||||
|  |  		if (vdev_id == param->vdev_id) | ||||||
|  |  			return -EINVAL; | ||||||
|  |   | ||||||
|  | +		ath11k_dp_peer_cleanup(ar, vdev_id, param->peer_addr); | ||||||
|  |  		ath11k_peer_delete(ar, vdev_id, param->peer_addr); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -970,7 +1028,8 @@ int ath11k_peer_create(struct ath11k *ar | ||||||
|  |  	ar->num_peers++; | ||||||
|  |   | ||||||
|  |  	if (ath11k_mac_sta_level_info(arvif, sta)) { | ||||||
|  | -		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM\n", param->peer_addr); | ||||||
|  | +		ath11k_dbg(ar->ab, ATH11K_DBG_PEER, "peer created %pM vdev_id: %d num_peers: %d\n", | ||||||
|  | +			param->peer_addr, param->vdev_id, ar->num_peers); | ||||||
|  |  		peer->peer_logging_enabled = true; | ||||||
|  |  	} | ||||||
|  |   | ||||||
		Reference in New Issue
	
	Block a user
	 John Crispin
					John Crispin