mirror of
				https://github.com/Telecominfraproject/ols-nos.git
				synced 2025-10-31 18:17:52 +00:00 
			
		
		
		
	[Mellanox] implement platform wait in python code (#17398)
- Why I did it: A new implementation of the Nvidia platform_wait is needed because: 1. the sysfs nodes it relied on are deprecated by hw-mgmt; 2. there are new dependencies on the SDK; 3. CMIS host management mode must be supported. - How I did it: Wait for hw-management to be ready and wait for the SDK sysfs nodes to be ready. - How to verify it: Manual test, unit test, sonic-mgmt regression.
This commit is contained in:
		| @@ -1,68 +1,32 @@ | ||||
| #!/bin/bash | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| declare -r SYSLOG_LOGGER="/usr/bin/logger" | ||||
| declare -r SYSLOG_IDENTIFIER="platform_wait" | ||||
| declare -r SYSLOG_ERROR="error" | ||||
| declare -r SYSLOG_NOTICE="notice" | ||||
| declare -r SYSLOG_INFO="info" | ||||
| # | ||||
| # Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||||
| # Apache-2.0 | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| # | ||||
|  | ||||
| declare -r HW_MGMT_CONFIG="/var/run/hw-management/config" | ||||
| import sys | ||||
| from sonic_platform.device_data import DeviceDataManager | ||||
| from sonic_py_common.logger import Logger | ||||
|  | ||||
| declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done" | ||||
| declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num" | ||||
| declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed" | ||||
|  | ||||
| declare -r EXIT_SUCCESS="0" | ||||
| declare -r EXIT_TIMEOUT="1" | ||||
|  | ||||
# Send the given message to syslog at "error" priority under the
# platform_wait identifier.
# The arguments are handed to logger as-is instead of being re-parsed by
# eval, so quotes, globs or shell metacharacters in the message (for example
# captured command output) are logged literally and never executed.
function log_error() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_ERROR}" "$@"
}
|  | ||||
# Send the given message to syslog at "notice" priority under the
# platform_wait identifier.
# The arguments are handed to logger as-is instead of being re-parsed by
# eval, so quotes, globs or shell metacharacters in the message are logged
# literally and never executed.
function log_notice() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_NOTICE}" "$@"
}
|  | ||||
# Send the given message to syslog at "info" priority under the
# platform_wait identifier.
# The arguments are handed to logger as-is instead of being re-parsed by
# eval, so quotes, globs or shell metacharacters in the message are logged
# literally and never executed.
function log_info() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_INFO}" "$@"
}
|  | ||||
# Poll the hw-management config files until the ASIC init flag reads 1 and
# the chip-up counter equals the ASIC count.
# Polls up to _WDOG_MAX times with a sleep between attempts.
# Returns EXIT_SUCCESS when ready; otherwise logs the last values read and
# returns EXIT_TIMEOUT.
function wait_for_asic_chipup() {

    local -r _POLL_INTERVAL="1s"
    local -ir _WDOG_MAX="300"
    local -i _WDOG_CNT="1"

    # Last values read from the config files; also reported on timeout.
    local _ASIC_INIT="0"
    local _ASIC_COUNT="0"
    local _ASICS_CHIPUP="0"

    while (( _WDOG_CNT <= _WDOG_MAX )); do
        # stderr is captured too, so a missing file shows up in the timeout message
        _ASIC_INIT="$(cat "${ASIC_INIT_DONE}" 2>&1)"
        _ASIC_COUNT="$(cat "${NUM_ASICS}" 2>&1)"
        _ASICS_CHIPUP="$(cat "${ASIC_CHIPUP_COMPLETED}" 2>&1)"

        if [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then
            return "${EXIT_SUCCESS}"
        fi

        (( _WDOG_CNT += 1 ))
        sleep "${_POLL_INTERVAL}"
    done

    log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...."
    return "${EXIT_TIMEOUT}"
}
|  | ||||
# Script entry: wait for the ASIC and propagate the wait result as the
# script's exit status.
log_info "Wait for Mellanox ASIC to be ready"

wait_for_asic_chipup
EXIT_CODE="$?"

# Any non-success result from the wait ends the script with that same code.
[[ "${EXIT_CODE}" == "${EXIT_SUCCESS}" ]] || exit "${EXIT_CODE}"

log_notice "Mellanox ASIC is ready"

exit "${EXIT_SUCCESS}"
# Script body: block until DeviceDataManager reports the platform ready, log
# the outcome under the 'platform_wait' identifier and exit with 0 on success
# or -1 on timeout.
logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')

if not DeviceDataManager.wait_platform_ready():
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)

logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)
|   | ||||
| @@ -17,6 +17,7 @@ | ||||
|  | ||||
| import glob | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from . import utils | ||||
|  | ||||
| @@ -167,8 +168,11 @@ class DeviceDataManager: | ||||
|     @classmethod | ||||
|     @utils.read_only_cache() | ||||
|     def get_sfp_count(cls): | ||||
|         sfp_count = utils.read_int_from_file('/run/hw-management/config/sfp_counter') | ||||
|         return sfp_count if sfp_count > 0 else len(glob.glob('/sys/module/sx_core/asic0/module*')) | ||||
|         from sonic_py_common import device_info | ||||
|         platform_path = device_info.get_path_to_platform_dir() | ||||
|         platform_json_path = os.path.join(platform_path, 'platform.json') | ||||
|         platform_data = utils.load_json_file(platform_json_path) | ||||
|         return len(platform_data['chassis']['sfps']) | ||||
|  | ||||
|     @classmethod | ||||
|     def get_linecard_sfp_count(cls, lc_index): | ||||
| @@ -244,3 +248,23 @@ class DeviceDataManager: | ||||
|         sai_profile_file = os.path.join(hwsku_dir, 'sai.profile') | ||||
|         data = utils.read_key_value_file(sai_profile_file, delimeter='=') | ||||
|         return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1' | ||||
|      | ||||
|     @classmethod | ||||
|     def wait_platform_ready(cls): | ||||
|         """ | ||||
|         Wait for Nvidia platform related services(SDK, hw-management) ready | ||||
|         Returns: | ||||
|             bool: True if wait success else timeout | ||||
|         """ | ||||
|         conditions = [] | ||||
|         sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror'] | ||||
|         if cls.is_independent_mode(): | ||||
|             sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset', | ||||
|                                 'power_good', 'power_limit', 'power_on', 'temperature/input']) | ||||
|         else: | ||||
|             conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1) | ||||
|         sfp_count = cls.get_sfp_count() | ||||
|         for sfp_index in range(sfp_count): | ||||
|             for sysfs_node in sysfs_nodes: | ||||
|                 conditions.append(lambda: os.path.exists(f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}')) | ||||
|         return utils.wait_until_conditions(conditions, 300, 1) | ||||
|   | ||||
| @@ -290,6 +290,30 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs): | ||||
|     return False | ||||
|  | ||||
|  | ||||
def wait_until_conditions(conditions, timeout, interval=1):
    """
    Wait until all the conditions become true

    Conditions are polled in order and a condition that has returned True is
    not evaluated again. The conditions are always evaluated at least once,
    and once more after the last sleep, so the wait never ends on a sleep
    whose outcome is not checked.

    Args:
        conditions (list): a list of callable which generate True|False
        timeout (int): wait time in seconds
        interval (int, optional): seconds to sleep between two polls. Must be
            positive, otherwise the remaining time never decreases. Defaults to 1.

    Returns:
        bool: True if wait success else False
    """
    pending_conditions = conditions
    while True:
        # Keep only the conditions that are still not satisfied
        pending_conditions = [condition for condition in pending_conditions if not condition()]
        if not pending_conditions:
            return True
        if timeout <= 0:
            # Time budget used up and something is still pending
            return False
        time.sleep(interval)
        timeout -= interval
|  | ||||
|    | ||||
| class TimerEvent: | ||||
|     def __init__(self, interval, cb, repeat): | ||||
|         self.interval = interval | ||||
|   | ||||
| @@ -60,6 +60,26 @@ class TestDeviceData: | ||||
|         mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} | ||||
|         assert DeviceDataManager.is_independent_mode() | ||||
|  | ||||
|     @mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp')) | ||||
|     @mock.patch('sonic_platform.device_data.utils.load_json_file') | ||||
|     def test_get_sfp_count(self, mock_load_json): | ||||
|         mock_load_json.return_value = { | ||||
|             'chassis': { | ||||
|                 'sfps': [1,2,3] | ||||
|             } | ||||
|         } | ||||
|         assert DeviceDataManager.get_sfp_count() == 3 | ||||
|  | ||||
|  | ||||
|  | ||||
|     @mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock()) | ||||
|     @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3)) | ||||
|     @mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1)) | ||||
|     @mock.patch('sonic_platform.device_data.os.path.exists') | ||||
|     @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') | ||||
|     def test_wait_platform_ready(self, mock_is_indep, mock_exists): | ||||
|         mock_exists.return_value = True | ||||
|         mock_is_indep.return_value = True | ||||
|         assert DeviceDataManager.wait_platform_ready() | ||||
|         mock_is_indep.return_value = False | ||||
|         assert DeviceDataManager.wait_platform_ready() | ||||
|         mock_exists.return_value = False | ||||
|         assert not DeviceDataManager.wait_platform_ready() | ||||
|   | ||||
| @@ -196,6 +196,13 @@ class TestUtils: | ||||
|         with mock.patch('sonic_platform.utils.open', mock_os_open): | ||||
|             assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} | ||||
|              | ||||
|     @mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock()) | ||||
|     def test_wait_until_conditions(self): | ||||
|         conditions = [lambda: True] | ||||
|         assert utils.wait_until_conditions(conditions, 1) | ||||
|         conditions = [lambda: False] | ||||
|         assert not utils.wait_until_conditions(conditions, 1) | ||||
|  | ||||
|     def test_timer(self): | ||||
|         timer = utils.Timer() | ||||
|         timer.start() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Junchao-Mellanox
					Junchao-Mellanox