mirror of
				https://github.com/Telecominfraproject/ols-nos.git
				synced 2025-10-31 18:17:52 +00:00 
			
		
		
		
	[Mellanox] implement platform wait in python code (#17398)
- Why I did it
  A new implementation of the Nvidia platform_wait script is needed because:
  1. the sysfs nodes it used were deprecated by hw-mgmt
  2. it has new dependencies on the SDK
  3. it must support CMIS host management mode
- How I did it
  Wait for hw-management to be ready; wait for the SDK sysfs nodes to be ready.
- How to verify it
  Manual test, unit test, sonic-mgmt regression.
This commit is contained in:
		| @@ -1,68 +1,32 @@ | |||||||
#!/usr/bin/python3

#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Entry script: block until the Nvidia platform dependencies that PMON needs
# are available, then exit 0. Exits with -1 if the wait times out.

import sys
from sonic_platform.device_data import DeviceDataManager
from sonic_py_common.logger import Logger


logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')

# Guard clause: report the timeout and bail out with a failure status first.
if not DeviceDataManager.wait_platform_ready():
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)

logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)
|   | |||||||
| @@ -17,6 +17,7 @@ | |||||||
|  |  | ||||||
| import glob | import glob | ||||||
| import os | import os | ||||||
|  | import time | ||||||
|  |  | ||||||
| from . import utils | from . import utils | ||||||
|  |  | ||||||
| @@ -167,8 +168,11 @@ class DeviceDataManager: | |||||||
|     @classmethod |     @classmethod | ||||||
|     @utils.read_only_cache() |     @utils.read_only_cache() | ||||||
|     def get_sfp_count(cls): |     def get_sfp_count(cls): | ||||||
|         sfp_count = utils.read_int_from_file('/run/hw-management/config/sfp_counter') |         from sonic_py_common import device_info | ||||||
|         return sfp_count if sfp_count > 0 else len(glob.glob('/sys/module/sx_core/asic0/module*')) |         platform_path = device_info.get_path_to_platform_dir() | ||||||
|  |         platform_json_path = os.path.join(platform_path, 'platform.json') | ||||||
|  |         platform_data = utils.load_json_file(platform_json_path) | ||||||
|  |         return len(platform_data['chassis']['sfps']) | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_linecard_sfp_count(cls, lc_index): |     def get_linecard_sfp_count(cls, lc_index): | ||||||
| @@ -244,3 +248,23 @@ class DeviceDataManager: | |||||||
|         sai_profile_file = os.path.join(hwsku_dir, 'sai.profile') |         sai_profile_file = os.path.join(hwsku_dir, 'sai.profile') | ||||||
|         data = utils.read_key_value_file(sai_profile_file, delimeter='=') |         data = utils.read_key_value_file(sai_profile_file, delimeter='=') | ||||||
|         return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1' |         return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1' | ||||||
|  |      | ||||||
|  |     @classmethod | ||||||
|  |     def wait_platform_ready(cls): | ||||||
|  |         """ | ||||||
|  |         Wait for Nvidia platform related services(SDK, hw-management) ready | ||||||
|  |         Returns: | ||||||
|  |             bool: True if wait success else timeout | ||||||
|  |         """ | ||||||
|  |         conditions = [] | ||||||
|  |         sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror'] | ||||||
|  |         if cls.is_independent_mode(): | ||||||
|  |             sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset', | ||||||
|  |                                 'power_good', 'power_limit', 'power_on', 'temperature/input']) | ||||||
|  |         else: | ||||||
|  |             conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1) | ||||||
|  |         sfp_count = cls.get_sfp_count() | ||||||
|  |         for sfp_index in range(sfp_count): | ||||||
|  |             for sysfs_node in sysfs_nodes: | ||||||
|  |                 conditions.append(lambda: os.path.exists(f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}')) | ||||||
|  |         return utils.wait_until_conditions(conditions, 300, 1) | ||||||
|   | |||||||
| @@ -290,6 +290,30 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs): | |||||||
|     return False |     return False | ||||||
|  |  | ||||||
|  |  | ||||||
def wait_until_conditions(conditions, timeout, interval=1):
    """
    Wait until all the conditions become true

    Each condition is dropped from the pending set the first time it returns
    a true value and is not evaluated again. The conditions are always checked
    at least once, including once more after the final sleep, so a condition
    that is already satisfied succeeds even with timeout <= 0.

    Args:
        conditions (list): a list of callable which generate True|False
        timeout (int): wait time in seconds
        interval (int, optional): interval in seconds between two checks of the
            pending conditions. Must be positive, otherwise the remaining
            timeout never decreases. Defaults to 1.

    Returns:
        bool: True if wait success else False
    """
    pending_conditions = list(conditions)
    while True:
        # Keep only the conditions that are still failing.
        pending_conditions = [condition for condition in pending_conditions if not condition()]
        if not pending_conditions:
            return True
        # Check the budget only after evaluating, so the last sleep is always
        # followed by one more evaluation.
        if timeout <= 0:
            return False
        time.sleep(interval)
        timeout -= interval
|  |  | ||||||
|  |    | ||||||
| class TimerEvent: | class TimerEvent: | ||||||
|     def __init__(self, interval, cb, repeat): |     def __init__(self, interval, cb, repeat): | ||||||
|         self.interval = interval |         self.interval = interval | ||||||
|   | |||||||
| @@ -60,6 +60,26 @@ class TestDeviceData: | |||||||
|         mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} |         mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} | ||||||
|         assert DeviceDataManager.is_independent_mode() |         assert DeviceDataManager.is_independent_mode() | ||||||
|  |  | ||||||
|  |     @mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp')) | ||||||
|  |     @mock.patch('sonic_platform.device_data.utils.load_json_file') | ||||||
|  |     def test_get_sfp_count(self, mock_load_json): | ||||||
|  |         mock_load_json.return_value = { | ||||||
|  |             'chassis': { | ||||||
|  |                 'sfps': [1,2,3] | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         assert DeviceDataManager.get_sfp_count() == 3 | ||||||
|  |  | ||||||
|  |     @mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock()) | ||||||
|  |     @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3)) | ||||||
|  |     @mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1)) | ||||||
|  |     @mock.patch('sonic_platform.device_data.os.path.exists') | ||||||
|  |     @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') | ||||||
|  |     def test_wait_platform_ready(self, mock_is_indep, mock_exists): | ||||||
|  |         mock_exists.return_value = True | ||||||
|  |         mock_is_indep.return_value = True | ||||||
|  |         assert DeviceDataManager.wait_platform_ready() | ||||||
|  |         mock_is_indep.return_value = False | ||||||
|  |         assert DeviceDataManager.wait_platform_ready() | ||||||
|  |         mock_exists.return_value = False | ||||||
|  |         assert not DeviceDataManager.wait_platform_ready() | ||||||
|   | |||||||
| @@ -195,6 +195,13 @@ class TestUtils: | |||||||
|         mock_os_open = mock.mock_open(read_data='a=b') |         mock_os_open = mock.mock_open(read_data='a=b') | ||||||
|         with mock.patch('sonic_platform.utils.open', mock_os_open): |         with mock.patch('sonic_platform.utils.open', mock_os_open): | ||||||
|             assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} |             assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'} | ||||||
|  |              | ||||||
|  |     @mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock()) | ||||||
|  |     def test_wait_until_conditions(self): | ||||||
|  |         conditions = [lambda: True] | ||||||
|  |         assert utils.wait_until_conditions(conditions, 1) | ||||||
|  |         conditions = [lambda: False] | ||||||
|  |         assert not utils.wait_until_conditions(conditions, 1) | ||||||
|  |  | ||||||
|     def test_timer(self): |     def test_timer(self): | ||||||
|         timer = utils.Timer() |         timer = utils.Timer() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Junchao-Mellanox
					Junchao-Mellanox