From 6867d72dae3388fbfd791214d569941c355dc55f Mon Sep 17 00:00:00 2001 From: skkkkkkk Date: Thu, 4 Jan 2024 16:25:39 +0800 Subject: [PATCH] fix gpu --- README.md | 2 + scripts/ack-optimized-os-1.20.sh | 2 +- scripts/ack-optimized-os-1.22.sh | 5 ++- scripts/ack-optimized-os-1.24.sh | 5 ++- scripts/ack-optimized-os-1.26.sh | 5 ++- scripts/ack-optimized-os-all.sh | 55 +++++++++++++++----------- scripts/ack-optimized-os-linux3-all.sh | 55 +++++++++++++++----------- scripts/ack-optimized-os-rhel9-all.sh | 55 +++++++++++++++----------- 8 files changed, 105 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index d1d7de7..5b1a8a0 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ This repository contains resources and configuration scripts for building a cust * Aliyun Linux 2 (Alibaba Cloud Linux 2) * Aliyun Linux 3 (Alibaba Cloud Linux 3) * CentOS 7.6/7.7/7.8/7.9 +* Red Hat Enterprise Linux 9 + ## Setup diff --git a/scripts/ack-optimized-os-1.20.sh b/scripts/ack-optimized-os-1.20.sh index 467a082..85cee7d 100755 --- a/scripts/ack-optimized-os-1.20.sh +++ b/scripts/ack-optimized-os-1.20.sh @@ -184,7 +184,7 @@ source_file() { preset_gpu() { if [[ "$PRESET_GPU" == "true" ]]; then for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + source pkg/run/$RELEASE_VERSION/lib/$file_name done if [[ $NVIDIA_DRIVER_VERSION == "" ]];then diff --git a/scripts/ack-optimized-os-1.22.sh b/scripts/ack-optimized-os-1.22.sh index 4ed8465..b24bd57 100644 --- a/scripts/ack-optimized-os-1.22.sh +++ b/scripts/ack-optimized-os-1.22.sh @@ -184,8 +184,9 @@ source_file() { preset_gpu() { if [[ "$PRESET_GPU" == "true" ]]; then - for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done if [[ $NVIDIA_DRIVER_VERSION == "" ]];then diff --git a/scripts/ack-optimized-os-1.24.sh b/scripts/ack-optimized-os-1.24.sh index 5a8fdfa..7728efd 100644 --- a/scripts/ack-optimized-os-1.24.sh +++ b/scripts/ack-optimized-os-1.24.sh @@ -174,8 +174,9 @@ source_file() { preset_gpu() { if [[ "$PRESET_GPU" == "true" ]]; then - for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done if [[ $NVIDIA_DRIVER_VERSION == "" ]];then diff --git a/scripts/ack-optimized-os-1.26.sh b/scripts/ack-optimized-os-1.26.sh index 6ba1b36..4d42363 100644 --- a/scripts/ack-optimized-os-1.26.sh +++ b/scripts/ack-optimized-os-1.26.sh @@ -73,8 +73,9 @@ source_file() { preset_gpu() { if [[ "$PRESET_GPU" == "true" ]]; then - for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done if [[ $NVIDIA_DRIVER_VERSION == "" ]];then diff --git a/scripts/ack-optimized-os-all.sh b/scripts/ack-optimized-os-all.sh index 427c7e2..c65d678 100644 --- a/scripts/ack-optimized-os-all.sh +++ b/scripts/ack-optimized-os-all.sh @@ -88,36 +88,43 @@ install_pkg() { preset_gpu() { - if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + if [[ "$PRESET_GPU" != "true" ]]; then return fi - if [[ "$PRESET_GPU" == "true" ]]; then + if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + return + elif [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -eq 20 ]]; then for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + source pkg/run/$RELEASE_VERSION/lib/$file_name + done + else + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done - - if [[ $NVIDIA_DRIVER_VERSION == "" ]];then - export NVIDIA_DRIVER_VERSION=460.91.03 - fi - - nvidia::create_dir - # --nvidia-driver-runfile 指定驱动文件路径 - nvidia::prepare_driver_package - # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 - nvidia::prepare_container_runtime_package - # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 - nvidia::prepare_driver_package - # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 - nvidia::deploy_static_pod - - if [[ $RUNTIME == "docker" ]];then - export SKIP_CONTAINER_RUNTIME_CONFIG=true - fi - - nvidia::gpu::installer::main - fi + + if [[ $NVIDIA_DRIVER_VERSION == "" ]];then + export NVIDIA_DRIVER_VERSION=460.91.03 + fi + + nvidia::create_dir + # --nvidia-driver-runfile 指定驱动文件路径 + nvidia::prepare_driver_package + # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 + nvidia::prepare_container_runtime_package + # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 + nvidia::prepare_driver_package + # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 + nvidia::deploy_static_pod + + if [[ $RUNTIME == "docker" ]];then + export SKIP_CONTAINER_RUNTIME_CONFIG=true + fi + + nvidia::gpu::installer::main + } trim_os() { diff --git a/scripts/ack-optimized-os-linux3-all.sh b/scripts/ack-optimized-os-linux3-all.sh index 0d2296e..fc6c6e9 100644 --- a/scripts/ack-optimized-os-linux3-all.sh +++ b/scripts/ack-optimized-os-linux3-all.sh @@ -91,36 +91,43 @@ install_pkg() { preset_gpu() { - if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + if [[ "$PRESET_GPU" != "true" ]]; then return fi - if [[ "$PRESET_GPU" == "true" ]]; then + if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + return + elif [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -eq 20 ]]; then for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + source pkg/run/$RELEASE_VERSION/lib/$file_name + done + else + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done - - if [[ $NVIDIA_DRIVER_VERSION == "" ]];then - export NVIDIA_DRIVER_VERSION=460.91.03 - fi - - nvidia::create_dir - # --nvidia-driver-runfile 指定驱动文件路径 - nvidia::prepare_driver_package - # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 - nvidia::prepare_container_runtime_package - # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 - nvidia::prepare_driver_package - # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 - nvidia::deploy_static_pod - - if [[ $RUNTIME == "docker" ]];then - export SKIP_CONTAINER_RUNTIME_CONFIG=true - fi - - nvidia::gpu::installer::main - fi + + if [[ $NVIDIA_DRIVER_VERSION == "" ]];then + export NVIDIA_DRIVER_VERSION=460.91.03 + fi + + nvidia::create_dir + # --nvidia-driver-runfile 指定驱动文件路径 + nvidia::prepare_driver_package + # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 + nvidia::prepare_container_runtime_package + # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 + nvidia::prepare_driver_package + # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 + nvidia::deploy_static_pod + + if [[ $RUNTIME == "docker" ]];then + export SKIP_CONTAINER_RUNTIME_CONFIG=true + fi + + nvidia::gpu::installer::main + } trim_os() { diff --git a/scripts/ack-optimized-os-rhel9-all.sh b/scripts/ack-optimized-os-rhel9-all.sh index 17816fe..9e3c9cb 100644 --- a/scripts/ack-optimized-os-rhel9-all.sh +++ b/scripts/ack-optimized-os-rhel9-all.sh @@ -88,36 +88,43 @@ install_pkg() { preset_gpu() { - if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + if [[ "$PRESET_GPU" != "true" ]]; then return fi - if [[ "$PRESET_GPU" == "true" ]]; then + if [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -lt 20 ]]; then + return + elif [[ $(echo "${KUBE_VERSION}" | cut -d. -f2) -eq 20 ]]; then for file_name in $(ls pkg/run/$RELEASE_VERSION/lib | grep -v init.sh); do - source pkg/run/$RELEASE_VERSION/lib/$file_name + source pkg/run/$RELEASE_VERSION/lib/$file_name + done + else + export SRC_DIR=pkg/run/$RELEASE_VERSION + for file_name in $(ls $SRC_DIR/lib | grep -v init.sh | grep -v common.sh | grep -v log.sh); do + source $SRC_DIR/lib/$file_name done - - if [[ $NVIDIA_DRIVER_VERSION == "" ]];then - export NVIDIA_DRIVER_VERSION=460.91.03 - fi - - nvidia::create_dir - # --nvidia-driver-runfile 指定驱动文件路径 - nvidia::prepare_driver_package - # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 - nvidia::prepare_container_runtime_package - # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 - nvidia::prepare_driver_package - # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 - nvidia::deploy_static_pod - - if [[ $RUNTIME == "docker" ]];then - export SKIP_CONTAINER_RUNTIME_CONFIG=true - fi - - nvidia::gpu::installer::main - fi + + if [[ $NVIDIA_DRIVER_VERSION == "" ]];then + export NVIDIA_DRIVER_VERSION=460.91.03 + fi + + nvidia::create_dir + # --nvidia-driver-runfile 指定驱动文件路径 + nvidia::prepare_driver_package + # --nvidia-container-toolkit-rpms 指定nvidia container toolkit包含的rpm包所在目录 + nvidia::prepare_container_runtime_package + # --nvidia-fabricmanager-rpm 指定nvidia fabric manager安装包(rpm格式)路径 + nvidia::prepare_driver_package + # --nvidia-device-plugin-yaml 指定nvidia device plugin yaml文件路径 + nvidia::deploy_static_pod + + if [[ $RUNTIME == "docker" ]];then + export SKIP_CONTAINER_RUNTIME_CONFIG=true + fi + + nvidia::gpu::installer::main + } trim_os() {