mirror of
https://github.com/beclab/Olares
synced 2026-04-21 21:47:56 +00:00
353 lines
21 KiB
Go
353 lines
21 KiB
Go
package utils
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"runtime"
|
|
"strings"
|
|
|
|
"github.com/beclab/Olares/cli/pkg/core/connector"
|
|
"github.com/beclab/Olares/cli/pkg/core/util"
|
|
"k8s.io/klog/v2"
|
|
)
|
|
|
|
// Values used for NvidiaStatus.InstallMethod, naming how the NVIDIA driver
// was installed on the host.
const (
	GPUDriverInstallMethodUnknown = "unknown"
	GPUDriverInstallMethodApt     = "apt"
	GPUDriverInstallMethodRunfile = "runfile"
)

// Marker substrings searched for in nvidia-smi output to classify the
// driver state.
const (
	GPUStatusDriverLibraryMismatch     = "Driver/library version mismatch"
	GPUStatusCouldntCommunicateWithDrv = "couldn't communicate with the NVIDIA driver"
	GPUStatusNvmlLibraryVersionPrefix  = "NVML library version:"
)
|
|
|
|
// GPU holds the subset of a single <gpu> XML element that this package
// decodes. Every value is kept as the raw string found in the document; no
// numeric parsing is performed here.
//
// Field order is significant: encoding/json emits fields in declaration
// order, so keep new fields in their schema position rather than sorting.
// Elements listed as "unmapped" in the comments below are deliberately not
// decoded; the paths are recorded so they can be added later.
type GPU struct {
	// Product identity and display state.
	// Unmapped: persistence_mode, mig_mode, mig_devices, accounting_mode,
	// accounting_mode_buffer_size, driver_model>{current_dm,pending_dm},
	// serial.
	ID                  string `xml:"id,attr" json:"id"`
	ProductName         string `xml:"product_name" json:"product_name"`
	ProductBrand        string `xml:"product_brand" json:"product_brand"`
	ProductArchitecture string `xml:"product_architecture" json:"product_architecture"`
	DisplayMode         string `xml:"display_mode" json:"display_mode"`
	DisplayActive       string `xml:"display_active" json:"display_active"`

	// Board identity.
	// Unmapped: minor_number, gpu_part_number, gpu_module_id,
	// inforom_version>{img_version,oem_object,ecc_object,pwr_object},
	// gpu_operation_mode>{current_gom,pending_gom}, gsp_firmware_version,
	// gpu_virtualization_mode, ibmnpu.
	UUID          string `xml:"uuid" json:"uuid"`
	VbiosVersion  string `xml:"vbios_version" json:"vbios_version"`
	MultigpuBoard string `xml:"multigpu_board" json:"multigpu_board"`
	BoardID       string `xml:"board_id" json:"board_id"`

	// <pci> subtree.
	// Unmapped: pci>pci_bridge_chip>{bridge_chip_type,bridge_chip_fw},
	// pci>replay_counter, replay_rollover_counter.
	PciBus           string `xml:"pci>pci_bus" json:"pci_bus"`
	PciDevice        string `xml:"pci>pci_device" json:"pci_device"`
	PciDomain        string `xml:"pci>pci_domain" json:"pci_domain"`
	PciDeviceID      string `xml:"pci>pci_device_id" json:"pci_device_id"`
	PciBusID         string `xml:"pci>pci_bus_id" json:"pci_bus_id"`
	PciSubSystemID   string `xml:"pci>pci_sub_system_id" json:"pci_sub_system_id"`
	MaxLinkGen       string `xml:"pci>pci_gpu_link_info>pcie_gen>max_link_gen" json:"max_link_gen"`
	CurrentLinkGen   string `xml:"pci>pci_gpu_link_info>pcie_gen>current_link_gen" json:"current_link_gen"`
	MaxLinkWidth     string `xml:"pci>pci_gpu_link_info>link_widths>max_link_width" json:"max_link_width"`
	CurrentLinkWidth string `xml:"pci>pci_gpu_link_info>link_widths>current_link_width" json:"current_link_width"`
	PciTxUtil        string `xml:"pci>tx_util" json:"pci_tx_util"`
	PciRxUtil        string `xml:"pci>rx_util" json:"pci_rx_util"`

	// Fan and performance state.
	FanSpeed         string `xml:"fan_speed" json:"fan_speed"`
	PerformanceState string `xml:"performance_state" json:"performance_state"`

	// <clocks_throttle_reasons> subtree.
	// Unmapped: clocks_throttle_reasons>clocks_throttle_reason_unknown.
	ClocksThrottleReasonGpuIdle                   string `xml:"clocks_throttle_reasons>clocks_throttle_reason_gpu_idle" json:"clocks_throttle_reason_gpu_idle"`
	ClocksThrottleReasonApplicationsClocksSetting string `xml:"clocks_throttle_reasons>clocks_throttle_reason_applications_clocks_setting" json:"clocks_throttle_reason_applications_clocks_setting"`
	ClocksThrottleReasonSwPowerCap                string `xml:"clocks_throttle_reasons>clocks_throttle_reason_sw_power_cap" json:"clocks_throttle_reason_sw_power_cap"`
	ClocksThrottleReasonHwSlowdown                string `xml:"clocks_throttle_reasons>clocks_throttle_reason_hw_slowdown" json:"clocks_throttle_reason_hw_slowdown"`
	ClocksThrottleReasonHwThermalSlowdown         string `xml:"clocks_throttle_reasons>clocks_throttle_reason_hw_thermal_slowdown" json:"clocks_throttle_reason_hw_thermal_slowdown"`
	ClocksThrottleReasonHwPowerBrakeSlowdown      string `xml:"clocks_throttle_reasons>clocks_throttle_reason_hw_power_brake_slowdown" json:"clocks_throttle_reason_hw_power_brake_slowdown"`
	ClocksThrottleReasonSyncBoost                 string `xml:"clocks_throttle_reasons>clocks_throttle_reason_sync_boost" json:"clocks_throttle_reason_sync_boost"`
	ClocksThrottleReasonSwThermalSlowdown         string `xml:"clocks_throttle_reasons>clocks_throttle_reason_sw_thermal_slowdown" json:"clocks_throttle_reason_sw_thermal_slowdown"`
	ClocksThrottleReasonDisplayClocksSetting      string `xml:"clocks_throttle_reasons>clocks_throttle_reason_display_clocks_setting" json:"clocks_throttle_reason_display_clocks_setting"`

	// <fb_memory_usage> and <bar1_memory_usage> subtrees.
	FbMemoryUsageTotal    string `xml:"fb_memory_usage>total" json:"fb_memory_usage_total"`
	FbMemoryUsageReserved string `xml:"fb_memory_usage>reserved" json:"fb_memory_usage_reserved"`
	FbMemoryUsageUsed     string `xml:"fb_memory_usage>used" json:"fb_memory_usage_used"`
	FbMemoryUsageFree     string `xml:"fb_memory_usage>free" json:"fb_memory_usage_free"`
	Bar1Total             string `xml:"bar1_memory_usage>total" json:"bar1_total"`
	Bar1Used              string `xml:"bar1_memory_usage>used" json:"bar1_used"`
	Bar1Free              string `xml:"bar1_memory_usage>free" json:"bar1_free"`

	// Compute mode and <utilization> subtree.
	// Unmapped: encoder_stats, fbc_stats, ecc_mode>{current_ecc,pending_ecc},
	// ecc_errors>{volatile,aggregate}>{single_bit,double_bit}>
	// {device_memory,register_file,l1_cache,l2_cache,texture_memory,total},
	// retired_pages>{multiple_single_bit_retirement,double_bit_retirement}>
	// {retired_count,retired_pagelist}, pending_blacklist,
	// retired_pages>pending_retirement, remapped_rows.
	ComputeMode string `xml:"compute_mode" json:"compute_mode"`
	GpuUtil     string `xml:"utilization>gpu_util" json:"gpu_util"`
	MemoryUtil  string `xml:"utilization>memory_util" json:"memory_util"`
	EncoderUtil string `xml:"utilization>encoder_util" json:"encoder_util"`
	DecoderUtil string `xml:"utilization>decoder_util" json:"decoder_util"`

	// <temperature> and <supported_gpu_target_temp> subtrees.
	GpuTemp                string `xml:"temperature>gpu_temp" json:"gpu_temp"`
	GpuTempMaxThreshold    string `xml:"temperature>gpu_temp_max_threshold" json:"gpu_temp_max_threshold"`
	GpuTempSlowThreshold   string `xml:"temperature>gpu_temp_slow_threshold" json:"gpu_temp_slow_threshold"`
	GpuTempMaxGpuThreshold string `xml:"temperature>gpu_temp_max_gpu_threshold" json:"gpu_temp_max_gpu_threshold"`
	GpuTargetTemp          string `xml:"temperature>gpu_target_temperature" json:"gpu_target_temperature"`
	MemoryTemp             string `xml:"temperature>memory_temp" json:"memory_temp"`
	GpuTempMaxMemThreshold string `xml:"temperature>gpu_temp_max_mem_threshold" json:"gpu_temp_max_mem_threshold"`
	GpuTargetTempMin       string `xml:"supported_gpu_target_temp>gpu_target_temp_min" json:"gpu_target_temp_min"`
	GpuTargetTempMax       string `xml:"supported_gpu_target_temp>gpu_target_temp_max" json:"gpu_target_temp_max"`

	// <gpu_power_readings> subtree. Note that PowerLimit is read from
	// current_power_limit and EnforcedPowerLimit from requested_power_limit.
	PowerState         string `xml:"gpu_power_readings>power_state" json:"power_state"`
	PowerManagement    string `xml:"gpu_power_readings>power_management" json:"power_management"`
	PowerDraw          string `xml:"gpu_power_readings>power_draw" json:"power_draw"`
	PowerLimit         string `xml:"gpu_power_readings>current_power_limit" json:"power_limit"`
	DefaultPowerLimit  string `xml:"gpu_power_readings>default_power_limit" json:"default_power_limit"`
	EnforcedPowerLimit string `xml:"gpu_power_readings>requested_power_limit" json:"enforced_power_limit"`
	MinPowerLimit      string `xml:"gpu_power_readings>min_power_limit" json:"min_power_limit"`
	MaxPowerLimit      string `xml:"gpu_power_readings>max_power_limit" json:"max_power_limit"`

	// <clocks> subtree.
	// Unmapped: applications_clocks>{graphics_clock,mem_clock},
	// default_applications_clocks>{graphics_clock,mem_clock}.
	GraphicsClock string `xml:"clocks>graphics_clock" json:"graphics_clock"`
	SmClock       string `xml:"clocks>sm_clock" json:"sm_clock"`
	MemClock      string `xml:"clocks>mem_clock" json:"mem_clock"`
	VideoClock    string `xml:"clocks>video_clock" json:"video_clock"`

	// <max_clocks> subtree.
	// Unmapped: max_customer_boost_clocks,
	// clock_policy>{auto_boost,auto_boost_default}, voltage>graphics_volt,
	// supported_clocks>supported_mem_clock>{value,supported_graphics_clock},
	// processes, accounted_processes.
	GraphicsClockMax string `xml:"max_clocks>graphics_clock" json:"graphics_clock_max"`
	SmClockMax       string `xml:"max_clocks>sm_clock" json:"sm_clock_max"`
	MemClockMax      string `xml:"max_clocks>mem_clock" json:"mem_clock_max"`
	VideoClockMax    string `xml:"max_clocks>video_clock" json:"video_clock_max"`
}
|
|
|
|
type NvidiaGpuInfo struct {
|
|
Timestamp string `xml:"timestamp" json:"timestamp"`
|
|
DriverVersion string `xml:"driver_version" json:"driver_version"`
|
|
CudaVersion string `xml:"cuda_version" json:"cuda_version"`
|
|
AttachedGpus string `xml:"attached_gpus" json:"attached_gpus"`
|
|
GPUS []GPU `xml:"gpu" json:"gpus"`
|
|
}
|
|
|
|
// NvidiaStatus is the unified GPU/driver status, combining nvidia-smi XML info and driver health.
|
|
type NvidiaStatus struct {
|
|
Installed bool
|
|
Running bool // whether kernel driver module is loaded
|
|
Info *NvidiaGpuInfo
|
|
DriverVersion string
|
|
CudaVersion string
|
|
LibraryVersion string // NVML library version when mismatch occurs
|
|
Mismatch bool // whether nvidia-smi reports Driver/library version mismatch
|
|
InstallMethod string // apt | runfile | unknown
|
|
}
|
|
|
|
// findNvidiaSmiPath resolves the nvidia-smi executable.
//
// The binary is looked up on PATH first (as "nvidia-smi.exe" on Windows).
// When it is not found there, the fixed WSL location
// /usr/lib/wsl/lib/nvidia-smi is tried. It returns exec.ErrNotFound when
// neither location has the binary; any other lookup or stat error is
// returned unchanged. The path is empty whenever the error is non-nil.
func findNvidiaSmiPath() (string, error) {
	binary := "nvidia-smi"
	if runtime.GOOS == "windows" {
		binary += ".exe"
	}

	resolved, lookErr := exec.LookPath(binary)
	if lookErr == nil {
		return resolved, nil
	}
	// Only a "not found" outcome falls through to the WSL location.
	if !os.IsNotExist(lookErr) && !errors.Is(lookErr, exec.ErrNotFound) {
		return "", lookErr
	}

	const wslNvidiaSmi = "/usr/lib/wsl/lib/nvidia-smi"
	switch _, statErr := os.Stat(wslNvidiaSmi); {
	case statErr == nil:
		return wslNvidiaSmi, nil
	case os.IsNotExist(statErr):
		return "", exec.ErrNotFound
	default:
		return "", statErr
	}
}
|
|
|
|
func GetNvidiaStatus(execRuntime connector.Runtime) (*NvidiaStatus, error) {
|
|
status := &NvidiaStatus{InstallMethod: GPUDriverInstallMethodUnknown}
|
|
|
|
if out, _ := execRuntime.GetRunner().SudoCmd("dpkg -l | awk '/^(ii|i[UuFHWt]|rc|..R)/ {print $2}' | grep -i nvidia-driver", false, false); strings.TrimSpace(out) != "" {
|
|
status.InstallMethod = GPUDriverInstallMethodApt
|
|
} else {
|
|
if util.IsExist("/usr/bin/nvidia-uninstall") || util.IsExist("/usr/bin/nvidia-installer") {
|
|
status.InstallMethod = GPUDriverInstallMethodRunfile
|
|
}
|
|
}
|
|
|
|
// detect whether any NVIDIA kernel module is loaded (driver running)
|
|
// this is a seperate status besides the installed status
|
|
if out, _ := execRuntime.GetRunner().SudoCmd("lsmod | grep -i nvidia 2>/dev/null", false, false); strings.TrimSpace(out) != "" {
|
|
status.Running = true
|
|
}
|
|
// read running kernel driver version from sysfs if available
|
|
var kernelDriverVersion string
|
|
if status.Running {
|
|
if v, _ := execRuntime.GetRunner().SudoCmd("cat /sys/module/nvidia/version 2>/dev/null", false, false); strings.TrimSpace(v) != "" {
|
|
kernelDriverVersion = strings.TrimSpace(v)
|
|
}
|
|
}
|
|
|
|
cmdPath, pathErr := findNvidiaSmiPath()
|
|
if pathErr == nil {
|
|
out, err := execRuntime.GetRunner().SudoCmd(cmdPath+" -q -x", false, false)
|
|
if err == nil {
|
|
var data NvidiaGpuInfo
|
|
uerr := xml.Unmarshal([]byte(out), &data)
|
|
if uerr == nil {
|
|
status.Installed = true
|
|
// nvidia-smi works => kernel driver is active
|
|
status.Running = true
|
|
status.Info = &data
|
|
status.DriverVersion = data.DriverVersion
|
|
status.CudaVersion = data.CudaVersion
|
|
return status, nil
|
|
}
|
|
return status, fmt.Errorf("failed to unmarshal nvidia-smi XML: %v", uerr)
|
|
}
|
|
if strings.Contains(out, GPUStatusDriverLibraryMismatch) {
|
|
status.Installed = true
|
|
status.Mismatch = true
|
|
status.LibraryVersion = parseNvmlLibraryVersion(out)
|
|
// kernel may still be running; prefer kernel driver version if available
|
|
if kernelDriverVersion != "" {
|
|
status.DriverVersion = kernelDriverVersion
|
|
}
|
|
return status, nil
|
|
}
|
|
// for now, consider as not installed
|
|
if strings.Contains(out, GPUStatusCouldntCommunicateWithDrv) {
|
|
// even if userland not communicating, kernel may be running
|
|
if kernelDriverVersion != "" {
|
|
status.DriverVersion = kernelDriverVersion
|
|
}
|
|
return status, nil
|
|
}
|
|
return status, fmt.Errorf("failed to get NVIDIA driver status: %v", out)
|
|
}
|
|
// consider as not installed
|
|
// if kernel is running after uninstall (without reboot), reflect the running version
|
|
if kernelDriverVersion != "" {
|
|
status.DriverVersion = kernelDriverVersion
|
|
}
|
|
return status, nil
|
|
}
|
|
|
|
func parseNvmlLibraryVersion(out string) string {
|
|
lines := strings.Split(out, "\n")
|
|
for _, l := range lines {
|
|
l = strings.TrimSpace(l)
|
|
// handle token like "NVML library version:575.57"
|
|
if idx := strings.Index(l, GPUStatusNvmlLibraryVersionPrefix); idx >= 0 {
|
|
v := strings.TrimSpace(strings.TrimPrefix(l, GPUStatusNvmlLibraryVersionPrefix))
|
|
// in case there are trailing characters
|
|
v = strings.FieldsFunc(v, func(r rune) bool { return r == ' ' || r == '\t' || r == '\r' || r == ')' || r == '(' })[0]
|
|
return v
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func DetectNvidiaModelAndArch(execRuntime connector.Runtime) (model string, architecture string, err error) {
|
|
if execRuntime.GetSystemInfo().IsDarwin() {
|
|
return "", "", nil
|
|
}
|
|
out, e := execRuntime.GetRunner().SudoCmd("lspci | grep -i -e vga -e 3d | grep -i nvidia || true", false, false)
|
|
if e != nil {
|
|
klog.Error("Error running lspci:", e)
|
|
return "", "", e
|
|
}
|
|
out = strings.TrimSpace(out)
|
|
if out == "" {
|
|
return "", "", nil
|
|
}
|
|
// try to extract codename in square brackets e.g. "AD106 [GeForce RTX 4060 Ti]"
|
|
// examples: "NVIDIA Corporation AD106 [GeForce RTX 4060 Ti]"
|
|
model = out
|
|
architecture = "Unknown"
|
|
upper := strings.ToUpper(out)
|
|
// codename appears as two letters followed by digits, within the line, often right before '['
|
|
// detect common prefixes: AD(Ada), GB(Blackwell), GH(Hopper), GA(Ampere), TU(Turing), GV(Volta), GP(Pascal), GM(Maxwell), GK(Kepler), GF(Fermi)
|
|
codePrefixes := []struct {
|
|
Prefix string
|
|
Arch string
|
|
}{
|
|
{"AD", "Ada Lovelace"},
|
|
{"GB", "Blackwell"},
|
|
{"GH", "Hopper"},
|
|
{"GA", "Ampere"},
|
|
{"TU", "Turing"},
|
|
{"GV", "Volta"},
|
|
{"GP", "Pascal"},
|
|
{"GM", "Maxwell"},
|
|
{"GK", "Kepler"},
|
|
{"GF", "Fermi"},
|
|
}
|
|
for _, p := range codePrefixes {
|
|
if strings.Contains(upper, p.Prefix) {
|
|
architecture = p.Arch
|
|
break
|
|
}
|
|
}
|
|
// get bracket part as model if present
|
|
if i := strings.Index(out, "["); i >= 0 {
|
|
if j := strings.Index(out[i:], "]"); j > 0 {
|
|
model = strings.TrimSpace(out[i+1 : i+j])
|
|
}
|
|
}
|
|
return model, architecture, nil
|
|
}
|