From ce032201bca119a41fadab68a7b86cbe69b6918c Mon Sep 17 00:00:00 2001 From: Tony Butler Date: Thu, 21 Mar 2019 03:15:04 -0600 Subject: [PATCH] Refactor NVML, allow unavailable items to disappear, make thermal display color thresholds configurable both compile-time and runtime --- README.md | 2 + src/common/interfaces/IConfig.h | 2 + src/core/Config.cpp | 10 +++++ src/core/ConfigLoader_platform.h | 4 ++ src/core/usage.h | 2 + src/defaults.h | 31 ++++++++++++++ src/nvidia/CudaCLI.cpp | 28 +++++++++++++ src/nvidia/CudaCLI.h | 8 ++++ src/nvidia/Health.h | 1 + src/nvidia/NvmlApi.cpp | 52 +++++++++++++++-------- src/workers/CudaThread.cpp | 17 ++++++++ src/workers/CudaThread.h | 6 +++ src/workers/Workers.cpp | 72 +++++++++++++++++++++++--------- 13 files changed, 198 insertions(+), 37 deletions(-) create mode 100644 src/defaults.h diff --git a/README.md b/README.md index 2c612d96..f876cee4 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,8 @@ Use [config.xmrig.com](https://config.xmrig.com/nvidia) to generate, edit or sha --cuda-bfactor=[0-12] run CryptoNight core kernel in smaller pieces --cuda-bsleep=N insert a delay of N microseconds between kernel launches --cuda-affinity=N affine GPU threads to a CPU + --temp-low=N list of celsius temperature below which is green + --temp-high=N list of celsius temperature above which is red --no-color disable colored output --variant algorithm PoW variant --donate-level=N donate level, default 5% (5 minutes in 100 minutes) diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index 7e6931a8..c9fe9ca3 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -126,6 +126,8 @@ class IConfig CudaLaunchKey = 1204, CudaAffinityKey = 1205, CudaMaxUsageKey = 1206, + NvmlTempL = 1207, + NvmlTempH = 1208, }; virtual ~IConfig() = default; diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 468f1d0e..df93120d 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -99,6 
+99,8 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const doc.AddMember("cuda-bfactor", m_cudaCLI.bfactor(), allocator); doc.AddMember("cuda-bsleep", m_cudaCLI.bsleep(), allocator); doc.AddMember("cuda-max-threads", m_maxGpuThreads, allocator); + doc.AddMember("temp-low", m_cudaCLI.temp_low(), allocator); + doc.AddMember("temp-high", m_cudaCLI.temp_high(), allocator); doc.AddMember("donate-level", donateLevel(), allocator); doc.AddMember("log-file", logFile() ? Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator); doc.AddMember("pools", m_pools.toJSON(doc), allocator); @@ -181,6 +183,14 @@ bool xmrig::Config::parseString(int key, const char *arg) case CudaMaxUsageKey: return parseUint64(key, strtoul(arg, nullptr, 10)); + case NvmlTempL: /* --temp-low */ + m_cudaCLI.parseTempLow(arg); + break; + + case NvmlTempH: /* --temp-high */ + m_cudaCLI.parseTempHigh(arg); + break; + default: break; } diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index cc0e25b6..2255653c 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -61,6 +61,8 @@ static struct option const options[] = { { "cuda-max-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, { "max-gpu-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, // deprecated, use --cuda-max-threads instead. { "max-gpu-usage", 1, nullptr, xmrig::IConfig::CudaMaxUsageKey }, // deprecated. + { "temp-low", 1, nullptr, xmrig::IConfig::NvmlTempL }, + { "temp-high", 1, nullptr, xmrig::IConfig::NvmlTempH }, { "config", 1, nullptr, xmrig::IConfig::ConfigKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, @@ -106,6 +108,8 @@ static struct option const config_options[] = { { "cuda-max-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, { "max-gpu-threads", 1, nullptr, xmrig::IConfig::CudaMaxThreadsKey }, // deprecated, use --cuda-max-threads instead. 
{ "max-gpu-usage", 1, nullptr, xmrig::IConfig::CudaMaxUsageKey }, // deprecated. + { "temp-low", 1, nullptr, xmrig::IConfig::NvmlTempL }, + { "temp-high", 1, nullptr, xmrig::IConfig::NvmlTempH }, { nullptr, 0, nullptr, 0 } }; diff --git a/src/core/usage.h b/src/core/usage.h index 8239b54e..0aa18357 100644 --- a/src/core/usage.h +++ b/src/core/usage.h @@ -63,6 +63,8 @@ Options:\n\ --cuda-bfactor=[0-12] run CryptoNight core kernel in smaller pieces\n\ --cuda-bsleep=N insert a delay of N microseconds between kernel launches\n\ --cuda-affinity=N affine GPU threads to a CPU\n\ + --temp-low=N list of celsius temperature below which is green\n\ + --temp-high=N list of celsius temperature above which is red\n\ --no-color disable colored output\n\ --variant algorithm PoW variant\n\ --donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\ diff --git a/src/defaults.h b/src/defaults.h new file mode 100644 index 00000000..7423b6f7 --- /dev/null +++ b/src/defaults.h @@ -0,0 +1,31 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef __DEFAULTS_H__ +#define __DEFAULTS_H__ + +//temperature display points +// (below L is green, between is yellow, above H is red) +#define DFL_nvmlTempL 45 +#define DFL_nvmlTempH 65 + +#endif /* __DEFAULTS_H__ */ diff --git a/src/nvidia/CudaCLI.cpp b/src/nvidia/CudaCLI.cpp index 621ccb7c..017e35d5 100644 --- a/src/nvidia/CudaCLI.cpp +++ b/src/nvidia/CudaCLI.cpp @@ -153,6 +153,34 @@ void CudaCLI::parseLaunch(const char *arg) } +void CudaCLI::parseTempLow(const char *arg) +{ + char *value = strdup(arg); + char *pch = strtok(value, ","); + + while (pch != nullptr) { + m_temp_low.push_back(static_cast(strtoul(pch, nullptr, 10))); + pch = strtok(nullptr, ","); + } + + free(value); +} + + +void CudaCLI::parseTempHigh(const char *arg) +{ + char *value = strdup(arg); + char *pch = strtok(value, ","); + + while (pch != nullptr) { + m_temp_high.push_back(static_cast(strtoul(pch, nullptr, 10))); + pch = strtok(nullptr, ","); + } + + free(value); +} + + int CudaCLI::get(const std::vector &vector, int index, int defaultValue) const { if (vector.empty()) { diff --git a/src/nvidia/CudaCLI.h b/src/nvidia/CudaCLI.h index 25fafde8..2c5412f2 100644 --- a/src/nvidia/CudaCLI.h +++ b/src/nvidia/CudaCLI.h @@ -28,6 +28,7 @@ #include +#include "defaults.h" #include "common/xmrig.h" @@ -46,6 +47,8 @@ class CudaCLI void autoConf(std::vector &threads, xmrig::Algo algo, bool isCNv2); void parseDevices(const char *arg); void parseLaunch(const char *arg); + void parseTempLow(const char *arg); + void parseTempHigh(const char *arg); inline void addBFactor(int bfactor) { m_bfactors.push_back(bfactor); } inline void addBSleep(int bsleep) { m_bsleeps.push_back(bsleep); } @@ -71,6 +74,9 @@ class CudaCLI # endif } + inline int temp_low(int index = 0) const { return get(m_temp_low, index, DFL_nvmlTempL); } + inline int temp_high(int index = 0) const { return get(m_temp_high, index, DFL_nvmlTempH); } + private: inline int affinity(int index) const { return get(m_affinity, index, -1); } 
inline int blocks(int index) const { return get(m_blocks, index, -1); } @@ -87,6 +93,8 @@ class CudaCLI std::vector m_bsleeps; std::vector m_devices; std::vector m_threads; + std::vector m_temp_low; + std::vector m_temp_high; }; diff --git a/src/nvidia/Health.h b/src/nvidia/Health.h index 81d7a607..b455e742 100644 --- a/src/nvidia/Health.h +++ b/src/nvidia/Health.h @@ -27,6 +27,7 @@ #include +#define PROBED_UNSUPPORTED 0x8675309 class Health { diff --git a/src/nvidia/NvmlApi.cpp b/src/nvidia/NvmlApi.cpp index 8aa99626..23bd4f0c 100644 --- a/src/nvidia/NvmlApi.cpp +++ b/src/nvidia/NvmlApi.cpp @@ -30,7 +30,7 @@ static uv_lib_t nvmlLib; -static char nvmlVerion[80] = { 0 }; +static char nvmlVersion[NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE] = { 0 }; bool NvmlApi::m_available = false; @@ -50,8 +50,8 @@ static nvmlReturn_t(*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t * bool NvmlApi::init() { # ifdef _WIN32 - char tmp[512]; - ExpandEnvironmentStringsA("%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll", tmp, sizeof(tmp)); + char tmp[261]; //LoadLibrary calls are still "260 char" limited + ExpandEnvironmentStringsA(R"(%ProgramFiles%\NVIDIA Corporation\NVSMI\nvml.dll)", tmp, sizeof(tmp)); if (uv_dlopen(tmp, &nvmlLib) == -1 && uv_dlopen("nvml.dll", &nvmlLib) == -1) { return false; } @@ -78,7 +78,7 @@ bool NvmlApi::init() m_available = pNvmlInit() == NVML_SUCCESS; if (pNvmlSystemGetNVMLVersion) { - pNvmlSystemGetNVMLVersion(nvmlVerion, sizeof(nvmlVerion)); + pNvmlSystemGetNVMLVersion(nvmlVersion, sizeof(nvmlVersion)); } return m_available; @@ -95,34 +95,52 @@ void NvmlApi::release() } -bool NvmlApi::health(int id, Health &health) +bool NvmlApi::health(int i, Health &health) { - if (id == -1 || !isAvailable()) { + const auto id = static_cast(i); + nvmlDevice_t device; + + if (i == -1 || !isAvailable() + || + (pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS) + ) { return false; } - health.reset(); + // cache items previously 
pegged as unavailable via function call failure + // this has to happen before the reset or we don't see the previous value + const bool hasPowerUsage = PROBED_UNSUPPORTED != health.power; + const bool hasFanSpeed = PROBED_UNSUPPORTED != health.fanSpeed; + const bool hasClockInfo = PROBED_UNSUPPORTED != health.clock; - nvmlDevice_t device; - if (pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS) { - return false; - } + health.reset(); if (pNvmlDeviceGetTemperature) { pNvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &health.temperature); } if (pNvmlDeviceGetPowerUsage) { - pNvmlDeviceGetPowerUsage(device, &health.power); + if (!hasPowerUsage || pNvmlDeviceGetPowerUsage(device, &health.power) != NVML_SUCCESS){ + health.power = PROBED_UNSUPPORTED; + } } if (pNvmlDeviceGetFanSpeed) { - pNvmlDeviceGetFanSpeed(device, &health.fanSpeed); + if (!hasFanSpeed || pNvmlDeviceGetFanSpeed(device, &health.fanSpeed) != NVML_SUCCESS){ + health.fanSpeed = PROBED_UNSUPPORTED; + } } if (pNvmlDeviceGetClockInfo) { - pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock); - pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock); + if (!hasClockInfo + || + pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock) != NVML_SUCCESS + || + pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock) != NVML_SUCCESS + ) { + health.clock = PROBED_UNSUPPORTED; + health.memClock = PROBED_UNSUPPORTED; + } } return true; @@ -131,7 +149,7 @@ bool NvmlApi::health(int id, Health &health) const char *NvmlApi::version() { - return nvmlVerion; + return nvmlVersion; } @@ -158,7 +176,7 @@ void NvmlApi::bind(const std::vector &threads) } for (xmrig::IThread *t : threads) { - auto thread = static_cast(t); + auto thread = dynamic_cast(t); if (thread->pciBusID() == pci.bus && thread->pciDeviceID() == pci.device && thread->pciDomainID() == pci.domain) { thread->setNvmlId(i); break; diff --git a/src/workers/CudaThread.cpp 
b/src/workers/CudaThread.cpp index 9f169e50..9f80a12f 100644 --- a/src/workers/CudaThread.cpp +++ b/src/workers/CudaThread.cpp @@ -27,6 +27,7 @@ #include +#include "defaults.h" #include "rapidjson/document.h" #include "workers/CudaThread.h" @@ -38,6 +39,8 @@ CudaThread::CudaThread() : m_clockRate(0), m_memoryClockRate(0), m_nvmlId(-1), + m_nvmlTempL(DFL_nvmlTempL), + m_nvmlTempH(DFL_nvmlTempH), m_smx(0), m_threads(0), m_affinity(-1), @@ -63,6 +66,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori m_clockRate(ctx.device_clockRate), m_memoryClockRate(ctx.device_memoryClockRate), m_nvmlId(-1), + m_nvmlTempL(DFL_nvmlTempL), + m_nvmlTempH(DFL_nvmlTempH), m_smx(ctx.device_mpcount), m_threads(ctx.device_threads), m_affinity(affinity), @@ -88,6 +93,8 @@ CudaThread::CudaThread(const rapidjson::Value &object) : m_clockRate(0), m_memoryClockRate(0), m_nvmlId(-1), + m_nvmlTempL(DFL_nvmlTempL), + m_nvmlTempH(DFL_nvmlTempH), m_smx(0), m_threads(0), m_affinity(-1), @@ -117,6 +124,16 @@ CudaThread::CudaThread(const rapidjson::Value &object) : if (affinity.IsInt()) { setAffinity(affinity.GetInt()); } + + const rapidjson::Value &tempL = object["temp_low"]; + if (tempL.IsInt()) { + setNvmlTempL(static_cast(tempL.GetInt())); + } + + const rapidjson::Value &tempH = object["temp_high"]; + if (tempH.IsInt()) { + setNvmlTempH(static_cast(tempH.GetInt())); + } } diff --git a/src/workers/CudaThread.h b/src/workers/CudaThread.h index f6636686..ddf6ee96 100644 --- a/src/workers/CudaThread.h +++ b/src/workers/CudaThread.h @@ -53,6 +53,8 @@ class CudaThread : public xmrig::IThread inline size_t memoryTotal() const { return m_memoryTotal; } inline size_t memoryFree() const { return m_memoryFree; } inline int nvmlId() const { return m_nvmlId; } + inline uint32_t nvmlTempL() const { return m_nvmlTempL; } + inline uint32_t nvmlTempH() const { return m_nvmlTempH; } inline int smx() const { return m_smx; } inline int threads() const { return m_threads; } inline 
size_t threadId() const { return m_threadId; } @@ -74,6 +76,8 @@ class CudaThread : public xmrig::IThread inline void setBSleep(int bsleep) { m_bsleep = bsleep; } inline void setIndex(size_t index) { m_index = index; } inline void setNvmlId(int id) { m_nvmlId = id; } + inline void setNvmlTempL(uint32_t temp) { m_nvmlTempL = temp; } + inline void setNvmlTempH(uint32_t temp) { m_nvmlTempH = temp; } inline void setThreadId(size_t threadId) { m_threadId = threadId; } inline void setThreads(int threads) { m_threads = threads; } inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; } @@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread int m_clockRate; int m_memoryClockRate; int m_nvmlId; + uint32_t m_nvmlTempL; + uint32_t m_nvmlTempH; int m_smx; int m_threads; int64_t m_affinity; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 3ffda598..f9b05198 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -113,7 +113,7 @@ void Workers::printHashrate(bool detail) size_t i = 0; for (const xmrig::IThread *t : m_controller->config()->threads()) { - auto thread = static_cast(t); + auto thread = dynamic_cast(t); Log::i()->text("| %6zu | %3zu | %7s | %7s | %7s | %s%s", i, thread->index(), Hashrate::format(m_hashrate->calc(i, Hashrate::ShortInterval), num1, sizeof num1), @@ -130,6 +130,18 @@ void Workers::printHashrate(bool detail) m_hashrate->print(); } +const std::string _spf(const char * const fmt, ...) 
+{ + va_list args = {}, copy = {}; + va_start(args, fmt); + va_copy(copy, args); + const auto len = static_cast(std::vsnprintf(nullptr, 0, fmt, copy)); + va_end(copy); + std::vector str(len + 1); + std::vsnprintf(str.data(), str.size(), fmt, args); + va_end(args); + return std::string(str.data(), len); +} void Workers::printHealth() { @@ -140,32 +152,52 @@ void Workers::printHealth() Health health; for (const xmrig::IThread *t : m_controller->config()->threads()) { - auto thread = static_cast(t); + auto thread = dynamic_cast(t); if (!NvmlApi::health(thread->nvmlId(), health)) { continue; } - const uint32_t temp = health.temperature; - - if (health.clock && health.clock) { - if (m_controller->config()->isColors()) { - LOG_INFO("\x1B[00;35mGPU #%d: \x1B[01m%u\x1B[00;35m/\x1B[01m%u MHz\x1B[00;35m \x1B[01m%uW\x1B[00;35m %s%uC\x1B[00;35m FAN \x1B[01m%u%%", - thread->index(), health.clock, health.memClock, health.power / 1000, (temp < 45 ? "\x1B[01;32m" : (temp > 65 ? "\x1B[01;31m" : "\x1B[01;33m")), temp, health.fanSpeed); - } - else { - LOG_INFO(" * GPU #%d: %u/%u MHz %uW %uC FAN %u%%", thread->index(), health.clock, health.memClock, health.power / 1000, health.temperature, health.fanSpeed); - } - - continue; + const bool isColors = m_controller->config()->isColors(); + std::string report, chunk; + + report = _spf(isColors + ? MAGENTA("GPU #%d: ") + : "GPU #%d: " + , thread->index() + ); + if (health.clock != PROBED_UNSUPPORTED && health.memClock != PROBED_UNSUPPORTED) { + report += _spf(isColors + ? MAGENTA_BOLD("%u") MAGENTA("/") MAGENTA_BOLD("%u MHz") " " + : "%u/%u MHz " + , health.clock, health.memClock + ); } - - if (m_controller->config()->isColors()) { - LOG_INFO("\x1B[00;35mGPU #%d: %s%uC\x1B[00;35m FAN \x1B[01m%u%%", - thread->index(), (temp < 45 ? "\x1B[01;32m" : (temp > 65 ? "\x1B[01;31m" : "\x1B[01;33m")), temp, health.fanSpeed); + if (health.power != PROBED_UNSUPPORTED) { + report += _spf(isColors + ? 
MAGENTA_BOLD("%uW") " " + : "%uW " + , health.power / 1000 + ); + } + if (health.temperature) { + if (isColors) { + if (health.temperature > thread->nvmlTempH()) + report += _spf(RED_BOLD("%uC"),health.temperature); + else if (health.temperature < thread->nvmlTempL()) + report += _spf(GREEN_BOLD("%uC"),health.temperature); + else + report += _spf(YELLOW_BOLD("%uC"),health.temperature); + } else + report += _spf("%uC ", health.temperature); } - else { - LOG_INFO(" * GPU #%d: %uC FAN %u%%", thread->index(), health.temperature, health.fanSpeed); + if (health.fanSpeed != PROBED_UNSUPPORTED) { + report += _spf(isColors + ? "FAN " MAGENTA_BOLD("%u%%") + : "FAN %u%%" + , health.fanSpeed + ); } + LOG_INFO("%s", report.c_str()); } }