Skip to content

Commit

Permalink
Refactor NVML, allow unavailable items to disappear, make thermal dis…
Browse files Browse the repository at this point in the history
…play color thresholds configurable both compile-time and runtime
  • Loading branch information
Spudz76 committed Mar 21, 2019
1 parent f0469b5 commit 5d70761
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 37 deletions.
31 changes: 31 additions & 0 deletions src/defaults.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* XMRig
* Copyright 2010 Jeff Garzik <[email protected]>
* Copyright 2012-2014 pooler <[email protected]>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <[email protected]>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef __DEFAULTS_H__
#define __DEFAULTS_H__

// Compile-time defaults for the GPU temperature colour thresholds used by the
// health report:
//   temperature below DFL_nvmlTempL          -> green
//   temperature from L to H (both inclusive) -> yellow
//   temperature above DFL_nvmlTempH          -> red
// Every CudaThread starts with these values; a thread's JSON config may
// override them at runtime through the "temp_low" / "temp_high" keys.
// The report prints the reading with a "C" suffix, so the values are
// presumably degrees Celsius -- confirm against the NVML documentation.
#define DFL_nvmlTempL 45
#define DFL_nvmlTempH 65

#endif /* __DEFAULTS_H__ */
52 changes: 35 additions & 17 deletions src/nvidia/NvmlApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


static uv_lib_t nvmlLib;
static char nvmlVerion[80] = { 0 };
static char nvmlVersion[NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE] = { 0 };


bool NvmlApi::m_available = false;
Expand All @@ -50,8 +50,8 @@ static nvmlReturn_t(*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t *
bool NvmlApi::init()
{
# ifdef _WIN32
char tmp[512];
ExpandEnvironmentStringsA("%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll", tmp, sizeof(tmp));
char tmp[261]; //LoadLibrary calls are still "260 char" limited
ExpandEnvironmentStringsA(R"(%ProgramFiles%\NVIDIA Corporation\NVSMI\nvml.dll)", tmp, sizeof(tmp));
if (uv_dlopen(tmp, &nvmlLib) == -1 && uv_dlopen("nvml.dll", &nvmlLib) == -1) {
return false;
}
Expand All @@ -78,7 +78,7 @@ bool NvmlApi::init()
m_available = pNvmlInit() == NVML_SUCCESS;

if (pNvmlSystemGetNVMLVersion) {
pNvmlSystemGetNVMLVersion(nvmlVerion, sizeof(nvmlVerion));
pNvmlSystemGetNVMLVersion(nvmlVersion, sizeof(nvmlVersion));
}

return m_available;
Expand All @@ -95,34 +95,52 @@ void NvmlApi::release()
}


bool NvmlApi::health(int id, Health &health)
bool NvmlApi::health(int i, Health &health)
{
if (id == -1 || !isAvailable()) {
const auto id = static_cast<unsigned int>(i);
nvmlDevice_t device;

if (i == -1 || !isAvailable()
||
(pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS)
) {
return false;
}

health.reset();
// cache items previously pegged as unavailable via function call failure
// this has to happen before the reset or we don't see the previous value
const bool hasPowerUsage = MAXUINT32 != health.power;
const bool hasFanSpeed = MAXUINT32 != health.fanSpeed;
const bool hasClockInfo = MAXUINT32 != health.clock;

nvmlDevice_t device;
if (pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetHandleByIndex(id, &device) != NVML_SUCCESS) {
return false;
}
health.reset();

if (pNvmlDeviceGetTemperature) {
pNvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &health.temperature);
}

if (pNvmlDeviceGetPowerUsage) {
pNvmlDeviceGetPowerUsage(device, &health.power);
if (!hasPowerUsage || pNvmlDeviceGetPowerUsage(device, &health.power) != NVML_SUCCESS){
health.power = MAXUINT32;
}
}

if (pNvmlDeviceGetFanSpeed) {
pNvmlDeviceGetFanSpeed(device, &health.fanSpeed);
if (!hasFanSpeed || pNvmlDeviceGetFanSpeed(device, &health.fanSpeed) != NVML_SUCCESS){
health.fanSpeed = MAXUINT32;
}
}

if (pNvmlDeviceGetClockInfo) {
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock);
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock);
if (!hasClockInfo
||
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock) != NVML_SUCCESS
||
pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock) != NVML_SUCCESS
) {
health.clock = MAXUINT32;
health.memClock = MAXUINT32;
}
}

return true;
Expand All @@ -131,7 +149,7 @@ bool NvmlApi::health(int id, Health &health)

const char *NvmlApi::version()
{
return nvmlVerion;
return nvmlVersion;
}


Expand All @@ -158,7 +176,7 @@ void NvmlApi::bind(const std::vector<xmrig::IThread*> &threads)
}

for (xmrig::IThread *t : threads) {
auto thread = static_cast<CudaThread *>(t);
auto thread = dynamic_cast<CudaThread *>(t);
if (thread->pciBusID() == pci.bus && thread->pciDeviceID() == pci.device && thread->pciDomainID() == pci.domain) {
thread->setNvmlId(i);
break;
Expand Down
17 changes: 17 additions & 0 deletions src/workers/CudaThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <string.h>


#include "defaults.h"
#include "rapidjson/document.h"
#include "workers/CudaThread.h"

Expand All @@ -38,6 +39,8 @@ CudaThread::CudaThread() :
m_clockRate(0),
m_memoryClockRate(0),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(0),
m_threads(0),
m_affinity(-1),
Expand All @@ -63,6 +66,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori
m_clockRate(ctx.device_clockRate),
m_memoryClockRate(ctx.device_memoryClockRate),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(ctx.device_mpcount),
m_threads(ctx.device_threads),
m_affinity(affinity),
Expand All @@ -88,6 +93,8 @@ CudaThread::CudaThread(const rapidjson::Value &object) :
m_clockRate(0),
m_memoryClockRate(0),
m_nvmlId(-1),
m_nvmlTempL(DFL_nvmlTempL),
m_nvmlTempH(DFL_nvmlTempH),
m_smx(0),
m_threads(0),
m_affinity(-1),
Expand Down Expand Up @@ -117,6 +124,16 @@ CudaThread::CudaThread(const rapidjson::Value &object) :
if (affinity.IsInt()) {
setAffinity(affinity.GetInt());
}

const rapidjson::Value &tempL = object["temp_low"];
if (tempL.IsInt()) {
setNvmlTempL(tempL.GetInt());
}

const rapidjson::Value &tempH = object["temp_high"];
if (tempH.IsInt()) {
setNvmlTempH(tempH.GetInt());
}
}


Expand Down
6 changes: 6 additions & 0 deletions src/workers/CudaThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class CudaThread : public xmrig::IThread
inline size_t memoryTotal() const { return m_memoryTotal; }
inline size_t memoryFree() const { return m_memoryFree; }
inline int nvmlId() const { return m_nvmlId; }
inline int nvmlTempL() const { return m_nvmlTempL; }
inline int nvmlTempH() const { return m_nvmlTempH; }
inline int smx() const { return m_smx; }
inline int threads() const { return m_threads; }
inline size_t threadId() const { return m_threadId; }
Expand All @@ -74,6 +76,8 @@ class CudaThread : public xmrig::IThread
inline void setBSleep(int bsleep) { m_bsleep = bsleep; }
inline void setIndex(size_t index) { m_index = index; }
inline void setNvmlId(int id) { m_nvmlId = id; }
// Sets the threshold below which the temperature is reported in green.
// The member is unsigned, so a negative input is clamped to 0 instead of
// wrapping around to a huge value that every reading would fall below.
inline void setNvmlTempL(int temp) { m_nvmlTempL = static_cast<uint32_t>(temp < 0 ? 0 : temp); }
// Sets the threshold above which the temperature is reported in red.
// The member is unsigned, so a negative input is clamped to 0 instead of
// wrapping around to a huge value that no reading could ever exceed.
inline void setNvmlTempH(int temp) { m_nvmlTempH = static_cast<uint32_t>(temp < 0 ? 0 : temp); }
inline void setThreadId(size_t threadId) { m_threadId = threadId; }
inline void setThreads(int threads) { m_threads = threads; }
inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; }
Expand All @@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread
int m_clockRate;
int m_memoryClockRate;
int m_nvmlId;
uint32_t m_nvmlTempL;
uint32_t m_nvmlTempH;
int m_smx;
int m_threads;
int64_t m_affinity;
Expand Down
72 changes: 52 additions & 20 deletions src/workers/Workers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void Workers::printHashrate(bool detail)

size_t i = 0;
for (const xmrig::IThread *t : m_controller->config()->threads()) {
auto thread = static_cast<const CudaThread *>(t);
auto thread = dynamic_cast<const CudaThread *>(t);
Log::i()->text("| %6zu | %3zu | %7s | %7s | %7s | %s%s",
i, thread->index(),
Hashrate::format(m_hashrate->calc(i, Hashrate::ShortInterval), num1, sizeof num1),
Expand All @@ -130,6 +130,18 @@ void Workers::printHashrate(bool detail)
m_hashrate->print();
}

/**
 * printf-style formatting into a std::string.
 *
 * The output is measured with a first vsnprintf pass and then rendered into
 * a buffer of exactly the required size, so the result is never truncated.
 *
 * @param fmt  printf-compatible format string; the variadic arguments must
 *             match its conversion specifiers.
 * @return     the formatted text, or an empty string when vsnprintf reports
 *             an error (negative return value).
 */
const std::string _spf(const char * const fmt, ...)
{
    // va_list is an opaque type (an array on several ABIs), so it must not be
    // initialised with nullptr; va_start / va_copy are the only valid setup.
    va_list args;
    va_start(args, fmt);

    // The measuring pass consumes its va_list, hence the separate copy.
    va_list copy;
    va_copy(copy, args);
    const int len = std::vsnprintf(nullptr, 0, fmt, copy);
    va_end(copy);

    // A negative length signals a formatting/encoding error; converting it to
    // an unsigned size would request an enormous allocation.
    if (len <= 0) {
        va_end(args);
        return std::string();
    }

    const size_t size = static_cast<size_t>(len);
    std::vector<char> str(size + 1); // +1 for the terminating NUL
    std::vsnprintf(str.data(), str.size(), fmt, args);
    va_end(args);

    return std::string(str.data(), size);
}

void Workers::printHealth()
{
Expand All @@ -140,32 +152,52 @@ void Workers::printHealth()

Health health;
for (const xmrig::IThread *t : m_controller->config()->threads()) {
auto thread = static_cast<const CudaThread *>(t);
auto thread = dynamic_cast<const CudaThread *>(t);
if (!NvmlApi::health(thread->nvmlId(), health)) {
continue;
}

const uint32_t temp = health.temperature;

if (health.clock && health.clock) {
if (m_controller->config()->isColors()) {
LOG_INFO("\x1B[00;35mGPU #%d: \x1B[01m%u\x1B[00;35m/\x1B[01m%u MHz\x1B[00;35m \x1B[01m%uW\x1B[00;35m %s%uC\x1B[00;35m FAN \x1B[01m%u%%",
thread->index(), health.clock, health.memClock, health.power / 1000, (temp < 45 ? "\x1B[01;32m" : (temp > 65 ? "\x1B[01;31m" : "\x1B[01;33m")), temp, health.fanSpeed);
}
else {
LOG_INFO(" * GPU #%d: %u/%u MHz %uW %uC FAN %u%%", thread->index(), health.clock, health.memClock, health.power / 1000, health.temperature, health.fanSpeed);
}

continue;
const bool isColors = m_controller->config()->isColors();
std::string report, chunk;

report = _spf(isColors
? MAGENTA("GPU #%d: ")
: "GPU #%d: "
, thread->index()
);
if (health.clock != MAXUINT32 && health.memClock != MAXUINT32) {
report += _spf(isColors
? MAGENTA_BOLD("%u") MAGENTA("/") MAGENTA_BOLD("%u MHz") " "
: "%u/%u MHz "
, health.clock, health.memClock
);
}

if (m_controller->config()->isColors()) {
LOG_INFO("\x1B[00;35mGPU #%d: %s%uC\x1B[00;35m FAN \x1B[01m%u%%",
thread->index(), (temp < 45 ? "\x1B[01;32m" : (temp > 65 ? "\x1B[01;31m" : "\x1B[01;33m")), temp, health.fanSpeed);
if (health.power != MAXUINT32) {
report += _spf(isColors
? MAGENTA_BOLD("%uW")
: "%uW"
, health.power / 1000
);
}
if (health.temperature) {
if (isColors) {
if (health.temperature > thread->nvmlTempH())
report += _spf(RED_BOLD("%uC"),health.temperature);
else if (health.temperature < thread->nvmlTempL())
report += _spf(GREEN_BOLD("%uC"),health.temperature);
else
report += _spf(YELLOW_BOLD("%uC"),health.temperature);
} else
report += _spf("%uC ", health.temperature);
}
else {
LOG_INFO(" * GPU #%d: %uC FAN %u%%", thread->index(), health.temperature, health.fanSpeed);
if (health.fanSpeed != MAXUINT32) {
report += _spf(isColors
? "FAN " MAGENTA_BOLD("%u%%")
: "FAN %u%%"
, health.fanSpeed
);
}
LOG_INFO("%s", report.c_str());
}
}

Expand Down

0 comments on commit 5d70761

Please sign in to comment.