diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake index 9cef12a5..b8d70c78 100644 --- a/cmake/CUDA.cmake +++ b/cmake/CUDA.cmake @@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50") # Fermi GPUs are only supported with CUDA < 9.0 if (CUDA_VERSION VERSION_LESS 9.0) - list(APPEND DEFAULT_CUDA_ARCH "20") + list(APPEND DEFAULT_CUDA_ARCH "20 21") endif() # add Pascal support for CUDA >= 8.0 @@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH}) "Use '20' (for compute architecture 2.0) or higher.") endif() endforeach() +list(SORT CUDA_ARCH) option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF) option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF) @@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc") if (CUDA_VERSION VERSION_LESS 8.0) add_definitions(-D_FORCE_INLINES) add_definitions(-D_MWAITXINTRIN_H_INCLUDED) + elseif(CUDA_VERSION VERSION_LESS 9.0) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets") endif() foreach(CUDA_ARCH_ELEM ${CUDA_ARCH}) # set flags to create device code for the given architecture - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} - "-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}") + if("${CUDA_ARCH_ELEM}" STREQUAL "21") + # "2.1" actually does run faster when compiled as itself, versus in "2.0" compatible mode + # strange virtual code type on top of compute_20, with no compute_21 (so the normal rule fails) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + "--generate-code arch=compute_20,code=sm_21") + else() + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}") + endif() endforeach() # give each thread an independent default stream diff --git a/src/Summary.cpp b/src/Summary.cpp index 44b6d6f8..769cd314 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -65,10 +67,11 @@ static void print_algo(xmrig::Config *config) static void print_gpu(xmrig::Config *config) { + constexpr size_t byteToMiB = 1024u * 1024u; for (const xmrig::IThread *t : config->threads()) { auto thread = static_cast(t); - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d" - : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d", + Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB" + : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB", thread->index(), thread->pciDomainID(), thread->pciBusID(), @@ -82,7 +85,9 @@ static void print_gpu(xmrig::Config *config) thread->bsleep(), thread->arch()[0], thread->arch()[1], - thread->smx() + thread->smx(), + thread->memoryFree() / byteToMiB, + thread->memoryTotal() / byteToMiB ); } } diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h index 673c7ae8..3eceb1a3 100644 --- a/src/nvidia/cryptonight.h +++ b/src/nvidia/cryptonight.h @@ -1,26 +1,28 @@ /* XMRig -* Copyright 2010 Jeff Garzik -* Copyright 2012-2014 pooler -* Copyright 2014 Lucas Jones -* Copyright 2014-2016 Wolf9466 -* Copyright 2016 Jay D Dee -* Copyright 2017-2018 XMR-Stak , -* Copyright 2018 Lee Clagett -* Copyright 2016-2018 XMRig , -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -*/ + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ #pragma once @@ -50,6 +52,8 @@ typedef struct { int device_bsleep; int device_clockRate; int device_memoryClockRate; + size_t device_memoryTotal; + size_t device_memoryFree; uint32_t device_pciBusID; uint32_t device_pciDeviceID; uint32_t device_pciDomainID; diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index 7c53ad49..a53377e8 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -1,27 +1,28 @@ /* XMRig -* Copyright 2010 Jeff Garzik -* Copyright 2012-2014 pooler -* Copyright 2014 Lucas Jones -* Copyright 2014-2016 Wolf9466 -* Copyright 2016 Jay D Dee -* Copyright 2017-2018 XMR-Stak , -* Copyright 2018 Lee Clagett -* Copyright 2016-2018 XMRig , -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -*/ - + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ #include #include @@ -529,6 +530,28 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2) return 1; } + // a device must be selected to get the right memory usage later on + if (cudaSetDevice(ctx->device_id) != cudaSuccess) { + printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id); + return 2; + } + + // trigger that a context on the gpu will be allocated + int* tmp; + if (cudaMalloc(&tmp, 256) != cudaSuccess) { + printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id); + return 3; + } + + size_t freeMemory = 0; + size_t totalMemory = 0; + + CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory)); + CUDA_CHECK(ctx->device_id, cudaFree(tmp)); + CUDA_CHECK(ctx->device_id, cudaDeviceReset()); + ctx->device_memoryFree = freeMemory; + ctx->device_memoryTotal = totalMemory; + cudaDeviceProp props; err = cudaGetDeviceProperties(&props, ctx->device_id); if (err != cudaSuccess) { @@ -593,26 +616,6 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2) maxMemUsage = size_t(1024u) * byteToMiB; } - // a device must be selected to get the right memory usage later on - if (cudaSetDevice(ctx->device_id) != cudaSuccess) { - printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id); - return 2; - } - - // trigger that a context on the gpu will be allocated - int* tmp; - if (cudaMalloc(&tmp, 256) != cudaSuccess) { - printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id); - return 3; - } - - size_t freeMemory = 0; - size_t totalMemory = 0; - - CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory)); - CUDA_CHECK(ctx->device_id, cudaFree(tmp)); - CUDA_CHECK(ctx->device_id, cudaDeviceReset()); - const size_t hashMemSize = xmrig::cn_select_memory(algo); # ifdef _WIN32 /* We use in windows bfactor (split slow kernel into smaller parts) to avoid diff --git a/src/workers/CudaThread.cpp b/src/workers/CudaThread.cpp index d322aa88..9f169e50 100644 --- a/src/workers/CudaThread.cpp +++ b/src/workers/CudaThread.cpp @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +42,8 @@ CudaThread::CudaThread() : m_threads(0), m_affinity(-1), m_index(0), + m_memoryFree(0), + m_memoryTotal(0), m_threadId(0), m_pciBusID(0), m_pciDeviceID(0), @@ -63,6 +67,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori m_threads(ctx.device_threads), m_affinity(affinity), m_index(static_cast(ctx.device_id)), + m_memoryFree(ctx.device_memoryFree), + m_memoryTotal(ctx.device_memoryTotal), m_threadId(0), m_pciBusID(ctx.device_pciBusID), m_pciDeviceID(ctx.device_pciDeviceID), @@ -145,6 +151,8 @@ bool CudaThread::init(xmrig::Algo algorithm) m_clockRate = ctx.device_clockRate; m_memoryClockRate = ctx.device_memoryClockRate; + m_memoryTotal = ctx.device_memoryTotal; + m_memoryFree = ctx.device_memoryFree; m_pciBusID = ctx.device_pciBusID; m_pciDeviceID = ctx.device_pciDeviceID; m_pciDomainID = ctx.device_pciDomainID; diff --git a/src/workers/CudaThread.h b/src/workers/CudaThread.h index a9a00076..f6636686 100644 --- a/src/workers/CudaThread.h +++ b/src/workers/CudaThread.h @@ -5,7 +5,9 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2019 SChernykh + * Copyright 2019 Spudz76 + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,6 +50,8 @@ class CudaThread : public xmrig::IThread inline int bsleep() const { return m_bsleep; } inline int clockRate() const { return m_clockRate; } inline int memoryClockRate() const { return m_memoryClockRate; } + inline size_t memoryTotal() const { return m_memoryTotal; } + inline size_t memoryFree() const { return m_memoryFree; } inline int nvmlId() const { return m_nvmlId; } inline int smx() const { return m_smx; } inline int threads() const { return m_threads; } @@ -75,8 +79,8 @@ class CudaThread : public xmrig::IThread inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; } protected: -# ifdef APP_DEBUG - void print() const override; +# ifdef APP_DEBUG + void print() const override; # endif # ifndef XMRIG_NO_API @@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread int m_threads; int64_t m_affinity; size_t m_index; + size_t m_memoryFree; + size_t m_memoryTotal; size_t m_threadId; uint32_t m_pciBusID; uint32_t m_pciDeviceID;