diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake
index 9cef12a5..b8d70c78 100644
--- a/cmake/CUDA.cmake
+++ b/cmake/CUDA.cmake
@@ -32,7 +32,7 @@ set(DEFAULT_CUDA_ARCH "30;50")
 
 # Fermi GPUs are only supported with CUDA < 9.0
 if (CUDA_VERSION VERSION_LESS 9.0)
-    list(APPEND DEFAULT_CUDA_ARCH "20")
+    list(APPEND DEFAULT_CUDA_ARCH "20;21")  # two list elements; "20 21" would be appended as one bogus architecture
 endif()
 
 # add Pascal support for CUDA >= 8.0
@@ -61,6 +61,7 @@ foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
                             "Use '20' (for compute architecture 2.0) or higher.")
     endif()
 endforeach()
+list(SORT CUDA_ARCH)  # sort in place so the per-architecture nvcc flags generated below follow a stable order
 
 option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
 option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
@@ -89,11 +90,20 @@ elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
     if (CUDA_VERSION VERSION_LESS 8.0)
         add_definitions(-D_FORCE_INLINES)
         add_definitions(-D_MWAITXINTRIN_H_INCLUDED)
+    elseif(CUDA_VERSION VERSION_LESS 9.0)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets")  # only reached for 8.0 <= CUDA < 9.0; passed once instead of per architecture
     endif()
     foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
         # set flags to create device code for the given architecture
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-            "-Wno-deprecated-gpu-targets --generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
+        if("${CUDA_ARCH_ELEM}" STREQUAL "21")
+            # sm_21 is reported to run faster when compiled natively than in sm_20-compatible mode;
+            # it has no compute_21 virtual architecture, so build it on compute_20 (the generic rule below would fail)
+            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
+                    "--generate-code arch=compute_20,code=sm_21")
+        else()
+            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
+                    "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
+        endif()
     endforeach()
 
     # give each thread an independent default stream
diff --git a/src/Summary.cpp b/src/Summary.cpp
index 44b6d6f8..769cd314 100644
--- a/src/Summary.cpp
+++ b/src/Summary.cpp
@@ -5,7 +5,9 @@
  * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
  * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
  *
  *   This program is free software: you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -65,10 +67,11 @@ static void print_algo(xmrig::Config *config)
 
 static void print_gpu(xmrig::Config *config)
 {
+    constexpr size_t byteToMiB = 1024u * 1024u;
     for (const xmrig::IThread *t : config->threads()) {
         auto thread = static_cast<const CudaThread *>(t);
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d"
-                                          : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d",
+        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("GPU #%-8zu") YELLOW("PCI:%04x:%02x:%02x") GREEN(" %s @ %d/%d MHz") " \x1B[1;30m%dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB"
+                                          : " * GPU #%-8zuPCI:%04x:%02x:%02x %s @ %d/%d MHz %dx%d %dx%d arch:%d%d SMX:%d MEM:%zu/%zu MiB",
                        thread->index(),
                        thread->pciDomainID(),
                        thread->pciBusID(),
@@ -82,7 +85,9 @@ static void print_gpu(xmrig::Config *config)
                        thread->bsleep(),
                        thread->arch()[0],
                        thread->arch()[1],
-                       thread->smx()
+                       thread->smx(),
+                       thread->memoryFree() / byteToMiB,
+                       thread->memoryTotal() / byteToMiB
         );
     }
 }
diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h
index 673c7ae8..3eceb1a3 100644
--- a/src/nvidia/cryptonight.h
+++ b/src/nvidia/cryptonight.h
@@ -1,26 +1,28 @@
 /* XMRig
-* Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
-* Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
-* Copyright 2014      Lucas Jones <https://github.com/lucasjones>
-* Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
-* Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
-* Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
-* Copyright 2018      Lee Clagett <https://github.com/vtnerd>
-* Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
-*
-*   This program is free software: you can redistribute it and/or modify
-*   it under the terms of the GNU General Public License as published by
-*   the Free Software Foundation, either version 3 of the License, or
-*   (at your option) any later version.
-*
-*   This program is distributed in the hope that it will be useful,
-*   but WITHOUT ANY WARRANTY; without even the implied warranty of
-*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-*   GNU General Public License for more details.
-*
-*   You should have received a copy of the GNU General Public License
-*   along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
 
 
 #pragma once
@@ -50,6 +52,8 @@ typedef struct {
     int device_bsleep;
     int device_clockRate;
     int device_memoryClockRate;
+    size_t device_memoryTotal;
+    size_t device_memoryFree;
     uint32_t device_pciBusID;
     uint32_t device_pciDeviceID;
     uint32_t device_pciDomainID;
diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu
index 7c53ad49..a53377e8 100644
--- a/src/nvidia/cuda_extra.cu
+++ b/src/nvidia/cuda_extra.cu
@@ -1,27 +1,28 @@
 /* XMRig
-* Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
-* Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
-* Copyright 2014      Lucas Jones <https://github.com/lucasjones>
-* Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
-* Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
-* Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
-* Copyright 2018      Lee Clagett <https://github.com/vtnerd>
-* Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
-*
-*   This program is free software: you can redistribute it and/or modify
-*   it under the terms of the GNU General Public License as published by
-*   the Free Software Foundation, either version 3 of the License, or
-*   (at your option) any later version.
-*
-*   This program is distributed in the hope that it will be useful,
-*   but WITHOUT ANY WARRANTY; without even the implied warranty of
-*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-*   GNU General Public License for more details.
-*
-*   You should have received a copy of the GNU General Public License
-*   along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
 
 #include <algorithm>
 #include <stdio.h>
@@ -529,6 +530,28 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2)
         return 1;
     }
 
+    // a device must be selected to get the right memory usage later on
+    if (cudaSetDevice(ctx->device_id) != cudaSuccess) {
+        printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
+        return 2;
+    }
+
+    // trigger that a context on the gpu will be allocated
+    int* tmp;
+    if (cudaMalloc(&tmp, 256) != cudaSuccess) {
+        printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
+        return 3;
+    }
+
+    size_t freeMemory  = 0;
+    size_t totalMemory = 0;
+
+    CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
+    CUDA_CHECK(ctx->device_id, cudaFree(tmp));
+    CUDA_CHECK(ctx->device_id, cudaDeviceReset());
+    ctx->device_memoryFree = freeMemory;
+    ctx->device_memoryTotal = totalMemory;
+
     cudaDeviceProp props;
     err = cudaGetDeviceProperties(&props, ctx->device_id);
     if (err != cudaSuccess) {
@@ -593,26 +616,6 @@ int cuda_get_deviceinfo(nvid_ctx* ctx, xmrig::Algo algo, bool isCNv2)
             maxMemUsage = size_t(1024u) * byteToMiB;
         }
 
-        // a device must be selected to get the right memory usage later on
-        if (cudaSetDevice(ctx->device_id) != cudaSuccess) {
-            printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
-            return 2;
-        }
-
-        // trigger that a context on the gpu will be allocated
-        int* tmp;
-        if (cudaMalloc(&tmp, 256) != cudaSuccess) {
-            printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
-            return 3;
-        }
-
-        size_t freeMemory  = 0;
-        size_t totalMemory = 0;
-
-        CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
-        CUDA_CHECK(ctx->device_id, cudaFree(tmp));
-        CUDA_CHECK(ctx->device_id, cudaDeviceReset());
-
         const size_t hashMemSize = xmrig::cn_select_memory(algo);
 #       ifdef _WIN32
         /* We use in windows bfactor (split slow kernel into smaller parts) to avoid
diff --git a/src/workers/CudaThread.cpp b/src/workers/CudaThread.cpp
index d322aa88..9f169e50 100644
--- a/src/workers/CudaThread.cpp
+++ b/src/workers/CudaThread.cpp
@@ -5,7 +5,9 @@
  * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
  * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
  *
  *   This program is free software: you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -40,6 +42,8 @@ CudaThread::CudaThread() :
     m_threads(0),
     m_affinity(-1),
     m_index(0),
+    m_memoryFree(0),
+    m_memoryTotal(0),
     m_threadId(0),
     m_pciBusID(0),
     m_pciDeviceID(0),
@@ -63,6 +67,8 @@ CudaThread::CudaThread(const nvid_ctx &ctx, int64_t affinity, xmrig::Algo algori
     m_threads(ctx.device_threads),
     m_affinity(affinity),
     m_index(static_cast<size_t>(ctx.device_id)),
+    m_memoryFree(ctx.device_memoryFree),
+    m_memoryTotal(ctx.device_memoryTotal),
     m_threadId(0),
     m_pciBusID(ctx.device_pciBusID),
     m_pciDeviceID(ctx.device_pciDeviceID),
@@ -145,6 +151,8 @@ bool CudaThread::init(xmrig::Algo algorithm)
 
     m_clockRate       = ctx.device_clockRate;
     m_memoryClockRate = ctx.device_memoryClockRate;
+    m_memoryTotal     = ctx.device_memoryTotal;
+    m_memoryFree      = ctx.device_memoryFree;
     m_pciBusID        = ctx.device_pciBusID;
     m_pciDeviceID     = ctx.device_pciDeviceID;
     m_pciDomainID     = ctx.device_pciDomainID;
diff --git a/src/workers/CudaThread.h b/src/workers/CudaThread.h
index a9a00076..f6636686 100644
--- a/src/workers/CudaThread.h
+++ b/src/workers/CudaThread.h
@@ -5,7 +5,9 @@
  * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
  * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2019      Spudz76     <https://github.com/Spudz76>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
  *
  *   This program is free software: you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -48,6 +50,8 @@ class CudaThread : public xmrig::IThread
     inline int bsleep() const             { return m_bsleep; }
     inline int clockRate() const          { return m_clockRate; }
     inline int memoryClockRate() const    { return m_memoryClockRate; }
+    inline size_t memoryTotal() const     { return m_memoryTotal; }
+    inline size_t memoryFree() const      { return m_memoryFree; }
     inline int nvmlId() const             { return m_nvmlId; }
     inline int smx() const                { return m_smx; }
     inline int threads() const            { return m_threads; }
@@ -75,8 +79,8 @@ class CudaThread : public xmrig::IThread
     inline void setSyncMode(uint32_t syncMode) { m_syncMode = syncMode > 3 ? 3 : syncMode; }
 
 protected:
-#   ifdef APP_DEBUG
-    void print() const override;
+#   ifdef APP_DEBUG
+    void print() const override;
 #   endif
 
 #   ifndef XMRIG_NO_API
@@ -98,6 +102,8 @@ class CudaThread : public xmrig::IThread
     int m_threads;
     int64_t m_affinity;
     size_t m_index;
+    size_t m_memoryFree;
+    size_t m_memoryTotal;
     size_t m_threadId;
     uint32_t m_pciBusID;
     uint32_t m_pciDeviceID;